Bio::EnsEMBL::Pipeline::SeqFetcher
OBDAIndexSeqFetcher
Toolbar
Summary
Bio::EnsEMBL::Pipeline::SeqFetcher::OBDAIndexSeqFetcher
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $seqfetcher = Bio::EnsEMBL::Pipeline::SeqFetcher::OBDAIndexSeqFetcher->new(
-db => $db,
-format => $format,
);
my $seq = $seqfetcher->get_Seq_by_acc($acc);
where $acc is the primary key on which the index has been made (accession or id)
my $seq = $seqfetcher->get_Seq_by_secondary($name,$acc);
where $name is the namespace or identifier for the secondary key, and $acc is the accession or id.
Beware that this method can return multiple sequences as the secondary id is not necessarily unique
Description
This is basically a wrapper around the SeqFetcher Bio::DB::Flat::OBDAIndex,
to use it in Ensembl in the same way as the other SeqFetchers.
It reads some configuration info from pipeConf.pl.
Sequences are fetched from a
database previously formatted with indicate (made by Steve Searle)
Methods
Methods description
Function: stores and retrieves a Bio::DB::Flat::OBDAIndex object which is initialised in new() |
Title : db Usage : $self->db('/data/blastdb/dbname'); Function: Get/set for dbs to be searched. Checks that the appropriate database files are present, but nothing else. Returns : string Args : string |
Function: Does the sequence retrieval via the OBDAIndex module Returns : Bio::Seq |
Function: Does the sequence retrieval via the OBDAIndex module using the secondary index key An index should have been made prior to this on the two keys. Returns : Bio::Seq |
Function: Does the entry retrieval via the OBDAIndex module using the primary index key Returns : String |
Function: Retrieves secondary_namespaces using OBDAIndex module Returns : Arrayref |
Methods code
# Function: Stores and retrieves the underlying fetcher objects (the
#           Bio::DB::Flat::OBDAIndex instances created in new()).
# Args    : (optional) a fetcher object to append to the stored list
# Returns : list of all fetcher objects stored so far (empty if none)
sub _seqfetcher {
    my ($self, $fetcher) = @_;

    # Initialise the slot before it is dereferenced, so that reading the
    # list before any fetcher has been stored yields an empty list instead
    # of dereferencing an undefined value.
    $self->{'_seqfetcher'} = [] unless defined $self->{'_seqfetcher'};

    if ($fetcher) {
        push @{ $self->{'_seqfetcher'} }, $fetcher;
    }

    return @{ $self->{'_seqfetcher'} };
}
# Title   : db
# Usage   : $self->db(['/data/blastdb/dbname', 'other_db']);
#           $self->db('/data/blastdb/dbname');
# Function: Get/set for the databases to be searched. New values are
#           appended to the ones already stored; no validation of the
#           values is performed here.
# Returns : list of all stored database names/paths
# Args    : (optional) an array reference of database names, or a single
#           database name as a plain string
sub db {
    my ($self, $dbs) = @_;

    $self->{'_db'} = [] unless defined $self->{'_db'};

    if (defined $dbs) {
        if (ref($dbs) eq 'ARRAY') {
            push @{ $self->{'_db'} }, @$dbs;
        }
        elsif (!ref $dbs) {
            # A plain string (the documented usage) used to be dropped
            # silently; store it as a single database.
            push @{ $self->{'_db'} }, $dbs;
        }
        # Any other kind of reference is ignored, as before.
    }

    return @{ $self->{'_db'} };
}
# Function: Fetches a sequence for the given accession. Every stored
#           fetcher is first asked for the accession as a primary key
#           (get_Seq_by_id); if none returns a sequence, every secondary
#           namespace of every fetcher is tried (get_Seq_by_secondary).
#           Secondary lookups that return more than one sequence are
#           skipped with a warning. The sequence that is returned has its
#           display_id and accession_number set to $acc and its
#           description emptied.
# Args    : accession / id (throws if undefined)
# Returns : the sequence object, or undef if nothing was found
sub get_Seq_by_acc {
    my ($self, $acc) = @_;

    throw("No accession input") unless defined $acc;

    my $seq;
    my $have_secondary;

    # Pass 1: primary key lookup, stopping at the first fetcher that
    # delivers a sequence.
    for my $fetcher ($self->_seqfetcher) {
        if ($fetcher->secondary_namespaces) {
            $have_secondary = 1;
        }

        eval { $seq = $fetcher->get_Seq_by_id($acc); };
        warning("problem fetching sequence for $acc") if $@;

        next unless defined $seq;

        $seq->display_id($acc);
        $seq->accession_number($acc);
        $seq->desc("");
        last;
    }

    # Pass 2: fall back to the secondary keys.
    unless (defined $seq) {
        my ($pkg, $file, $line) = caller;

        # The primary-key miss is only reported when no fetcher offers a
        # secondary index to fall back on.
        unless ($have_secondary) {
            warning("OBDAIndexSeqFetcher: could not find sequence for primary key $acc in index ".$self->index_name." $file:$line\n");
        }

        FETCHER:
        for my $fetcher ($self->_seqfetcher) {
            for my $namespace ($fetcher->secondary_namespaces) {
                my @hits;
                eval { @hits = $fetcher->get_Seq_by_secondary($namespace, $acc); };
                if ($@) {
                    warning("problem fetching sequence for secondary key $acc $@");
                }

                # An ambiguous secondary key is never used.
                if (@hits > 1) {
                    warning("Multiple sequences (".scalar(@hits).") for the same secondary accession $acc\n");
                    next;
                }

                next unless defined $hits[0];

                $hits[0]->display_id($acc);
                $hits[0]->accession_number($acc);
                $hits[0]->desc("");
                $seq = $hits[0];
                last FETCHER;
            }
        }

        warning("could not find sequence for secondary key $acc") unless $seq;
    }

    print STDERR "sequence not found. Returning undef\n" unless $seq;

    return $seq;
}
# Function: Not supported by this fetcher. Emits a warning that points the
#           caller to get_Seq_by_acc and fetches nothing.
# Args    : ignored
# Returns : undef
sub get_Seq_by_id {
    # List assignment, so that $self receives the invocant; the scalar
    # form `my $self = @_` would store the number of arguments instead.
    my ($self) = @_;

    warning("cannot call get_Seq_by_id on OBDAIndexSeqFetcher, use get_Seq_by_acc instead");

    # An explicit undef (rather than a bare return) is kept so that callers
    # in list context still receive exactly one element, as before.
    return undef;
}
# Function: Fetches a sequence through a secondary index key. Each stored
#           fetcher is asked in turn (get_Seq_by_secondary); the first one
#           that returns exactly one sequence wins. That sequence gets its
#           display_id and accession_number set to $acc and its
#           description emptied. Results with more than one sequence are
#           reported with a warning and skipped.
# Args    : $name - namespace of the secondary key (throws if undefined)
#           $acc  - the secondary key value (throws if undefined)
# Returns : a single sequence object, or undef if no fetcher produced an
#           unambiguous match
sub get_Seq_by_secondary {
    my ($self, $name, $acc) = @_;

    if (!defined($acc)) {
        throw("No secondary key input");
    }
    if (!defined($name)) {
        throw("No name space for the secondary key");
    }

    my $seq;
    foreach my $seqfetcher ($self->_seqfetcher) {
        # Results are kept per fetcher. With one shared array, an
        # ambiguous result that was skipped (or a stale result left over
        # after a failed eval) could still be handed back at the end.
        my @seqs;
        eval {
            @seqs = $seqfetcher->get_Seq_by_secondary($name, $acc);
        };
        if ($@) {
            warning("problem fetching sequence for $acc");
        }

        if (@seqs > 1) {
            warning("Multiple sequences (".scalar(@seqs).") for the same secondary accession $acc\n");
            next;
        }

        if (defined $seqs[0]) {
            $seq = $seqs[0];
            $seq->display_id($acc);
            $seq->accession_number($acc);
            $seq->desc("");
            last;
        }
    }

    unless (defined $seq) {
        warning("OBDAIndexSeqFetcher: could not find sequence for $acc");
    }

    return $seq;
}
# Function: Retrieves the entry for a primary index key by asking each
#           stored fetcher in turn (get_entry_by_id) and stopping at the
#           first one that returns a defined entry. A fetcher that dies is
#           reported with a warning and the next one is tried.
# Args    : accession / id (throws if false)
# Returns : the entry as returned by the fetcher, or undef if none of the
#           fetchers has it
sub get_entry_by_acc {
    my ($self, $acc) = @_;

    if (!$acc) {
        throw("No accession");
    }

    my $entry;
    foreach my $seqfetcher ($self->_seqfetcher) {
        eval {
            $entry = $seqfetcher->get_entry_by_id($acc);
        };
        if ($@) {
            warning("problem fetching entry for $acc");
        }

        # Stop at the first hit; otherwise a later fetcher that does not
        # know the accession would overwrite the entry with undef.
        last if defined $entry;
    }

    return $entry;
}

1;
# Function: Get/set for the index name held by this object.
# Args    : (optional) the new name; only a true value replaces the
#           stored one
# Returns : the stored name (undef if none has been set)
sub index_name {
    my $self = shift;
    my $name = shift;

    $self->{index_name} = $name if $name;

    return $self->{index_name};
}
# Function: Constructor. For every database given, works out the index
#           directory and the database name from its path and creates one
#           Bio::DB::Flat::OBDAIndex fetcher for it.
# Args    : -db     => array reference of database paths. A path that does
#                      not start with '/' is prefixed with $ENV{BLASTDB}.
#           -format => (optional) sequence format, defaults to 'FASTA'
# Returns : the new object
# Throws  : if -db is missing or is not an array reference, or if the
#           database name or index directory cannot be derived from a path
sub new {
    my ($class, @args) = @_;
    my $self = bless {}, $class;

    my ($db, $format) = rearrange(['DB', 'FORMAT'], @args);

    throw("Sorry, you must specify a database") unless defined($db);
    throw("Expected a reference to an array of db\n") unless ref($db) eq 'ARRAY';
    $self->db($db);

    # The default does not depend on the database, so set it once.
    $format = 'FASTA' unless ( $format );

    foreach my $database ( $self->db ) {
        # Relative names are resolved against $ENV{BLASTDB}.
        if ( $database !~ /^\// ) {
            $database = $ENV{BLASTDB} . "/" . $database;
        }

        # Drop a trailing slash.
        if ( $database =~ /(\S+)\/$/ ) {
            $database = $1;
        }

        # The last path component is the database name (cut at ".fa");
        # what precedes it is the index directory.
        my @path    = split /\//, $database;
        my $db_name = pop( @path );
        if ( $db_name =~ /(\S+)\.fa/ ) {
            $db_name = $1;
        }
        throw("Cannot define db_name") unless ( $db_name );

        my $index_dir = join '/', @path;
        throw("Cannot define index_dir") unless ( $index_dir );

        # index_name is a single slot, so after the loop it holds the
        # directory of the last database processed.
        $self->index_name( $index_dir );

        # Direct method-call syntax: the indirect form `new Class(...)` is
        # ambiguous to the parser in a package that defines its own new().
        my $OBDAfetcher = Bio::DB::Flat::OBDAIndex->new(
            -index_dir => $self->index_name,
            -dbname    => $db_name,
            -format    => $format,
        );
        $self->_seqfetcher($OBDAfetcher);
    }

    return $self;
}
# Function: Collects the secondary namespaces of all stored fetchers.
# Args    : none
# Returns : array reference holding the namespaces of every fetcher, in
#           fetcher order (an empty array reference if there are none)
sub secondary_namespaces {
    my ($self) = @_;

    # Start from a genuinely empty list: `my @list = undef` would seed the
    # result with one bogus undef element.
    my @secondary_namespaces;

    foreach my $seqfetcher ($self->_seqfetcher) {
        # One list-context call per fetcher; a fetcher without secondary
        # namespaces contributes nothing, and undefined values are dropped.
        push @secondary_namespaces,
            grep { defined } $seqfetcher->secondary_namespaces;
    }

    return \@secondary_namespaces;
}
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _