# Raw content of Bio::EnsEMBL::Pipeline::SeqFetcher::FetchFromBlastDB
package Bio::EnsEMBL::Pipeline::SeqFetcher::FetchFromBlastDB;
use vars qw(@ISA);
use strict;
use Bio::EnsEMBL::Root;
use Bio::Seq;
@ISA = qw(Bio::EnsEMBL::Root);
# Constructor.
#
# Args    : named arguments; only DB is looked up here (through
#           $self->_rearrange, which is not defined in this file and is
#           presumably inherited via @ISA).
# Returns : the new object, blessed into $class. When a true DB value was
#           supplied it has been passed to the db() accessor.
sub new {
  my ($class, @args) = @_;

  my $self = bless {}, $class;

  my ($blast_db) = $self->_rearrange([qw(DB)], @args);
  if ($blast_db) {
    $self->db($blast_db);
  }

  return $self;
}
# Fetch one or more sequences by running the database object's
# seqfetch_command and parsing its '>'-headed (FASTA-style) output.
#
# Arg [1]  : $id - a single identifier string, or a reference to an array of
#            identifier strings. For each identifier, anything following the
#            first run of non-whitespace characters on the line is dropped
#            before use. The caller's data is never modified.
# Returns  : reference to an array of Bio::Seq objects, one per record that
#            had at least one sequence line, in the order the records first
#            appeared in the command output. The array is empty when no
#            usable identifier was supplied.
# Throws   : through $self->throw when no database object has been set;
#            dies when the fetch command cannot be started.
sub _fetch {
  my ($self, $id) = @_;

  my $db = $self->db;
  unless ($db) {
    $self->throw("No Blast database object has been set; "
                 ."cannot fetch sequences.\n");
  }

  # Work on copies so the identifiers held by the caller stay untouched.
  my @ids = grep { defined $_ } (ref $id eq 'ARRAY' ? @$id : ($id));
  s/(\S+).*/$1/ for @ids;
  @ids = grep { /\S/ } @ids;

  # Nothing to look up: do not launch the command with no identifiers.
  return [] unless @ids;

  # NOTE(review): the identifiers are interpolated into a command string
  # that may be handed to the shell; confirm that callers only pass
  # trusted identifiers.
  my $command = $db->seqfetch_command . " " . join(' ', @ids);

  open(my $fh, '-|', $command)
    or die "Can't execute fetch command: $!";

  # The index type selects the header pattern; look it up once.
  my $index_type = $db->index_type;
  $index_type = '' unless defined $index_type;

  my %seq_for;     # identifier => concatenated raw sequence lines
  my %desc_for;    # identifier => description text from the header
  my @order;       # identifiers in order of first appearance
  my $current_id;  # identifier of the record being read, if any

  while (my $line = <$fh>) {
    if ($line =~ /^>/) {
      my $header = $line;
      $header =~ s/^>//;

      # Only take the description from a substitution that really matched,
      # so a stale capture from an earlier header can never leak in.
      my $desc = '';
      if ($index_type =~ /wu/) {
        $desc = $2 if $header =~ s/([\w\_\.]+)\s*(.*)\n/$1/;
      }
      elsif ($index_type eq 'ncbi') {
        $desc = $2 if $header =~ s/[^\|]+\|([\w\_\.]+)\s*(.*)\n/$1/;
      }

      # If no pattern applied, the header still carries its line ending.
      $header =~ s/\n\z//;
      $desc   =~ tr/\t/ /;

      push @order, $header unless exists $desc_for{$header};
      $desc_for{$header} = $desc;
      $current_id = $header;
      next;
    }

    # Ignore any output that precedes the first header line.
    next unless defined $current_id;
    $seq_for{$current_id} .= $line;
  }

  # The exit status of the command is not inspected (as before); whatever
  # records were read are returned.
  close $fh;

  my @bioseqs;
  foreach my $seq_id (@order) {
    # Headers that were not followed by any sequence line yield no object.
    next unless exists $seq_for{$seq_id};

    (my $residues = $seq_for{$seq_id}) =~ s/\n//g;
    push @bioseqs, Bio::Seq->new(-display_id => $seq_id,
                                 -seq        => $residues,
                                 -desc       => $desc_for{$seq_id});
  }

  return \@bioseqs;
}
### Some aliases
# Fetch a single sequence.
#
# Arg [1]  : $id - passed unchanged to _fetch.
# Returns  : the first element of the array reference returned by _fetch
#            (undef when that array is empty).
sub fetch {
  my ($self, $id) = @_;

  my $found = $self->_fetch($id);
  my $first = shift @{$found};

  return $first;
}
# Alias for fetch(): hands the identifier to fetch() and returns whatever
# fetch() returns.
sub get_Seq_by_acc {
  my $self = shift;
  my $id   = shift;

  return $self->fetch($id);
}
# Fetch several sequences in one call.
#
# Arg [1]  : $ids - passed unchanged to _fetch.
# Returns  : whatever _fetch returns for $ids.
sub batch_fetch {
  my $self = shift;
  my $ids  = shift;

  return $self->_fetch($ids);
}
# Get or set the Blast database object used for fetching.
#
# Arg [1]  : (optional) new value; it must satisfy
#            ->isa("Bio::EnsEMBL::Pipeline::Runnable::BlastDB").
# Returns  : the currently stored value ($self->{_db}).
# Throws   : through $self->throw when a value is supplied that is not a
#            Bio::EnsEMBL::Pipeline::Runnable::BlastDB. This includes undef
#            and unblessed references, which cannot answer ->isa at all.
sub db {
  my $self = shift;

  if (@_) {
    my $value = shift;

    # Calling ->isa on undef or on an unblessed reference is itself a fatal
    # error; trap it so those values get the same explanatory throw.
    my $is_blastdb = do {
      local $@;
      eval { $value->isa("Bio::EnsEMBL::Pipeline::Runnable::BlastDB") };
    };

    unless ($is_blastdb) {
      $self->throw("Blast database object is not a "
                   ."Bio::EnsEMBL::Pipeline::Runnable::BlastDB.\n");
    }
    $self->{_db} = $value;
  }

  return $self->{_db};
}