Bio::Tools::Run
RemoteBlast
Toolbar
Summary
Bio::Tools::Run::RemoteBlast - Object for remote execution of the NCBI Blast
via HTTP
Package variables
No package variables defined.
Included modules
HTTP::Request::Common
IO::String
LWP
Inherit
Synopsis
#Remote-blast "factory object" creation and blast-parameter initialization
use Bio::Tools::Run::RemoteBlast;
use strict;
my $prog = 'blastp';
my $db = 'swissprot';
my $e_val= '1e-10';
my @params = ( '-prog' => $prog,
'-data' => $db,
'-expect' => $e_val,
'-readmethod' => 'SearchIO' );
my $factory = Bio::Tools::Run::RemoteBlast->new(@params);
#change a paramter
$Bio::Tools::Run::RemoteBlast::HEADER{'ENTREZ_QUERY'} = 'Homo sapiens [ORGN]';
#remove a parameter
delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};
my $v = 1;
#$v is just to turn on and off the messages
my $str = Bio::SeqIO->new(-file=>'amino.fa' , '-format' => 'fasta' );
while (my $input = $str->next_seq()){
#Blast a sequence against a database:
#Alternatively, you could pass in a file with many
#sequences rather than loop through sequence one at a time
#Remove the loop starting 'while (my $input = $str->next_seq())'
#and swap the two lines below for an example of that.
my $r = $factory->submit_blast($input);
#my $r = $factory->submit_blast('amino.fa');
print STDERR "waiting..." if( $v > 0 );
while ( my @rids = $factory->each_rid ) {
foreach my $rid ( @rids ) {
my $rc = $factory->retrieve_blast($rid);
if( !ref($rc) ) {
if( $rc < 0 ) {
$factory->remove_rid($rid);
}
print STDERR "." if ( $v > 0 );
sleep 5;
} else {
my $result = $rc->next_result();
#save the output
my $filename = $result->query_name()."\.out";
$factory->save_output($filename);
$factory->remove_rid($rid);
print "\nQuery Name: ", $result->query_name(), "\n";
while ( my $hit = $result->next_hit ) {
next unless ( $v > 0);
print "\thit name is ", $hit->name, "\n";
while( my $hsp = $hit->next_hsp ) {
print "\t\tscore is ", $hsp->score, "\n";
}
}
}
}
}
}
# This example shows how to change a CGI parameter:
$Bio::Tools::Run::RemoteBlast::HEADER{'MATRIX_NAME'} = 'BLOSUM25';
# And this is how to delete a CGI parameter:
delete $Bio::Tools::Run::RemoteBlast::HEADER{'FILTER'};
Description
Methods
Methods description
Title : database Usage : my $db = $self->database Function: Get/Set the database to search Returns : string Args : string [ swissprot, nr, nt, etc... ] |
Title : expect Usage : my $expect = $self->expect Function: Get/Set the E value cutoff Returns : string Args : string [ '1e-4' ] |
Title : header Usage : my $header = $self->header Function: Get/Set HTTP header for blast query Returns : string Args : none |
Title : program Usage : my $prog = $self->program Function: Get/Set the program to run Returns : string Args : string [ blastp, blastn, blastx, tblastn, tblastx ] |
Title : proxy Usage : $httpproxy = $db->proxy('http') or $db->proxy(['http','ftp'], 'http://myproxy' ) Function: Get/Set a proxy for use of proxy Returns : a string indicating the proxy Args : $protocol : an array ref of the protocol(s) to set/get $proxyurl : url of the proxy to use for the specified protocol |
Title : readmethod Usage : my $readmethod = $self->readmethod Function: Get/Set the method to read the blast report Returns : string Args : string [ Blast, BPlite ] |
Title : retrieve_blast Usage : my $blastreport = $blastfactory->retrieve_blast($rid); Function: Attempts to retrieve a blast report from remote blast queue Returns : -1 on error, 0 on 'job not finished', Bio::Tools::BPlite or Bio::Tools::Blast object (depending on how object was initialized) on success Args : Remote Blast ID (RID) |
Title : saveoutput Usage : my $saveoutput = $self->save_output($filename) Function: Method to save the blast report Returns : 1 (throws error otherwise) Args : string [rid, filename] |
Title : submit_blast Usage : $self->submit_blast([$seq1,$seq2]); Function: Submit blast jobs to ncbi blast queue on sequence(s) Returns : Blast report object as defined by $self->readmethod Args : input can be: * sequence object * array ref of sequence objects * filename of file containing fasta formatted sequences |
Title : ua Usage : my $ua = $self->ua or $self->ua($ua) Function: Get/Set a LWP::UserAgent for use Returns : reference to LWP::UserAgent Object Args : none Comments: Will create a UserAgent if none has been requested before. |
Methods code
BEGIN { $URLBASE = 'http://www.ncbi.nlm.nih.gov/blast/Blast.cgi';
%HEADER = ('CMD' => 'Put',
'PROGRAM' => '',
'DATABASE' => '',
'FILTER' => 'L',
'EXPECT' => '',
'QUERY' => '',
'CDD_SEARCH' => 'off',
'COMPOSITION_BASED_STATISTICS' => 'off',
'FORMAT_OBJECT' => 'Alignment',
'SERVICE' => 'plain',
);
%RETRIEVALHEADER = ('CMD' => 'Get',
'RID' => '',
'ALIGNMENT_VIEW' => 'Pairwise',
'DESCRIPTIONS' => 100,
'ALIGNMENTS' => 50,
'FORMAT_TYPE' => 'Text',
);
$RIDLINE = 'RID\s+=\s+(\S+)';
%BLAST_PARAMS = ( 'prog' => 'blastp',
'data' => 'nr',
'expect' => '1e-3',
'readmethod' => 'SearchIO'
); } |
sub _load_input
{ my ($self, $input) = @_;
if( ! defined $input ) {
$self->throw("Calling remote blast with no input");
}
my @seqs;
if( ! ref $input ) {
if( -e $input ) {
my $seqio = new Bio::SeqIO(-format => 'fasta', -file => $input);
while( my $seq = $seqio->next_seq ) {
push @seqs, $seq;
}
} else {
$self->throw("Input $input was not a valid filename");
}
} elsif( ref($input) =~ /ARRAY/i ) {
foreach ( @$input ) {
if( ref($_) && $_->isa('Bio::PrimarySeqI') ) {
push @seqs, $_;
} else {
$self->warn("Trying to add a " . ref($_) .
" but expected a Bio::PrimarySeqI");
}
}
if( ! @seqs) {
$self->throw("Did not pass in valid input -- no sequence objects found");
}
} elsif( $input->isa('Bio::PrimarySeqI') ) {
push @seqs, $input;
}
return @seqs;
}
1;
__END__ } |
sub add_rid
{ my ($self, @vals) = @_;
foreach ( @vals ) {
$self->{'_rids'}->{$_} = 1;
}
return scalar keys %{$self->{'_rids'}}; } |
sub database
{ my ($self, $val) = @_;
if( defined $val ) {
$HEADER{'DATABASE'} = $val;
}
return $HEADER{'DATABASE'}; } |
sub each_rid
{ my ($self) = @_;
return keys %{$self->{'_rids'}}; } |
sub expect
{ my ($self, $val) = @_;
if( defined $val ) {
$HEADER{'EXPECT'} = $val;
}
return $HEADER{'EXPECT'}; } |
sub header
{ my ($self) = @_;
my %h = %HEADER;
$h{'PROGRAM'} = $self->program;
$h{'DATABASE'} = $self->database;
$h{'EXPECT'} = $self->expect;
return %h; } |
sub new
{ my ($caller, @args) = @_;
my $self = $caller->SUPER::new(@args);
$self->_initialize_io();
my ($prog, $data, $expect,
$readmethod) = $self->_rearrange([qw(PROG DATA
EXPECT
READMETHOD)],
@args);
$readmethod = $BLAST_PARAMS{'readmethod'} unless defined $readmethod;
$prog = $BLAST_PARAMS{'prog'} unless defined $prog;
$data = $BLAST_PARAMS{'data'} unless defined $data;
$expect = $BLAST_PARAMS{'expect'} unless defined $expect;
$self->readmethod($readmethod);
$self->program($prog);
$self->database($data);
$self->expect($expect);
return $self; } |
sub program
{ my ($self, $val) = @_;
if( defined $val ) {
$val = lc $val;
if( $val !~ /t?blast[pnx]/ ) {
$self->warn("trying to set program to an invalid program name ($val) -- defaulting to blastp");
$val = 'blastp';
}
$HEADER{'PROGRAM'} = $val;
}
return $HEADER{'PROGRAM'}; } |
sub proxy
{ my ($self,$protocol,$proxy) = @_;
return undef if ( !defined $self->ua || !defined $protocol
|| !defined $proxy );
return $self->ua->proxy($protocol,$proxy); } |
sub readmethod
{ my ($self, $val) = @_;
if( defined $val ) {
$self->{'_readmethod'} = $val;
}
return $self->{'_readmethod'}; } |
sub remove_rid
{ my ($self, @vals) = @_;
foreach ( @vals ) {
delete $self->{'_rids'}->{$_};
}
return scalar keys %{$self->{'_rids'}}; } |
sub retrieve_blast
{ my($self, $rid) = @_;
my (undef,$tempfile) = $self->tempfile();
my %hdr = %RETRIEVALHEADER;
$hdr{'RID'} = $rid;
my $req = POST $URLBASE, [%hdr];
if( $self->verbose > 0 ) {
$self->warn("retrieve request is " . $req->as_string());
}
my $response = $self->ua->request($req, $tempfile);
if( $self->verbose > 0 ) {
open(TMP, $tempfile) or $self->throw("cannot open $tempfile");
while(<TMP>) { print $_; }
close TMP;
}
if( $response->is_success ) {
my $size = -s $tempfile;
if( $size > 1000 ) {
my $blastobj;
if( $self->readmethod =~ /BPlite/ ) {
$blastobj = new Bio::Tools::BPlite(-file => $tempfile);
} else {
$blastobj = new Bio::SearchIO(-file => $tempfile,
-format => 'blast');
}
$self->file($tempfile);
return $blastobj;
} elsif( $size < 500 ) { open(ERR, "<$tempfile") or $self->throw("cannot open file $tempfile");
$self->warn(join("", <ERR>));
close ERR;
return -1;
} else { return 0;
}
} else {
$self->warn($response->error_as_HTML);
return -1;
} } |
sub save_output
{ my ($self, $filename) = @_;
if( ! defined $filename ) {
$self->throw("Can't save blast output. You must specify a filename to save to.");
}
my $blastfile = $self->file;
open(TMP, $blastfile) or $self->throw("cannot open $blastfile");
open(SAVEOUT, ">$filename") or $self->throw("cannot open $filename");
my $seentop=0;
while(<TMP>) {
next if (/<pre>/);
if( /^(?:[T]?BLAST[NPX])\s*.+$/i ||
/^RPS-BLAST\s*.+$/i ) {
$seentop=1;
}
next if !$seentop;
if( $seentop ) {
print SAVEOUT;
}
}
close SAVEOUT;
close TMP;
return 1; } |
sub submit_blast
{ my ($self, $input) = @_;
my @seqs = $self->_load_input($input);
return 0 unless ( @seqs );
my $tcount = 0;
my %header = $self->header;
foreach my $seq ( @seqs ) {
$header{'QUERY'} = ">".(defined $seq->display_id() ? $seq->display_id() : "").
" ".(defined $seq->desc() ? $seq->desc() : "")."\n".$seq->seq();
my $request = POST $URLBASE, [%header];
$self->warn($request->as_string) if ( $self->verbose > 0);
my $response = $self->ua->request( $request);
if( $response->is_success ) {
if( $self->verbose > 0 ) {
my ($tempfh) = $self->tempfile();
print $tempfh $response->content;
close($tempfh);
undef $tempfh;
}
my @subdata = split(/\n/, $response->content );
my $count = 0;
foreach ( @subdata ) {
if( /$RIDLINE/ ) {
$count++;
print STDERR $_ if( $self->verbose > 0);
$self->add_rid($1);
last;
}
}
if( $count == 0 ) {
$self->warn("req was ". $request->as_string() . "\n");
$self->warn(join('', @subdata));
}
$tcount += $count;
} else {
$self->warn("req was ". $request->as_string() . "\n" .
$response->error_as_HTML);
$tcount = -1;
}
}
return $tcount; } |
sub ua
{ my ($self, $value) = @_;
if( ! defined $self->{'_ua'} ) {
$self->{'_ua'} = new LWP::UserAgent;
}
return $self->{'_ua'}; } |
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bio.perl.org/MailList.html - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution. Bug reports can be submitted via email
or the web:
bioperl-bugs@bio.perl.org
http://bio.perl.org/bioperl-bugs/
AUTHOR - Jason Stajich | Top |
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _