Bio::EnsEMBL::Analysis::Runnable::Finished Blast
Included librariesPackage variablesDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::Blast
Bio::EnsEMBL::Analysis::Config::General
Bio::EnsEMBL::Utils::Exception qw ( throw warning info )
BlastableVersion
Symbol
Inherit
Bio::EnsEMBL::Analysis::Runnable::Blast
Synopsis
No synopsis!
Description
Unlike Bio::EnsEMBL::Analysis::Runnable::Blast,
this module creates FeaturePairs from HSPs after
doing any depth filtering to save time and memory
when searching genomic sequences that generate
large numbers of blast matches. BLAST_CONFIG =>
{
Uniprot =>
{
BLAST_PARSER => 'Bio::EnsEMBL::Analysis::Tools::Finished::BPliteWrapper',
PARSER_PARAMS => {
-regex => '^\w+\s+(\w+)',
-query_type => 'pep',
-database_type => 'pep',
-threshold_type => 'PVALUE',
-threshold => 0.01,
-coverage => 10,
-discard_overlaps => 1,
                              },
BLAST_FILTER =>'Bio::EnsEMBL::Analysis::Tools::FeatureFilter',
FILTER_PARAMS => {
-min_score => 200,
-prune => 1,
},
BLAST_PARAMS => {
-unknown_error_string => 'FAILED',
-type => 'wu',
},
},
DEFAULT =>
{
BLAST_PARSER => 'Bio::EnsEMBL::Analysis::Tools::BPliteWrapper',
PARSER_PARAMS => {
-regex => '^(\w+)',
-query_type => undef,
-database_type => undef,
},
BLAST_FILTER => undef,
FILTER_PARAMS => {},
BLAST_PARAMS => {
-unknown_error_string => 'FAILED',
-type => 'wu',
}
},
BLAST_AB_INITIO_LOGICNAME => 'Genscan' }
Methods
BEGIN Code
DESTROY
No description
Code
clean_databases
No description
Code
clean_results_files
No description
Code
databases
No description
Code
get_analysis
No description
Code
get_db_versionDescriptionCode
parse_results
No description
Code
run_analysis
No description
Code
Methods description
get_db_versioncode    nextTop
    Title   :  get_db_version
[ distinguished from RunnableDB::*::db_version_searched() ]
Useage : $self->get_db_version('/data/base/path')
$obj->get_db_version()
Function: Set a blast database version from the supplied path
Get a blast database version from previously supplied path
Uses tjrc''s BlastableVersion module.
Returns : String
Args : String (should be a full database path)
Caller : $self::fetch_databases()
RunnableDB::Finished_EST::db_version_searched()
Methods code
BEGINTop
BEGIN {
	print STDERR "\nUSING " . __PACKAGE__ . "\n\n";
}
DESTROYdescriptionprevnextTop
sub DESTROY {
	my ( $self ) = @_;
	# just do the cleanup
$self->delete_files; } 1; __END__
}
clean_databasesdescriptionprevnextTop
sub clean_databases {
	my ( $self) = @_;
	$self->{databases} = [];
}
clean_results_filesdescriptionprevnextTop
sub clean_results_files {
  my ($self) = @_;
  $self->{'results_files'} = [];
}
databasesdescriptionprevnextTop
sub databases {
	my ( $self, @vals ) = @_;

	if ( not exists $self->{databases} ) {
		$self->{databases} = [];
	}

	foreach my $val (@vals) {
		my $db_names = $val;
		my @databases;
		$db_names =~ s/\s//g;

		foreach my $dbname ( split( ",", $db_names ) )
		{    # allows the use of a comma separated list in $self->database
# prepend the environment variable $BLASTDB if
# database name is not an absoloute path
unless ( $dbname =~ m!^/! ) {
$dbname = $ENV{BLASTDB} . "/" . $dbname;
} # If the expanded database name exists put this in
# the database array.
#
# If it doesn't exist then see if $database-1,$database-2 exist
# and put them in the database array
if ( -f $dbname ) { push( @databases, $dbname ); } else { my $count = 1; my $db_filename; while ( -f ( $db_filename = "${dbname}-${count}" ) ) { push( @databases, $db_filename ); $count++; } $! = undef ; # to stop pollution as it will be "No such file or directory" after while loop above.
} } if ( scalar(@databases) == 0 ) { throw( "No databases exist for " . $db_names ); } else { foreach my $db_name (@databases){ $self->get_db_version($db_name) if $db_name =~ /emnew_/; } $self->get_db_version($databases[0]); push @{$self->{databases}}, @databases; } } return $self->{databases};
}
get_analysisdescriptionprevnextTop
sub get_analysis {
	my ($self) = @_;

	my ($ana);
	unless ( $ana = $self->{'_analysis'} ) {
		my ($source) = $self->program =~ m{([^/]+)$}
		  or throw(
			"Can't parse last element from path: '" . $self->program . "'" );
		$ana = $self->{'_analysis'} = Bio::EnsEMBL::Analysis->new(
			-db              => $self->database,
			-db_version      => 1,                 # ARUUGA!!!
-program => $source, -program_version => 1, -gff_source => $source, -gff_feature => 'similarity', -logic_name => 'blast', ); } return $ana;
}
get_db_versiondescriptionprevnextTop
sub get_db_version {
	my ( $self, $db ) = @_;
	my $debug_this = 0;    # this just shows debug info.
my $force_dbi = 0; # this will force a dbi call SLOW!!!!!!
unless ( $self->{'_db_version_searched'} ) { if ($db) { $BlastableVersion::debug = $debug_this; warning "BlastableVersion is cvs revision ".$BlastableVersion::revision."\n " if $debug_this; my $ver = eval { my $blast_ver = BlastableVersion->new(); $blast_ver->force_dbi($force_dbi); # if set will be SLOW.
$blast_ver->get_version($db); $blast_ver; }; throw("I failed to get a BlastableVersion for $db") if $@; my $dbv = $ver->version(); my $sgv = $ver->sanger_version(); my $name = $ver->name(); my $date = $ver->date(); unless ($dbv) { throw( "I know nothing about $db I tried to find out:\n" . " - name <". $name . ">\n" . " - date <". $date . ">\n" . " - version <". $dbv . ">\n" . " - sanger_version <". $sgv. ">\n" ); } $self->{'_db_version_searched'} = $dbv; } else { throw( "You've asked about what I searched, but I don't know." . " It's not set. I need to be called with a database filename first" ); # The code probably got here because of a problem with the MLDBM
# cache file on the machine this was running on.
# the cache file is stored @ /var/tmp/blast_versions
# try <rm -f /var/tmp/blast_versions>
} } return $self->{'_db_version_searched'};
}
parse_resultsdescriptionprevnextTop
sub parse_results {
	my ($self)           = @_;
	my $results          = $self->results_files;
	my $bplites          = $self->parser->parse_files($results);
	my $threshold_type   = $self->parser->threshold_type;
	my $threshold        = $self->parser->threshold;
	my $discard_overlaps = $self->parser->discard_overlaps;
	my $coverage         = $self->parser->coverage;
	my $hits             =
	  $self->parser->get_best_hits( $bplites, $threshold_type, $threshold );
	my $query_length = $self->query->length
	  or throw("Couldn't get query length");
	my $output =
	  $self->parser->_apply_coverage_filter( $query_length, $hits,
		$threshold_type, $threshold, $coverage, $discard_overlaps );
	$self->output($output);
	return $output;
}
run_analysisdescriptionprevnextTop
sub run_analysis {
  my ($self) = @_;

  DB:foreach my $database (@{$self->databases}) {

    my $db = $database;
    $db =~ s/.*\///;
    #allow system call to adapt to using ncbi blastall.
#defaults to WU blast
my $command = $self->program; my $blastype = ""; my $filename = $self->queryfile; my $results_file = $self->create_filename($db, 'blast.out'); $self->files_to_delete($results_file); $self->results_files($results_file); if ($self->type eq 'ncbi') { $command .= " -d $database -i $filename "; } else { $command .= " $database $filename -gi "; } $command .= $self->options. ' 2>&1 > '.$results_file; info("Running blast ".$command); my $fh; unless (open($fh, "$command |")) { $self->delete_files; throw("Error opening Blast cmd <$command>." . " Returned error $? BLAST EXIT: '" . ($? >> 8) . "'," ." SIGNAL '" . ($? & 127) . "', There was " . ($? & 128 ? 'a' : 'no') . " core dump"); } # this loop reads the STDERR from the blast command
# checking for FATAL: messages (wublast) [what does ncbi blast say?]
# N.B. using simple die() to make it easier for RunnableDB to parse.
while(<$fh>){ if(/FATAL:(.+)/){ my $match = $1; next DB if $match =~ /There is nothing in the requested database to search/; # clean up before dying
$self->delete_files; if($match =~ /no valid contexts/){ die qq{"VOID"\n}; # hack instead
}elsif($match =~ /Bus Error signal received/){ die qq{"BUS_ERROR"\n}; # can we work out which host?
}elsif($match =~ /Segmentation Violation signal received./){ die qq{"SEGMENTATION_FAULT"\n}; # can we work out which host?
}elsif($match =~ /Out of memory;(.+)/){ # (.+) will be something like "1050704 bytes were last
#requested."
die qq{"OUT_OF_MEMORY"\n}; # resenD to big mem machine by rulemanager
}elsif($match =~ /the query sequence is shorter
than the word length/
){ #no valid context
die qq{"VOID"\n}; # hack instead
}else{ warning("Something FATAL happened to BLAST we've not ". "seen before, please add it to Package: " . __PACKAGE__ . ", File: " . __FILE__."\n[$match]\n"); die ($self->unknown_error_string."\n"); # send appropriate string
#as standard this will be failed so job can be retried
#when in pipeline
} }elsif(/WARNING:(.+)/){ # ONLY a warning usually something like hspmax=xxxx was exceeded
# skip ...
}elsif(/^\s{10}(.+)/){ # ten spaces
# Continuation of a WARNING: message
# Hope this doesn't catch more than these.
# skip ...
} } unless(close $fh){ # checking for failures when closing.
# we should't get here but if we do then $? is translated
#below see man perlvar
$self->delete_files; warning("Error running Blast cmd <$command>. Returned ". "error $? BLAST EXIT: '" . ($? >> 8) . "', SIGNAL '" . ($? & 127) . "', There was " . ($? & 128 ? 'a' : 'no') . " core dump"); die ($self->unknown_error_string."\n"); } }
}
General documentation
NAME - Bio::EnsEMBL::Analysis::Runnable::Finished::BlastTop
AUTHORTop
James Gilbert email jgrg@sanger.ac.uk
modified by Sindhu K. Pillai emailsp1@sanger.ac.uk