Bio::EnsEMBL::Analysis::Runnable::Finished
Blast
Toolbar
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
No synopsis!
Description
Unlike Bio::EnsEMBL::Analysis::Runnable::Blast,
this module creates FeaturePairs from HSPs after
doing any depth filtering to save time and memory
when searching genomic sequences that generate
large numbers of blast matches.
BLAST_CONFIG =>
{
Uniprot =>
{
BLAST_PARSER => 'Bio::EnsEMBL::Analysis::Tools::Finished::BPliteWrapper',
PARSER_PARAMS => {
-regex => '^\w+\s+(\w+)',
-query_type => 'pep',
-database_type => 'pep',
-threshold_type => 'PVALUE',
-threshold => 0.01,
-coverage => 10,
-discard_overlaps => 1,
},
BLAST_FILTER =>'Bio::EnsEMBL::Analysis::Tools::FeatureFilter',
FILTER_PARAMS => {
-min_score => 200,
-prune => 1,
},
BLAST_PARAMS => {
-unknown_error_string => 'FAILED',
-type => 'wu',
},
},
DEFAULT =>
{
BLAST_PARSER => 'Bio::EnsEMBL::Analysis::Tools::BPliteWrapper',
PARSER_PARAMS => {
-regex => '^(\w+)',
-query_type => undef,
-database_type => undef,
},
BLAST_FILTER => undef,
FILTER_PARAMS => {},
BLAST_PARAMS => {
-unknown_error_string => 'FAILED',
-type => 'wu',
}
},
BLAST_AB_INITIO_LOGICNAME => 'Genscan'
}
Methods
BEGIN | | Code |
DESTROY | No description | Code |
clean_databases | No description | Code |
clean_results_files | No description | Code |
databases | No description | Code |
get_analysis | No description | Code |
get_db_version | Description | Code |
parse_results | No description | Code |
run_analysis | No description | Code |
Methods description
Title : get_db_version [ distinguished from RunnableDB::*::db_version_searched() ] Usage : $self->get_db_version('/data/base/path'); $obj->get_db_version() Function: Set a blast database version from the supplied path. Get a blast database version from a previously supplied path. Uses tjrc's BlastableVersion module. Returns : String Args : String (should be a full database path) Caller : $self::fetch_databases(), RunnableDB::Finished_EST::db_version_searched() |
Methods code
# Compile-time banner: report on STDERR which package is being loaded.
BEGIN {
    my $banner = "\nUSING " . __PACKAGE__ . "\n\n";
    print STDERR $banner;
}
# Destructor: calls delete_files on the object being destroyed (presumably
# removing the temporary files registered through files_to_delete - confirm
# against the parent class).
#
# A destructor can fire at any moment (scope exit, exception unwinding,
# global destruction), so the global status variables are localised first.
# Without this, anything delete_files does could overwrite the caller's
# $@, $! or $? - including the exit status of the process.
sub DESTROY
{
    my ( $self ) = @_;
    local ( $., $@, $!, $^E, $? );
    $self->delete_files;
    return;
}
1;
__END__ } |
# Forget every database registered on this object by replacing the stored
# list with a new, empty array reference. Returns that new reference.
sub clean_databases {
    my $self = shift;
    return $self->{databases} = [];
}
# Forget every results file recorded on this object by replacing the stored
# list with a new, empty array reference. Returns that new reference.
sub clean_results_files {
    my $self = shift;
    return $self->{'results_files'} = [];
}
# Get/add accessor for the list of BLAST database files to search.
#
# Args    : zero or more strings, each a comma-separated list of database
#           names (whitespace is stripped). A name that does not start with
#           "/" is looked up below $ENV{BLASTDB}.
# Returns : array reference of all database file paths registered so far.
# Throws  : if one of the supplied strings resolves to no existing file.
#
# A name that is a plain file is used as-is; otherwise the numbered pieces
# "<name>-1", "<name>-2", ... are collected for as long as they exist.
# get_db_version is then called for every path containing "emnew_" and for
# the first path found.
sub databases {
    my ( $self, @vals ) = @_;

    $self->{databases} = [] unless exists $self->{databases};

    foreach my $val (@vals) {
        my $db_names = $val;    # work on a copy, the argument is left intact
        $db_names =~ s/\s//g;

        my @databases;
        foreach my $dbname ( split( ",", $db_names ) ) {
            unless ( $dbname =~ m!^/! ) {
                # An unset BLASTDB yields "/<name>", exactly as before, but
                # without an "uninitialized value" warning.
                my $blastdb = defined $ENV{BLASTDB} ? $ENV{BLASTDB} : '';
                $dbname = $blastdb . "/" . $dbname;
            }

            if ( -f $dbname ) {
                push( @databases, $dbname );
            }
            else {
                # Database split into numbered pieces: <name>-1, <name>-2 ...
                my $count = 1;
                my $db_filename;
                while ( -f ( $db_filename = "${dbname}-${count}" ) ) {
                    push( @databases, $db_filename );
                    $count++;
                }

                # The failed -f tests leave errno set; clear it so that it is
                # not mistaken for a real error later. errno is numeric, so
                # reset it with 0 rather than undef.
                $! = 0;
            }
        }

        if ( scalar(@databases) == 0 ) {
            throw( "No databases exist for " . $db_names );
        }
        else {
            foreach my $db_name (@databases) {
                $self->get_db_version($db_name) if $db_name =~ /emnew_/;
            }
            $self->get_db_version( $databases[0] );
            push @{ $self->{databases} }, @databases;
        }
    }

    return $self->{databases};
}
# Return the Bio::EnsEMBL::Analysis object for this runnable, building and
# caching it in $self->{'_analysis'} on first use.
#
# The program name is the last path element of $self->program; it is used as
# both -program and -gff_source. Throws if no such element can be parsed.
sub get_analysis {
    my ($self) = @_;

    my $cached = $self->{'_analysis'};
    return $cached if $cached;

    my ($source) = $self->program =~ m{([^/]+)$};
    throw( "Can't parse last element from path: '" . $self->program . "'" )
        unless defined $source;

    # Kept as a list (not a hash) so the arguments reach the constructor
    # exactly as they would from an inline argument list.
    my @ctor_args = (
        -db              => $self->database,
        -db_version      => 1,
        -program         => $source,
        -program_version => 1,
        -gff_source      => $source,
        -gff_feature     => 'similarity',
        -logic_name      => 'blast',
    );

    my $analysis = Bio::EnsEMBL::Analysis->new(@ctor_args);
    $self->{'_analysis'} = $analysis;

    return $analysis;
}
# Get (and, on first use, set) the version of the searched BLAST database.
#
# Args    : $db - database path; only needed until a version is stored.
# Returns : the version string stored in $self->{'_db_version_searched'}.
# Throws  : if no version is stored and no $db is given, if the
#           BlastableVersion lookup dies, or if it yields no version.
#
# Once a version has been stored, later calls return it and ignore $db.
sub get_db_version {
    my ( $self, $db ) = @_;

    my $trace        = 0;    # set to 1 for BlastableVersion diagnostics
    my $dbi_required = 0;    # value handed to BlastableVersion's force_dbi

    return $self->{'_db_version_searched'}
        if $self->{'_db_version_searched'};

    if ( !$db ) {
        throw( "You've asked about what I searched, but I don't know."
              . " It's not set. I need to be called with a database filename first"
        );
    }
    else {
        $BlastableVersion::debug = $trace;
        if ($trace) {
            warning( "BlastableVersion is cvs revision "
                  . $BlastableVersion::revision
                  . "\n " );
        }

        # Any exception inside the lookup is reported as one generic failure.
        my $lookup = eval {
            my $blastable = BlastableVersion->new();
            $blastable->force_dbi($dbi_required);
            $blastable->get_version($db);
            $blastable;
        };
        throw("I failed to get a BlastableVersion for $db") if $@;

        my $version        = $lookup->version();
        my $sanger_version = $lookup->sanger_version();
        my $name           = $lookup->name();
        my $date           = $lookup->date();

        if ( !$version ) {
            throw( "I know nothing about $db I tried to find out:\n"
                  . " - name <" . $name . ">\n"
                  . " - date <" . $date . ">\n"
                  . " - version <" . $version . ">\n"
                  . " - sanger_version <" . $sanger_version . ">\n" );
        }

        $self->{'_db_version_searched'} = $version;
    }

    return $self->{'_db_version_searched'};
}
# Parse the BLAST results files and store the filtered result as output.
#
# The parser reads the files, picks the best hits using its threshold
# settings, and then applies its coverage filter using the query length.
# Throws if the query length is false. Returns what the coverage filter
# returned, after also passing it to $self->output.
sub parse_results {
    my ($self) = @_;

    my $results = $self->results_files;
    my $bplites = $self->parser->parse_files($results);

    # Read the four parser settings, each in scalar context.
    my ( $threshold_type, $threshold, $discard_overlaps, $coverage ) =
        map { scalar $self->parser->$_() }
        qw( threshold_type threshold discard_overlaps coverage );

    my $hits =
        $self->parser->get_best_hits( $bplites, $threshold_type, $threshold );

    my $query_length = $self->query->length;
    throw("Couldn't get query length") unless $query_length;

    my $output = $self->parser->_apply_coverage_filter(
        $query_length,   $hits,      $threshold_type,
        $threshold,      $coverage,  $discard_overlaps
    );
    $self->output($output);

    return $output;
}
# Run the BLAST program once per registered database.
#
# For every database a results file name is created and registered (both
# for deletion and as a results file), the command line is built according
# to $self->type ('ncbi' or anything else), and the command is run through
# a pipe. The lines read from the pipe are scanned for "FATAL:" messages.
#
# Dies : with '"VOID"', '"BUS_ERROR"', '"SEGMENTATION_FAULT"' or
#        '"OUT_OF_MEMORY"' for the recognised fatal messages, and with
#        $self->unknown_error_string for unrecognised ones or when closing
#        the pipe fails. Throws if the pipe cannot be opened.
#        A database reported as having nothing to search is skipped.
sub run_analysis {
    my ($self) = @_;

  DB:
    foreach my $database ( @{ $self->databases } ) {
        my $db = $database;
        $db =~ s/.*\///;    # file name without its directory

        my $command      = $self->program;
        my $filename     = $self->queryfile;
        my $results_file = $self->create_filename( $db, 'blast.out' );
        $self->files_to_delete($results_file);
        $self->results_files($results_file);

        if ( $self->type eq 'ncbi' ) {
            $command .= " -d $database -i $filename ";
        }
        else {
            $command .= " $database $filename -gi ";
        }

        # The shell applies redirections left to right: stderr is first
        # pointed at the pipe read below, then stdout is sent to the results
        # file. The pipe therefore carries the program's diagnostics only.
        $command .= $self->options . ' 2>&1 > ' . $results_file;

        info( "Running blast " . $command );

        # Explicit-mode pipe open; a single command string is still run via
        # the shell, which the redirections above require.
        my $fh;
        unless ( open( $fh, '-|', $command ) ) {
            $self->delete_files;
            throw(  "Error opening Blast cmd <$command>."
                  . " Returned error $? BLAST EXIT: '"
                  . ( $? >> 8 )
                  . "'," . " SIGNAL '"
                  . ( $? & 127 )
                  . "', There was "
                  . ( $? & 128 ? 'a' : 'no' )
                  . " core dump" );
        }

        # Read into a lexical: a bare while(<$fh>) assigns to the global $_
        # and would overwrite whatever the caller is iterating over.
        # WARNING: lines and indented continuation lines are ignored.
      LINE:
        while ( my $line = <$fh> ) {
            next LINE unless $line =~ /FATAL:(.+)/;
            my $match = $1;

            next DB
                if $match =~ /There is nothing in the requested database to search/;

            $self->delete_files;

            if ( $match =~ /no valid contexts/ ) {
                die qq{"VOID"\n};
            }
            elsif ( $match =~ /Bus Error signal received/ ) {
                die qq{"BUS_ERROR"\n};
            }
            elsif ( $match =~ /Segmentation Violation signal received./ ) {
                die qq{"SEGMENTATION_FAULT"\n};
            }
            elsif ( $match =~ /Out of memory;(.+)/ ) {
                die qq{"OUT_OF_MEMORY"\n};
            }
            elsif ( $match =~ /the query sequence is shorter than the word length/ ) {
                die qq{"VOID"\n};
            }
            else {
                warning("Something FATAL happened to BLAST we've not "
                      . "seen before, please add it to Package: "
                      . __PACKAGE__
                      . ", File: "
                      . __FILE__
                      . "\n[$match]\n" );
                die( $self->unknown_error_string . "\n" );
            }
        }

        # close() on a pipe also reports a non-zero exit status of the child.
        unless ( close $fh ) {
            $self->delete_files;
            warning("Error running Blast cmd <$command>. Returned "
                  . "error $? BLAST EXIT: '"
                  . ( $? >> 8 )
                  . "', SIGNAL '"
                  . ( $? & 127 )
                  . "', There was "
                  . ( $? & 128 ? 'a' : 'no' )
                  . " core dump" );
            die( $self->unknown_error_string . "\n" );
        }
    }

    return;
}
General documentation
NAME - Bio::EnsEMBL::Analysis::Runnable::Finished::Blast | Top |