Bio::EnsEMBL::Analysis::Runnable
BlastRfam
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::BlastRfam
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $blast = Bio::EnsEMBL::Analysis::Runnable::BlastRfam->
new(
-query => $slice,
-program => 'wublastn',
-database => 'embl_vertrna',
-options => 'hitdist=40 -cpus=1',
-parser => $bplitewrapper,
-filter => $featurefilter,
);
$blast->run;
my @output =@{$blast->output};
Description
Modified blast runnable for specific use with RFAMSEQ.
Use for running BLASTN of genomic vs RFAMSEQ prior to
ncRNA analysis using Infernal.
Keeps the coverage in the dna_align_feature score field.
Also clusters overlapping hits and picks the one with the lowest
evalue to represent that cluster.
Methods
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Blast Arg [Parser] : A blast parser object must meet specified interface Arg [Filter] : A Filter object must meet specified interface Arg [Database]: string, database name/path Arg [Type] : string, wu or ncbi to specify which type of input Arg [Unknown_error_string] : the string to throw if the blast runs fails with an unexpected error 4 Function : create a Blast runnable Returntype: Bio::EnsEMBL::Analysis::Runnable::Blast Exceptions: throws if not given a database name or if not given a parser object Example : |
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Blast Function : override results parsing to allow for coverage calculations Returntype: none Exceptions: none Example : |
Arg [1] : Bio::EnsEMBL::Analysis::Runnable Arg [2] : string, directory Function : a generic run method. This checks the directory specifed to run it, write the query sequence to file, marks the query sequence file and results file for deletion, runs the analysis parses the results and deletes any files Returntype: 1 Exceptions: throws if no query sequence is specified Example : |
Methods code
sub cluster
{ my ($self,$dafs_ref)=@_;
my @dafs = @$dafs_ref;
@dafs = sort{$a->p_value <=> $b->p_value} @dafs;
my $start =0;
my @representative_sequences;
DAFS: foreach my $daf (@dafs){
$start ++;
my %family_cluster;
next DAFS unless($daf);
my $RFAM = substr($daf->hseqname,0,7);
push @{$family_cluster{$RFAM}},$daf;
MATCHES: for (my $index = $start; $index <= $#dafs ; $index ++){
next MATCHES unless ($dafs[$index]);
if ($daf->end >= $dafs[$index]->start() && $daf->start() <= $dafs[$index]->end){
my $familly = substr($dafs[$index]->hseqname,0,7);
push @{$family_cluster{$familly}},$dafs[$index];
$dafs[$index] = undef;
}
}
foreach my $domain (keys %family_cluster){
my @temp_array = @{$family_cluster{$domain}};
@temp_array = sort{$a->p_value <=> $b->p_value} @temp_array;
push @representative_sequences,shift @temp_array;
}
}
return\@ representative_sequences;
}
} |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($params ) = rearrange(['PARAMS'], @args);
$self->params($params) if $params;
return $self; } |
sub params
{ my $self = shift;
$self->{'params'} = shift if(@_);
return $self->{'params'};
}
1; } |
sub parse_results
{ my ($self,$coverage_cutoff) = @_;
my $results = $self->results_files;
my @daf_coverage_results;
my $filtered_output;
my $bplite = $self->parser->get_parsers($results);
foreach my $blast (@{$bplite}){
while( my $subject = $blast->nextSbjct){
while (my $hsp = $subject->nextHSP) {
my @daf_results;
my $hsp_length = $hsp->length."\t";
my $subject_length = $subject->{'LENGTH'};
$subject_length = 1 unless ($subject_length);
my $coverage = $hsp_length/$subject_length*100; $coverage =~ s/\.\d+//;
if ($coverage_cutoff){
next unless($coverage > $coverage_cutoff);
}
$subject->name =~ /^(\S+)\/\S+\s+(\w+);\w+/;
my $name = $2."-".$1;
push @daf_results, $self->parser->split_hsp($hsp,$name);
foreach my $daf(@daf_results){
if ($daf->hstrand == -1 && $daf->strand == 1){
$daf->strand(-1);
$daf->hstrand(1);
}
$daf->score($coverage);
$daf->external_db_id(4200);
push @daf_coverage_results, $daf;
}
}
}
}
return undef unless ( @daf_coverage_results);
my $output = $self->cluster(\@daf_coverage_results);
$self->output($output);
}
} |
sub run
{ my ($self, $dir) = @_;
my $coverage = 70; $self->workdir($dir) if($dir);
throw("Can't run ".$self." without a query sequence")
unless($self->query);
$self->checkdir();
eval {
my $low_copy_db = $self->params->{'lowcopy'};
$self->throw("DATABASE NOT FOUND $low_copy_db") unless -e $low_copy_db;
my $filename = $self->write_seq_file();
$self->files_to_delete($filename);
$self->files_to_delete($self->resultsfile);
$self->run_ncbi_analysis;
$self->parse_results(20);
$self->{'results_files'} = ();
$self->run_analysis;
$self->parse_results($coverage);
$self->delete_files;
}; if ($@){ print "$@\n";}
return 1; } |
run_ncbi_analysis | description | prev | next | Top |
sub run_ncbi_analysis
{ my ($self) = @_;
my $database = $self->params->{'lowcopy'};
my $options = "-W7 -F F -b 1000000 -v 1000000 ";
my $command = "/software/bin/blastall";
my $filename = $self->queryfile;
my $results_file = $self->create_filename("Infernal", 'blast.out');
$self->files_to_delete($results_file);
$self->results_files($results_file);
$command .= " -p blastn -d $database -i $filename ";
$command .= "$options 2>&1 > ".$results_file;
print "Running blast ".$command."\n";
open(my $fh, "$command |") ||
throw("Error opening Blast cmd <$command>." .
" Returned error $? BLAST EXIT: '" .
($? >> 8) . "'," ." SIGNAL '" . ($? & 127) .
"', There was " . ($? & 128 ? 'a' : 'no') .
" core dump");
while(<$fh>){
if(/FATAL:(.+)/){
my $match = $1;
print $match;
$self->delete_files;
if($match =~ /no valid contexts/){
die qq{"VOID"\n}; }elsif($match =~ /Bus Error signal received/){
die qq{"BUS_ERROR"\n}; }elsif($match =~ /Segmentation Violation signal received./){
die qq{"SEGMENTATION_FAULT"\n}; }elsif($match =~ /Out of memory;(.+)/){
die qq{"OUT_OF_MEMORY"\n};
}elsif($match =~ /the query sequence is shorter than the word length/){
die qq{"VOID"\n}; }else{
warning("Something FATAL happened to BLAST we've not ".
"seen before, please add it to Package: "
. __PACKAGE__ . ", File: " . __FILE__);
die ($self->unknown_error_string."\n");
}
}elsif(/WARNING:(.+)/){
}elsif(/^\s{10}(.+)/){ }
}
unless(close $fh){
warning("Error running Blast cmd <$command>. Returned ".
"error $? BLAST EXIT: '" . ($? >> 8) .
"', SIGNAL '" . ($? & 127) . "', There was " .
($? & 128 ? 'a' : 'no') . " core dump");
die ($self->unknown_error_string."\n");
} } |
General documentation
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Blast
Function : gets a list of databases to run against and constructs
commandlines against each one and runs them
Returntype: none
Exceptions: throws if there is a problem opening the commandline or
if blast produces an error
Example :