Bio::EnsEMBL::Analysis::Runnable BlastRfam
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
  Bio::EnsEMBL::Analysis::Runnable::BlastRfam
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Runnable
Bio::EnsEMBL::Analysis::Runnable::Blast
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::Runnable::Blast
Synopsis
  my $blast = Bio::EnsEMBL::Analysis::Runnable::BlastRfam->
new(
-query => $slice,
-program => 'wublastn',
-database => 'embl_vertrna',
-options => 'hitdist=40 -cpus=1',
-parser => $bplitewrapper,
-filter => $featurefilter,
);
$blast->run;
my @output =@{$blast->output};
Description
Modified blast runnable for specific use with RFAMSEQ.
Use for running BLASTN of genomic vs RFAMSEQ prior to
ncRNA analysis using Infernal.
Keeps the coverage in the dna_align_feature score field.
Also clusters overlapping hits and picks the one with the lowest
evalue to represent that cluster.
Methods
cluster
No description
Code
newDescriptionCode
params
No description
Code
parse_resultsDescriptionCode
runDescriptionCode
run_ncbi_analysis
No description
Code
Methods description
newcode    nextTop
  Arg [1]       : Bio::EnsEMBL::Analysis::Runnable::Blast
Arg [Parser] : A blast parser object must meet specified interface
Arg [Filter] : A Filter object must meet specified interface
Arg [Database]: string, database name/path
Arg [Type] : string, wu or ncbi to specify which type of input
Arg [Unknown_error_string] : the string to throw if the blast runs fails
with an unexpected error 4
Function : create a Blast runnable
Returntype: Bio::EnsEMBL::Analysis::Runnable::Blast
Exceptions: throws if not given a database name or if not given
a parser object
Example :
parse_resultscodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::Blast
Function : override results parsing to allow for
coverage calculations
Returntype: none
Exceptions: none
Example :
runcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable
Arg [2] : string, directory
Function : a generic run method. This checks the directory specifed
to run it, write the query sequence to file, marks the query sequence
file and results file for deletion, runs the analysis parses the
results and deletes any files
Returntype: 1
Exceptions: throws if no query sequence is specified
Example :
Methods code
clusterdescriptionprevnextTop
sub cluster {
  my ($self,$dafs_ref)=@_;
  my @dafs = @$dafs_ref;
  @dafs = sort{$a->p_value <=> $b->p_value} @dafs;
  my $start =0;
  my @representative_sequences;    
 DAFS: foreach my $daf (@dafs){
    $start ++;
    my %family_cluster;
    next DAFS unless($daf);	
    my $RFAM = substr($daf->hseqname,0,7);
    push @{$family_cluster{$RFAM}},$daf;
  MATCHES:  for (my $index = $start; $index <= $#dafs ; $index ++){
      next MATCHES unless ($dafs[$index]);
      if ($daf->end >= $dafs[$index]->start() && $daf->start() <= $dafs[$index]->end){
	my $familly = substr($dafs[$index]->hseqname,0,7);
	push @{$family_cluster{$familly}},$dafs[$index];
	$dafs[$index] = undef;
      }
    }
    foreach my $domain (keys %family_cluster){
      my @temp_array = @{$family_cluster{$domain}};
      # get highest scoring alignment for each family to be representative
@temp_array = sort{$a->p_value <=> $b->p_value} @temp_array; push @representative_sequences,shift @temp_array; } } return\@ representative_sequences; } #####################################################
# USE THIS TO RUN NCBI BLAST IF YOU ARE SO INCLINED #
#####################################################
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  my ($params ) = rearrange(['PARAMS'], @args);
  $self->params($params) if $params;
  return $self;
}
paramsdescriptionprevnextTop
sub params {
  my $self = shift;
  $self->{'params'} = shift if(@_);
  return $self->{'params'};
}


1;
}
parse_resultsdescriptionprevnextTop
sub parse_results {
  my ($self,$coverage_cutoff) = @_;
  my $results = $self->results_files;
  my @daf_coverage_results;
  my $filtered_output;
  my $bplite = $self->parser->get_parsers($results);
  foreach my $blast (@{$bplite}){
      while( my $subject = $blast->nextSbjct){
	 while (my $hsp = $subject->nextHSP) {
	   my @daf_results;
	   my $hsp_length = $hsp->length."\t";
	   my $subject_length = $subject->{'LENGTH'};
	   $subject_length = 1 unless ($subject_length);
	   my $coverage = $hsp_length/$subject_length*100;
$coverage =~ s/\.\d+//; if ($coverage_cutoff){ next unless($coverage > $coverage_cutoff); } $subject->name =~ /^(\S+)\/\S+\s+(\w+);\w+/; my $name = $2."-".$1; push @daf_results, $self->parser->split_hsp($hsp,$name); # add coverage into daf score?
foreach my $daf(@daf_results){ # swaps strands over if blast aligns +strand genomic to -ve strand in
# RFAM file, RFAM file sequences are the correct orientation
# Whereas the genomic can be either, with unspliced DNA it becomes
# impossibe to tell automatically
if ($daf->hstrand == -1 && $daf->strand == 1){ $daf->strand(-1); $daf->hstrand(1); } $daf->score($coverage); $daf->external_db_id(4200); push @daf_coverage_results, $daf; } } } } return undef unless ( @daf_coverage_results); my $output = $self->cluster(\@daf_coverage_results); $self->output($output); } # foreach hit, look at all the other hits and push into an array anything that overlaps with it
# then sort the array and take the top scoring hits for heach familly that lives in it.
}
rundescriptionprevnextTop
sub run {
  my ($self, $dir) = @_;
  my $coverage = 70; # Coverage filter for very common blast hits
$self->workdir($dir) if($dir); throw("Can't run ".$self." without a query sequence") unless($self->query); $self->checkdir(); eval { my $low_copy_db = $self->params->{'lowcopy'}; $self->throw("DATABASE NOT FOUND $low_copy_db") unless -e $low_copy_db; my $filename = $self->write_seq_file(); $self->files_to_delete($filename); $self->files_to_delete($self->resultsfile); $self->run_ncbi_analysis; $self->parse_results(20); $self->{'results_files'} = (); $self->run_analysis; $self->parse_results($coverage); $self->delete_files; }; if ($@){ print "$@\n";} return 1;
}
run_ncbi_analysisdescriptionprevnextTop
sub run_ncbi_analysis {
  my ($self) = @_;
  # had coded options used in SGJs Rfam scan
my $database = $self->params->{'lowcopy'}; my $options = "-W7 -F F -b 1000000 -v 1000000 "; my $command = "/software/bin/blastall"; my $filename = $self->queryfile; my $results_file = $self->create_filename("Infernal", 'blast.out'); $self->files_to_delete($results_file); $self->results_files($results_file); $command .= " -p blastn -d $database -i $filename "; $command .= "$options 2>&1 > ".$results_file; print "Running blast ".$command."\n"; open(my $fh, "$command |") || throw("Error opening Blast cmd <$command>." . " Returned error $? BLAST EXIT: '" . ($? >> 8) . "'," ." SIGNAL '" . ($? & 127) . "', There was " . ($? & 128 ? 'a' : 'no') . " core dump"); # this loop reads the STDERR from the blast command
# checking for FATAL: messages (wublast) [what does ncbi blast say?]
# N.B. using simple die() to make it easier for RunnableDB to parse.
while(<$fh>){ if(/FATAL:(.+)/){ my $match = $1; print $match; # clean up before dying
$self->delete_files; if($match =~ /no valid contexts/){ die qq{"VOID"\n}; # hack instead
}elsif($match =~ /Bus Error signal received/){ die qq{"BUS_ERROR"\n}; # can we work out which host?
}elsif($match =~ /Segmentation Violation signal received./){ die qq{"SEGMENTATION_FAULT"\n}; # can we work out which host?
}elsif($match =~ /Out of memory;(.+)/){ # (.+) will be something like "1050704 bytes were last
#requested."
die qq{"OUT_OF_MEMORY"\n}; # resenD to big mem machine by rulemanager
}elsif($match =~ /the query sequence is shorter
than the word length/
){ #no valid context
die qq{"VOID"\n}; # hack instead
}else{ warning("Something FATAL happened to BLAST we've not ". "seen before, please add it to Package: " . __PACKAGE__ . ", File: " . __FILE__); die ($self->unknown_error_string."\n"); # send appropriate string
#as standard this will be failed so job can be retried
#when in pipeline
} }elsif(/WARNING:(.+)/){ # ONLY a warning usually something like hspmax=xxxx was exceeded
# skip ...
}elsif(/^\s{10}(.+)/){ # ten spaces
# Continuation of a WARNING: message
# Hope this doesn't catch more than these.
# skip ...
} } unless(close $fh){ # checking for failures when closing.
# we should't get here but if we do then $? is translated
#below see man perlvar
warning("Error running Blast cmd <$command>. Returned ". "error $? BLAST EXIT: '" . ($? >> 8) . "', SIGNAL '" . ($? & 127) . "', There was " . ($? & 128 ? 'a' : 'no') . " core dump"); die ($self->unknown_error_string."\n"); }
}
General documentation
CONTACTTop
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
run_analysisTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::Blast
Function : gets a list of databases to run against and constructs
commandlines against each one and runs them
Returntype: none
Exceptions: throws if there is a problem opening the commandline or
if blast produces an error
Example :