Bio::EnsEMBL::Analysis::Runnable
BlastmiRNA
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::BlastmiRNA
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $blast = Bio::EnsEMBL::Analysis::Runnable::BlastmiRNA->new
(
-query => $slice,
-program => 'wublastn',
-database => 'embl_vertrna',
-options => 'hitdist=40 -cpus=1',
-parser => $bplitewrapper,
-filter => $featurefilter,
);
$blast->run;
my @output =@{$blast->output};
Description
Modified blast runnable for specific use with miRNAs
Use for running BLASTN of genomic vs miRNAs prior to
miRNA analysis.
Keeps the coverage in the dna_align_feature score field.
Also clusters overlapping hits and picks the one with the lowest
evalue to represent that cluster.
Methods
Methods description
Arg [1] : Array ref of Bio::EnsEMBL::DnaDnaAlignFeature Function : Clusters overlapping blast hits and chooses the one to represent the cluster. Returntype: Array ref of Bio::EnsEMBL::DnaDnaAlignFeature Exceptions: None Example : my $output = $self->cluster(\@dna_align_features); |
Arg [1] : Scalar coverage cutoff value Function : overrides the parse results method in runnable to allow for : coverage calculations Returntype: none Exceptions: none Example : $self->parse_results(80); |
Arg : string, directory Function : a generic run method. This checks the directory specifed : to run it, write the query sequence to file, marks the query sequence : file and results file for deletion, runs the analysis parses the : results and deletes any files Returntype: 1 Exceptions: throws if no query sequence is specified Example : |
Methods code
sub cluster
{ my ($self,$dafs_ref)=@_;
my @dafs = @$dafs_ref;
@dafs = sort{$a->p_value <=> $b->p_value} @dafs;
my $start =0;
my @representative_sequences;
DAFS: foreach my $daf (@dafs){
my @cluster;
$start++;
next DAFS unless($daf);
push @cluster,$daf;
MATCHES: for (my $index = $start; $index <= $#dafs ; $index++){
next MATCHES unless ($dafs[$index]);
if ($daf->end >= $dafs[$index]->start() && $daf->start() <= $dafs[$index]->end){
push @cluster,$dafs[$index];
$dafs[$index] = undef;
}
}
foreach my $daf ( @cluster ){
if($daf->score >= 100 && $daf->percent_id == 100){
push @representative_sequences, $daf;
next DAFS;
}
}
@cluster = sort{$b->p_value <=> $a->p_value} @cluster;
push @representative_sequences, pop @cluster;
}
return\@ representative_sequences;
}
1; } |
sub parse_results
{ my ($self,$coverage_cutoff) = @_;
my $results = $self->results_files;
my @daf_coverage_results;
my $filtered_output;
my $bplite = $self->parser->get_parsers($results);
foreach my $blast (@{$bplite}){
while( my $subject = $blast->nextSbjct){
while (my $hsp = $subject->nextHSP) {
my @daf_results;
my $hsp_length = $hsp->length."\t";
my $subject_length = $subject->{'LENGTH'};
$subject_length = 1 unless ($subject_length);
my $coverage = $hsp_length/$subject_length*100; $coverage =~ s/\.\d+//;
if ($coverage_cutoff){
next unless($coverage > $coverage_cutoff);
}
$subject->name =~ /^\S+\s+(\w+)/;
my $name = $1;
push @daf_results, $self->parser->split_hsp($hsp,$name);
foreach my $daf(@daf_results){
if ($daf->hstrand == -1 && $daf->strand == 1){
$daf->strand(-1);
$daf->hstrand(1);
}
$daf->score($coverage);
$daf->external_db_id(3200);
push @daf_coverage_results, $daf;
}
}
}
}
return undef unless ( @daf_coverage_results);
my $output = $self->cluster(\@daf_coverage_results);
$self->output($output); } |
sub run
{ my ($self, $dir) = @_;
$self->workdir($dir) if($dir);
throw("Can't run ".$self." without a query sequence")
unless($self->query);
$self->checkdir();
my $filename = $self->write_seq_file();
$self->files_to_delete($filename);
$self->files_to_delete($self->resultsfile);
$self->run_analysis;
$self->parse_results;
$self->delete_files;
return 1; } |
General documentation