Raw content of Bio::EnsEMBL::Analysis::Runnable::Finished::BlastExonerate
### Bio::EnsEMBL::Analysis::Runnable::Finished::BlastExonerate
package Bio::EnsEMBL::Analysis::Runnable::Finished::BlastExonerate;
use strict;
use warnings;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Analysis::Runnable::Finished::Blast;
use Bio::EnsEMBL::Analysis::Runnable::Finished::Exonerate;
use Bio::EnsEMBL::Analysis::Config::Blast;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use base 'Bio::EnsEMBL::Analysis::Runnable';
use vars qw($verbose);
$verbose = 1;
sub new {
my ( $class, @args ) = @_;
my $self = bless {}, $class;
my ( $soft_masked, $hard_masked, $analysis, $seqfetcher ) = rearrange(
[
qw{
SOFT_MASKED
HARD_MASKED
ANALYSIS
SEQFETCHER
}
],
@args
);
throw "No SOFT_MASKED query (soft masked genomic sequence) given" unless $soft_masked;
throw "No HARD_MASKED query (hard masked genomic sequence) given" unless $hard_masked;
$self->soft_masked($soft_masked);
$self->hard_masked($hard_masked);
$self->analysis($analysis);
my $indices = [ split(",", $self->analysis->db) ];
$seqfetcher = $self->_make_seqfetcher($indices);
$self->seqfetcher($seqfetcher);
return $self;
}
sub hard_masked {
my ( $self, $seq ) = @_;
if ($seq) {
unless ( $seq->isa("Bio::PrimarySeqI") || $seq->isa("Bio::Seq") ) {
die Dumper($seq);
throw("Input isn't a Bio::Seq or Bio::PrimarySeq");
}
$self->{'_hard_masked'} = $seq;
}
return $self->{'_hard_masked'};
}
sub soft_masked {
my ( $self, $seq ) = @_;
if ($seq) {
unless ( $seq->isa("Bio::PrimarySeqI") || $seq->isa("Bio::Seq") ) {
die Dumper($seq);
throw("Input isn't a Bio::Seq or Bio::PrimarySeq");
}
$self->{'_soft_masked'} = $seq;
}
return $self->{'_soft_masked'};
}
sub seqfetcher {
my ( $self, $seqfetcher ) = @_;
if ($seqfetcher) {
$self->{'_seqfetcher'} = $seqfetcher;
}
return $self->{'_seqfetcher'};
}
sub analysis {
my ( $self, $analysis ) = @_;
if ($analysis) {
$self->{'_analysis'} = $analysis;
}
return $self->{'_analysis'};
}
sub run {
my ($self) = shift;
my $parser = shift;
my $filter = shift;
my $blastcon = shift;
my $blast = Bio::EnsEMBL::Analysis::Runnable::Finished::Blast->new(
-query => $self->hard_masked,
-program => $blastcon->{-program} || 'wublastn',
-database => $self->analysis->db_file,
-analysis => $self->analysis,
-parser => $parser,
-filter => $filter,
%{$blastcon},
);
my $db_files = $blast->databases;
# set the db_version_searched
my $dbv = $blast->get_db_version();
print "using $dbv\n";
$self->get_db_version($dbv);
foreach my $db_file (sort @$db_files) {
print STDOUT "database file $db_file\n";
$blast->clean_output();
$blast->clean_results_files();
$blast->clean_databases();
$blast->databases($db_file);
$blast->run();
my $features = $blast->output ;
if (!$self->analysis->db)
{
print STDOUT "indice file $db_file\n";
my $seqfetcher = $self->_make_seqfetcher([$db_file]);
$self->seqfetcher($seqfetcher);
}
$self->run_exonerate($features,$blastcon) if @$features;
}
}
sub run_exonerate {
my ( $self, $feat, $exon_options ) = @_;
# create a hash of features
my $hit_features = {};
for ( my $i = 0 ; $i < @$feat ; $i++ ) {
my $f = $feat->[$i];
my $hid = $f->hseqname or throw("Missing hid");
$hit_features->{$hid} ||= [];
push ( @{ $hit_features->{$hid} }, $f );
}
# get a sorted list of unique feature IDs
my @features_ids = sort {$a cmp $b } keys %$hit_features;
print STDOUT "Blast output: ".scalar(@features_ids)." features\n" if $verbose;
my $chunk_size = 200;
for (my $i = 0; $i < @features_ids; $i += $chunk_size) {
my $j = $i + $chunk_size - 1;
# Set second index to last element if we're off the end of the array
$j = $#features_ids if $#features_ids < $j;
# Take a slice from the array
my $chunk = [@features_ids[$i..$j]];
print STDOUT "Process chunk of ".scalar(@$chunk)." features\n" if $verbose;
# fetch the sequences
my $seqs = [];
foreach my $id (@$chunk) {
push @$seqs, $self->get_Sequence($id);
}
# run exonerate
my $exonerate =
Bio::EnsEMBL::Analysis::Runnable::Finished::Exonerate->new(
-analysis => $self->analysis,
-program => $self->analysis->program,
-target => $self->soft_masked ,
-query_seqs => $seqs,
-exo_options => $exon_options->{-exo_options},
-query_type => $exon_options->{-query_type},
);
$exonerate->run();
my $exon_output = $exonerate->output();
print STDOUT "Exonerate output: ".scalar(@$exon_output)." features\n" if $verbose;
$self->output($exon_output);
}
}
sub get_Sequence {
my ($self,$id) = @_;
my $seqfetcher = $self->seqfetcher;
my $seq;
eval {
$seq = $seqfetcher->get_Seq_by_acc($id);
};
if ((!defined($seq)) && $@) {
throw("Couldn't find sequence for [$id]:\n $@");
}
return $seq;
}
sub add_output {
my ( $self, $f ) = @_;
my $ana = $self->analysis;
$f->analysis($ana);
$self->{'output'} ||= [];
push ( @{ $self->{'output'} }, $f );
}
sub _make_seqfetcher {
my ( $self, $indices ) = @_;
my $seqfetcher;
if ( ref($indices) eq "ARRAY"){
$seqfetcher = Bio::EnsEMBL::Pipeline::SeqFetcher::xdget->new(-db => $indices,
-executable => '/software/farm/bin/xdget');
}
else{
$seqfetcher = Bio::EnsEMBL::Pipeline::SeqFetcher::Pfetch->new();
}
return $seqfetcher;
}
=head2 get_db_version
Title : get_db_version
[ distinguished from RunnableDB::*::db_version_searched() ]
Useage : $self->get_db_version('version string')
$obj->get_db_version()
Function: Get/Set a blast database version that was searched
The actual look up is done in Runnable::Finished_Blast
This is just a holding place for the string in this
module
Returns : String or undef
Args : String
Caller : $self::run()
RunnableDB::BlastExonerate::db_version_searched()
=cut
sub get_db_version{
my ($self, $arg) = @_;
$self->{'_db_version_searched'} = $arg if $arg;
return $self->{'_db_version_searched'};
}
1;
=head1 NAME - Bio::EnsEMBL::Analysis::Runnable::BlastExonerate
=head1 AUTHOR
Mustapha Larbaoui B ml6@sanger.ac.uk