Raw content of Bio::EnsEMBL::Analysis::Runnable::BestPmatch
package Bio::EnsEMBL::Analysis::Runnable::BestPmatch;
use strict;
use warnings;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils qw(id coord_string);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable);
sub new {
my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($pafs, $min_coverage) = rearrange(['PROTEIN_HITS', 'MIN_COVERAGE'], @args);
###SETTING DEFAULTS###
$self->min_coverage(25);
#######################
$self->protein_hits($pafs);
$self->min_coverage($min_coverage);
return $self;
}
sub protein_hits{
my ($self, $ref) = @_;
if($ref){
if(ref($ref) ne "ARRAY"){
throw("Runnable::BestPmatch Must pass protein hits an array ref not ".$ref);
}
$self->{protein_hits} = $ref;
}
return $self->{protein_hits};
}
sub min_coverage{
my ($self, $arg) = @_;
if($arg){
$self->{'min_coverage'} = $arg;
}
return $self->{'min_coverage'};
}
sub protein_hash{
my ($self, $hash) = @_;
if(!$self->{'_proteins'}){
$self->{'_proteins'} = {};
}
if($hash){
throw("Must pass protein_hash a hashref not ".$hash)
if(ref($hash) ne 'HASH');
$self->{'_proteins'} = $hash;
}
return $self->{'_proteins'};
}
sub run{
my ($self) = @_;
my %prots;
foreach my $hit(@{$self->protein_hits}){
push (@{$prots{$hit->hseqname}}, $hit);
}
$self->protein_hash(\%prots);
my $hits = $self->prune_hits;
my %unique;
foreach my $hit(@$hits){
my $string = id($hit)."-".coord_string($hit);
if(!$unique{$string}){
$hit->analysis($self->analysis);
$unique{$string} = $hit;
}
}
my @output = values(%unique);
$self->output(\@output);
}
sub prune_hits{
my ($self, $hits) = @_;
$hits = $self->protein_hits if(!$hits);
my @chosen;
my %prots = %{$self->protein_hash};
PROTEIN:foreach my $p(keys(%{$self->protein_hash})){
my $allhits = $prots{$p};
my @sorted = sort {$b->score <=> $a->score} @$allhits;
my $first = shift(@sorted);
my $score_boundary = $first->score() - 2;
my $lower_threshold = $self->min_coverage;
next PROTEIN if $first->score < $lower_threshold;
push(@chosen, $first);
PRUNE:foreach my $hit(@sorted){
last PRUNE if($hit->score < $score_boundary);
last PRUNE if($hit->score < $lower_threshold);
push(@chosen, $hit);
}
}
return \@chosen;
}
1;