Bio::EnsEMBL::Analysis::Tools FilterBPlite
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
  Bio::EnsEMBL::Analysis::Tools::FilterBPlite
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Tools::BPliteWrapper
Bio::EnsEMBL::Analysis::Tools::FeatureFilter
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( verbose throw warning )
Inherit
Bio::EnsEMBL::Analysis::Tools::BPliteWrapper
Synopsis
  my $parser = Bio::EnsEMBL::Analysis::Tools::FilterBPlite->
new(
-regex => '^\w+\s+(\w+)',
-query_type => 'dna',
-database_type => 'pep',
-threshold_type => 'PVALUE',
-threshold => 0.01,
);
my @results = @{$parser->parse_results('blast.out')};
Description
This module inherits from BPliteWrapper and so follows the same basic
methodology, but it implements some prefiltering of the HSPs to mimic how
the old pipeline blast runnable was used in the raw computes.
Methods
coverage
No description
Code
filter
No description
Code
filter_hitsDescriptionCode
get_hspsDescriptionCode
is_hsp_validDescriptionCode
newDescriptionCode
threshold
No description
Code
threshold_type
No description
Code
Methods description
filter_hitscode    nextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FilterBPlite
Arg [2] : Bio::EnsEMBL::Analysis::Tools::BPlite
Function : prefilter the blast results using specified thresholds
and FeatureFilter
Returntype: hashref
Exceptions:
Example :
get_hspscodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FilterBPlite
Arg [2] : Bio::EnsEMBL::Analysis::Tools::BPlite
Function : prefilter the HSPs, then parse them and turn them into
features
Returntype: none
Exceptions: throw if no name can be parsed from the subject
Example :
is_hsp_validcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FilterBPlite
Arg [2] : Bio::EnsEMBL::Analysis::Tools::BPlite::HSP
Function : checks the HSP against the specified threshold; returns the
HSP if it passes, 0 if not
Returntype: Bio::EnsEMBL::Analysis::Tools::BPlite::HSP/0
Exceptions:
Example :
newcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FilterBPlite
Arg [THRESHOLD_TYPE] : string, threshold type
Arg [THRESHOLD] : number, threshold
Arg [COVERAGE] : int, coverage value
Arg [FILTER] : int, boolean toggle for whether to filter
Function : create a Bio::EnsEMBL::Analysis::Tools::FilterBPlite
object
Returntype: Bio::EnsEMBL::Analysis::Tools::FilterBPlite
Exceptions:
Example :
Methods code
coveragedescriptionprevnextTop
# Container method for the coverage value.
#   Arg [1]    : the object (hash-based)
#   Arg [2]    : optional new value; stored whenever an argument is
#                supplied, even if that argument is undef
#   Returntype : the value currently held under the 'coverage' key
sub coverage {
  my ($self, @new_value) = @_;
  if (@new_value) {
    $self->{'coverage'} = $new_value[0];
  }
  return $self->{'coverage'};
}
filterdescriptionprevnextTop
# Container method for the filter toggle.
#   Arg [1]    : the object (hash-based)
#   Arg [2]    : optional new value; stored whenever an argument is
#                supplied, even if that argument is undef
#   Returntype : the value currently held under the 'filter' key
sub filter {
  my ($self, @new_value) = @_;
  if (@new_value) {
    $self->{'filter'} = $new_value[0];
  }
  return $self->{'filter'};
}
filter_hitsdescriptionprevnextTop
# Prefilter the blast results.
#
# Every HSP of every subject of every parser that passes is_hsp_valid is
# turned into a feature pair (via feature_factory->create_feature_pair)
# labelled with the subject's name. The collected pairs are then handed to
# a FeatureFilter built with this object's coverage value, and the
# hseqname of each surviving feature is recorded.
#
#   Arg [1]    : the object
#   Arg [2]    : arrayref of parsers offering nextSbjct
#   Returntype : hashref, surviving hseqname => 1
sub filter_hits {
  my ($self, $parsers) = @_;

  my @candidates;
  foreach my $parser (@{$parsers}) {
    while (my $subject = $parser->nextSbjct) {
      my $subject_name = $subject->name;
      while (my $hsp = $subject->nextHSP) {
        next unless $self->is_hsp_valid($hsp);

        my $query_start    = $hsp->query->start();
        my $subject_start  = $hsp->subject->start();
        my $query_end      = $hsp->query->end();
        my $subject_end    = $hsp->subject->end();
        my $query_strand   = $hsp->query->strand();
        my $subject_strand = $hsp->subject->strand();
        my $score          = $hsp->score;
        my $p_value        = $hsp->P;
        my $percent_id     = $hsp->percent;

        my $pair = $self->feature_factory->create_feature_pair(
          $query_start, $query_end, $query_strand, $score,
          $subject_start, $subject_end, $subject_strand,
          $subject_name, $percent_id, $p_value
        );
        push @candidates, $pair;
      }
    }
  }

  my $feature_filter = Bio::EnsEMBL::Analysis::Tools::FeatureFilter->new(
    -coverage => $self->coverage,
  );
  my $survivors = $feature_filter->filter_results(\@candidates);

  # Only the identity of the surviving hits is reported, not the features.
  my %kept_ids;
  foreach my $feature (@{$survivors}) {
    $kept_ids{ $feature->hseqname } = 1;
  }
  return \%kept_ids;
}
get_hspsdescriptionprevnextTop
# Turn the blast HSPs into output features.
#
# When the filter toggle is on, filter_hits is first run over the supplied
# parsers to obtain the set of subject names worth keeping. A second set
# of parsers is then obtained from get_parsers for this object's
# filenames, and it is this second set that is walked to build the output.
#
#   Arg [1]    : the object
#   Arg [2]    : arrayref of parsers, only handed to filter_hits
#   Returntype : whatever $self->output returns when given the arrayref
#                of collected results
#   Exceptions : throws if the regex captures no (true) name from a
#                subject's name
sub get_hsps {
  my ($self, $parsers) = @_;

  my $name_pattern = $self->regex;
  my @collected;
  my $wanted_ids;
  $wanted_ids = $self->filter_hits($parsers) if $self->filter;

  my $fresh_parsers = $self->get_parsers($self->filenames);
  foreach my $parser (@{$fresh_parsers}) {
  SUBJECT:
    while (my $subject = $parser->nextSbjct) {
      # With filtering on, skip subjects that filter_hits did not keep.
      next SUBJECT if $self->filter && !($wanted_ids->{$subject->name});

      my ($name) = $subject->name =~ /$name_pattern/;
      unless ($name) {
        throw("Error parsing name from ".$subject->name." check your ".
              "blast setup and blast headers");
      }

      while (my $hsp = $subject->nextHSP) {
        next unless $self->is_hsp_valid($hsp);
        push @collected, $self->split_hsp($hsp, $name);
      }
    }
  }

  # Drops this sub's own reference to the first set of parsers; the
  # caller's variable is not affected.
  $parsers = [];
  return $self->output(\@collected);
}
is_hsp_validdescriptionprevnextTop
# Check one HSP against the configured threshold.
#
#   Arg [1]    : the object (supplies threshold_type and threshold)
#   Arg [2]    : an HSP offering percent, score and P
#   Returntype : the HSP itself when it passes, 0 when it does not
#
# Threshold types handled:
#   PID    - fails when percent is below the threshold
#   SCORE  - fails when score is below the threshold
#   PVALUE - fails when P is above the threshold
# With no (true) threshold type, or a type other than these three, the
# HSP always passes.
sub is_hsp_valid {
  my ($self, $hsp) = @_;

  my $type = $self->threshold_type;
  return $hsp unless $type;

  my $fails = 0;
  if ($type eq "PID") {
    $fails = ($hsp->percent < $self->threshold);
  }
  elsif ($type eq "SCORE") {
    $fails = ($hsp->score < $self->threshold);
  }
  elsif ($type eq "PVALUE") {
    $fails = ($hsp->P > $self->threshold);
  }

  return $fails ? 0 : $hsp;
}
newdescriptionprevnextTop
# Constructor.
#
# Builds the object through the parent class constructor, then applies the
# filtering settings: coverage defaults to 10 and filter defaults to 1;
# either default is replaced when the matching argument is defined.
# threshold_type and threshold are always stored as given (undef when the
# argument is absent).
#
#   Arg [THRESHOLD_TYPE] : string, threshold type
#   Arg [THRESHOLD]      : threshold value
#   Arg [COVERAGE]       : coverage value (default 10)
#   Arg [FILTER]         : boolean toggle for filtering (default 1)
#   Returntype           : the new object
sub new {
  my ($class, @args) = @_;
  my $self = $class->SUPER::new(@args);

  # Plain call form rather than the &-sigil form, which bypasses
  # prototypes.
  # NOTE(review): this is invoked on every construction; presumably it
  # changes process-wide verbosity in Bio::EnsEMBL::Utils::Exception -
  # confirm that is intended.
  verbose('WARNING');

  my ($threshold_type, $threshold, $coverage, $filter) = rearrange(
    ['THRESHOLD_TYPE', 'THRESHOLD', 'COVERAGE', 'FILTER'], @args
  );

  ######################
  #SETTING THE DEFAULTS#
  ######################
  $self->coverage(10);
  $self->filter(1);
  ######################

  $self->threshold_type($threshold_type);
  $self->threshold($threshold);
  $self->coverage($coverage) if defined($coverage);
  $self->filter($filter)     if defined($filter);

  return $self;
}
thresholddescriptionprevnextTop
# Container method for the threshold value.
#   Arg [1]    : the object (hash-based)
#   Arg [2]    : optional new value; stored whenever an argument is
#                supplied, even if that argument is undef
#   Returntype : the value currently held under the 'threshold' key
sub threshold {
  my ($self, @new_value) = @_;
  if (@new_value) {
    $self->{'threshold'} = $new_value[0];
  }
  return $self->{'threshold'};
}
threshold_typedescriptionprevnextTop
# Container method for the threshold type.
#   Arg [1]    : the object (hash-based)
#   Arg [2]    : optional new value; stored whenever an argument is
#                supplied, even if that argument is undef
#   Returntype : the value currently held under the 'threshold_type' key
sub threshold_type {
  my ($self, @new_value) = @_;
  if (@new_value) {
    $self->{'threshold_type'} = $new_value[0];
  }
  return $self->{'threshold_type'};
}
General documentation
CONTACTTop
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
Container methodTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::FilterBPlite
Arg [2] : string/int
Function : container methods, this documents the 4 methods
below threshold_type, threshold, coverage, filter
Returntype: string/int
Exceptions:
Example :