Raw content of Bio::EnsEMBL::Analysis::Tools::FilterBPlite # Ensembl module for Bio::EnsEMBL::Analysis::Tools::FilterBPlite # # Copyright (c) 2004 Ensembl # =head1 NAME Bio::EnsEMBL::Analysis::Tools::FilterBPlite =head1 SYNOPSIS my $parser = Bio::EnsEMBL::Analysis::Tools::FilterBPlite-> new( -regex => '^\w+\s+(\w+)' -query_type => 'dna', -database_type => 'pep', -threshold_type => 'PVALUE', -threshold => 0.01, ); my @results = @{$parser->parse_results('blast.out')}; =head1 DESCRIPTION This module inherits from BPliteWrapper so follows the same basic methodology but it implements some prefiltering of the HSPs to mimic how the old pipeline blast runnable was used in the raw computes =head1 CONTACT Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk =cut package Bio::EnsEMBL::Analysis::Tools::FilterBPlite; use strict; use warnings; use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning); use Bio::EnsEMBL::Utils::Argument qw( rearrange ); use Bio::EnsEMBL::Analysis::Tools::BPliteWrapper; use Bio::EnsEMBL::Analysis::Tools::FeatureFilter; use vars qw (@ISA); @ISA = qw(Bio::EnsEMBL::Analysis::Tools::BPliteWrapper); =head2 new Arg [1] : Bio::EnsEMBL::Analysis::Tools::FilterBPlite Arg [THRESHOLD_TYPE] : string, threshold type Arg [THRESHOLD] : int, threshold Arg [COVERAGE] : int, coverage value Arg [FILTER] : int, boolean toggle as whether to filter Function : create a Bio::EnsEMBL::Analysis::Tools::FilterBPlite object Returntype: Bio::EnsEMBL::Analysis::Tools::FilterBPlite Exceptions: Example : =cut sub new{ my ($class,@args) = @_; my $self = $class->SUPER::new(@args); &verbose('WARNING'); my ($threshold_type, $threshold, $coverage, $filter) = rearrange (['THRESHOLD_TYPE', 'THRESHOLD', 'COVERAGE', 'FILTER'], @args); ###################### #SETTING THE DEFAULTS# ###################### $self->coverage(10); $self->filter(1); ###################### $self->threshold_type($threshold_type); $self->threshold($threshold); $self->coverage($coverage) if(defined($coverage)); $self->filter($filter) if(defined($filter)); return $self; } =head2 Container method Arg [1] : Bio::EnsEMBL::Analysis::Tools::FilterBPlite Arg [2] : string/int Function : container methods, this documents the 4 methods below threshold_type, threshold, coverage, filter Returntype: string/int Exceptions: Example : =cut sub threshold_type{ my $self = shift; $self->{'threshold_type'} = shift if(@_); return $self->{'threshold_type'}; } sub threshold{ my $self = shift; $self->{'threshold'} = shift if(@_); return $self->{'threshold'}; } sub coverage{ my $self = shift; $self->{'coverage'} = shift if(@_); return $self->{'coverage'}; } sub filter{ my $self = shift; $self->{'filter'} = shift if(@_); return $self->{'filter'}; } =head2 get_hsps Arg [1] : Bio::EnsEMBL::Analysis::Tools::FilterBPlite Arg [2] : Bio::EnsEMBL::Analysis::Tools::BPlite Function : prefilter the hsps then parser then and turn them into features Returntype: none Exceptions: throw if no name can be parser from the subject Example : =cut sub get_hsps{ my ($self, $parsers) = @_; my $regex = $self->regex; my @output; my $ids; if($self->filter){ $ids = $self->filter_hits($parsers); } my $seconds = $self->get_parsers($self->filenames); PARSER:foreach my $second(@$seconds){ NAME: while(my $sbjct = $second->nextSbjct){ if($self->filter && !($ids->{$sbjct->name})){ next NAME; } my ($name) = $sbjct->name =~ /$regex/; throw("Error parsing name from ".$sbjct->name." check your ". "blast setup and blast headers") unless($name); HSP: while (my $hsp = $sbjct->nextHSP) { if($self->is_hsp_valid($hsp)){ push(@output, $self->split_hsp($hsp, $name)); } } } } $parsers = []; $self->output(\@output); } =head2 filter_hits Arg [1] : Bio::EnsEMBL::Analysis::Tools::FilterBPlite Arg [2] : Bio::EnsEMBL::Analysis::Tools::BPlite Function : prefilter the blast results using specified thresholds and FeatureFilter Returntype: hashref Exceptions: Example : =cut sub filter_hits{ my ($self, $parsers) = @_; my %ids; my @features; PARSER:foreach my $parser(@$parsers){ SUB:while(my $sbjct = $parser->nextSbjct){ my $name = $sbjct->name; HSP:while (my $hsp = $sbjct->nextHSP) { if($self->is_hsp_valid($hsp)){ my $qstart = $hsp->query->start(); my $hstart = $hsp->subject->start(); my $qend = $hsp->query->end(); my $hend = $hsp->subject->end(); my $qstrand = $hsp->query->strand(); my $hstrand = $hsp->subject->strand(); my $score = $hsp->score; my $p_value = $hsp->P; my $percent = $hsp->percent; my $fp = $self->feature_factory->create_feature_pair ($qstart, $qend, $qstrand, $score, $hstart, $hend, $hstrand, $name, $percent, $p_value); push(@features,$fp); } } } } my $search = Bio::EnsEMBL::Analysis::Tools::FeatureFilter->new ( -coverage => $self->coverage, ); my @newfeatures = @{$search->filter_results(\@features)}; foreach my $f (@newfeatures) { my $id = $f->hseqname; $ids{$id} = 1; } return \%ids; } =head2 is_hsp_valid Arg [1] : Bio::EnsEMBL::Analysis::Tools::FilterBPlite Arg [2] : Bio::EnsEMBL::Analysis::Tools::BPlite::HSP Function : checks hsp against specified threshold returns hsp if above value 0 if not Returntype: Bio::EnsEMBL::Analysis::Tools::BPlite::HSP/0 Exceptions: Example : =cut sub is_hsp_valid{ my ($self, $hsp) = @_; if($self->threshold_type){ if ($self->threshold_type eq "PID") { return 0 if ($hsp->percent < $self->threshold); } elsif ($self->threshold_type eq "SCORE") { return 0 if ($hsp->score < $self->threshold); } elsif ($self->threshold_type eq "PVALUE") { return 0 if($hsp->P > $self->threshold); } } return $hsp; }