Raw content of Bio::EnsEMBL::Analysis::Tools::FragmentTranscriptFilter # Ensembl module for Bio::EnsEMBL::Analysis::Tools::FragmentTranscriptFilter # # Copyright (c) 2004 Ensembl # =head1 NAME Bio::EnsEMBL::Analysis::Tools::FragmentTranscriptFilter =head1 SYNOPSIS my $filter = new Bio::EnsEMBL::Analysis::Tools::FragmentTranscriptFilter-> new->( -coverage => 80, -percent_id => 90, ); my @filtered_results = @{$filter->filter_results(\@results)}; =head1 DESCRIPTION This is a best-in-genome filter is designed for mapping proteins/cDNAs/ESTs to a low-coverage, fragmented genome, where different parts a single transcribed sequence may validly map to different top-level sequences in the target. =cut package Bio::EnsEMBL::Analysis::Tools::FragmentTranscriptFilter; use strict; use warnings; use Bio::EnsEMBL::Root; use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning); use Bio::EnsEMBL::Utils::Argument qw( rearrange ); use vars qw (@ISA); @ISA = qw(Bio::EnsEMBL::Root); =head2 new Returntype: Bio::EnsEMBL::Analysis::Tools::FragmentTranscriptFilter Exceptions: none Example : =cut sub new{ my ($class,@args) = @_; my $self = $class->SUPER::new(@args); &verbose('WARNING'); my ( $min_percent, $min_score, ) = rearrange([ 'PERCENT_ID', 'SCORE', ], @args); ###################### #SETTING THE DEFAULTS# ###################### $self->min_percent($min_percent) if defined $min_percent; $self->min_score($min_score) if defined $min_score; return $self; } #filter methods =head2 filter_results Arg [1] : self Arg [2] : arrayref of Trancripts Function : filter the given Transcruipts in the tried and trusted manner Returntype: arrayref Exceptions: throws if passed nothing or not an arrayref Example : =cut sub filter_results{ my ($self, $transcripts) = @_; my %trans_by_hid; foreach my $tran (@$transcripts) { # transcript will only have one supporting feature for use cases of this filter my ($sf) = @{$tran->get_all_supporting_features}; next if defined($self->min_score) and $sf->score < $self->min_score; next if defined($self->min_percent) and $sf->percent_id < $self->min_percent; push @{$trans_by_hid{$sf->hseqname}}, { tran => $tran, score => $sf->score, }; } my @good_transcripts; foreach my $hid (keys %trans_by_hid) { # sort transcripts by score my @trans = map { $_->{tran} } sort { $b->{score} <=> $a->{score} } @{$trans_by_hid{$hid}}; my (@all_t_sfs); TRANSCRIPT: foreach my $tran (@trans) { my @t_sfs; foreach my $exon (@{$tran->get_all_Exons}) { my ($sf) = @{$exon->get_all_supporting_features}; # check that this does not overlap with any of the previous overlap features foreach my $f (@all_t_sfs) { if ($sf->hstart <= $f->hend and $sf->hend >= $f->hstart) { next TRANSCRIPT; } } push @t_sfs, $sf; } # if we get here, the transcript has passed; push @all_t_sfs, @t_sfs; push @good_transcripts, $tran; } } return \@good_transcripts; } # containers sub min_percent{ my $self = shift; $self->{'_min_percent'} = shift if(@_); return exists($self->{'_min_percent'}) ? $self->{'_min_percent'} : undef; } sub min_score{ my $self = shift; $self->{'_min_score'} = shift if(@_); return exists($self->{'_min_score'}) ? $self->{'_min_score'} : undef; } 1;