Bio::EnsEMBL::Analysis::Tools ClusterFilter
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
  Bio::EnsEMBL::Analysis::Tools::ClusterFilter
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Root
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( verbose throw warning )
Inherit
Bio::EnsEMBL::Root
Synopsis
  my $filter = new Bio::EnsEMBL::Analysis::Tools::ClusterFilter
new->(
-coverage => 80,
-percent_id => 90,
-best_in_genome => 1,
);
my @filtered_results = @{$filter->filter_results(\@results)};
Description
This is the standard module used for filtering WGA2GenesDirect transcripts. It takes all the transcript alignments riginated from one gene
and cluster them together by location, it then selects the best cluster based in coverage and percent id of the transcripts within each cluster.
It was originally used to avoid transcript from one gene to be allocated in different chromosomes in the projection process (which would have no sense).
Methods
_get_transcript_coverage
No description
Code
_get_transcript_evidence_id
No description
Code
_get_transcript_percent_id
No description
Code
best_in_genome
No description
Code
filter_resultsDescriptionCode
max_stops
No description
Code
min_coverage
No description
Code
min_percent
No description
Code
newDescriptionCode
Methods description
filter_resultscode    nextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Tools::ClusterFilter
Arg [2] : arrayref of Trancripts
Function : filter the given Transcruipts in the tried and trusted manner
Returntype: arrayref
Exceptions: throws if passed nothing or not an arrayref
Example :
newcodeprevnextTop
  Returntype: Bio::EnsEMBL::Analysis::Tools::ClusterFilter
Exceptions: none
Example :
Methods code
_get_transcript_coveragedescriptionprevnextTop
sub _get_transcript_coverage {
  my ($self,$tran) = @_;

  if (@{$tran->get_all_supporting_features} and
      defined $tran->get_all_supporting_features->[0]->hcoverage) {
    my ($evi) = @{$tran->get_all_supporting_features};
    return $evi->hcoverage;
  } else {
    my @exons = @{$tran->get_all_Exons};
    my ($evi) = @{$exons[0]->get_all_supporting_features};
    return $evi->score;
  }
}

############################################################
}
_get_transcript_evidence_iddescriptionprevnextTop
sub _get_transcript_evidence_id {
  my ($self,$tran) = @_;

  my ($sf);

  if (@{$tran->get_all_supporting_features}) {
    ($sf) = @{$tran->get_all_supporting_features};
  } else {
    my @exons = @{$tran->get_all_Exons};
    ($sf) = @{$exons[0]->get_all_supporting_features};    
  }
  
  return $sf->hseqname;
}

############################################################
# containers
}
_get_transcript_percent_iddescriptionprevnextTop
sub _get_transcript_percent_id {
  my ($self,$tran) = @_;

  my ($sf);

  if (@{$tran->get_all_supporting_features}) {
    ($sf) = @{$tran->get_all_supporting_features};
  } else {
    my @exons = @{$tran->get_all_Exons};
    ($sf) = @{$exons[0]->get_all_supporting_features};    
  }

  return $sf->percent_id;
}

############################################################
}
best_in_genomedescriptionprevnextTop
sub best_in_genome {
  my $self = shift;
  $self->{'_best_in_genome'} = shift if(@_);

  return exists($self->{'_best_in_genome'}) ? $self->{'_best_in_genome'} : undef;
}


1;
}
filter_resultsdescriptionprevnextTop
sub filter_results {
  my ($self, $transcripts) = @_;
  
  # results are Bio::EnsEMBL::Transcripts with exons and supp_features
my @good_matches; TRANSCRIPT: foreach my $transcript (@$transcripts ){ my $coverage = $self->_get_transcript_coverage($transcript); my $percent_id = $self->_get_transcript_percent_id($transcript); my $pep = $transcript->translate->seq; my $num_stops = $pep =~ tr/\*/\*/; if (defined $self->min_percent and $percent_id < $self->min_percent) { print "Transcript REJECTED with: perc_ident: $percent_id\n"; next TRANSCRIPT; } if (defined $self->min_coverage and $coverage < $self->min_coverage){ print "Transcript REJECTED with: coverage: $coverage\n"; next TRANSCRIPT; } if (defined $self->max_stops and $num_stops > $self->max_stop){ print "Rejecting proj due to too many STOPS ",$num_stops,"\n"; next TRANSCRIPT; } print "Transcripts passed filter (cov=$coverage, per=$percent_id, stops=$num_stops\n"; push @good_matches, $transcript; } my @best_matches; if ($self->best_in_genome == 1){ @good_matches = sort { $a->slice->seq_region_name cmp $b->slice->seq_region_name or $a->start <=> $b->start } @good_matches; my @c; foreach my $t (@good_matches) { if (not @c or $c[-1]->{name} ne $t->slice->seq_region_name or $c[-1]->{end} < $t->start) { push @c, { name => $t->slice->seq_region_name, start => $t->start, end => $t->end, trans => [$t], }; } else { push @{$c[-1]->{trans}}, $t; if ($t->{end} > $c[-1]->{end}) { $c[-1]->{end} = $t->end; } } } #####################
my ($best_c, $best_cov); foreach my $c (@c) { my @trans = @{$c->{trans}}; my ($total_cov, $total_pid); foreach my $t (@trans) { my ($sf) = @{$t->get_all_supporting_features}; my $cov = $sf->score; my $pid = $sf->percent_id; $total_cov += $cov; $total_pid += $pid; } $total_cov /= scalar(@trans);
$total_pid /= scalar(@trans);
$c->{average_coverage} = $total_cov; $c->{average_percent_id} = $total_pid; } @c = sort { $b->{average_coverage} <=> $a->{average_coverage} or $b->{average_percent_id} <=> $a->{average_percent_id} } @c; if (@c && @{$c[0]->{trans}}){ my $best_c = shift @c; push @best_matches, @{$best_c->{trans}}; foreach my $oc (@c) { if ($oc->{average_coverage} >= $best_c->{average_coverage} and $oc->{average_percent_id} >= $best_c->{average_percent_id}) { push @best_matches, @{$oc->{trans}}; } } } } return\@ best_matches; } ############################################################
}
max_stopsdescriptionprevnextTop
sub max_stops {
  my $self = shift;
  $self->{'_max_stops'} = shift if(@_);

  return exists($self->{'_max_stops'}) ? $self->{'_max_stops'} : undef;
}
min_coveragedescriptionprevnextTop
sub min_coverage {
  my $self = shift;
  $self->{'_min_coverage'} = shift if(@_);

  return exists($self->{'_min_coverage'}) ? $self->{'_min_coverage'} : undef;
}
min_percentdescriptionprevnextTop
sub min_percent {
  my $self = shift;
  $self->{'_min_percent'} = shift if(@_);

  return exists($self->{'_min_percent'}) ? $self->{'_min_percent'} : undef;
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  &verbose('WARNING');
  my ($min_coverage,
      $min_percent,
      $max_stops,
      $best_in_genome,
      ) = 
        rearrange(['coverage',
                   'percent_id',
                   'max_stops',
                   'best_in_genome'], @args); 

  ######################
#SETTING THE DEFAULTS#
######################
$self->min_coverage($min_coverage) if defined $min_coverage; $self->min_percent($min_percent) if defined $min_percent; $self->max_stops($max_stops) if defined $max_stops; $self->best_in_genome($best_in_genome) if $best_in_genome; return $self; } #filter methods
}
General documentation
No general documentation available.