Bio::EnsEMBL::Analysis::Tools
ExonerateTranscriptFilter
Toolbar
Summary
Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $filter = new Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter
new->(
-best_in_genome => 1,
-reject_processed_pseudos => 1,
-coverage => 80,
-percent_id => 90,
);
my @filtered_results = @{$filter->filter_results(\@results)};
Description
This is the standard module used for filtering Exonerate transcripts
Methods
_get_transcript_coverage | No description | Code |
_get_transcript_evidence_id | No description | Code |
_get_transcript_percent_id | No description | Code |
_transcript_is_spliced | No description | Code |
best_in_genome | No description | Code |
filter_results | Description | Code |
min_coverage | No description | Code |
min_percent | No description | Code |
new | Description | Code |
reject_processed_pseudos | No description | Code |
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::Tools::DefaultExonerateFilter Arg [2] : arrayref of Trancripts Function : filter the given Transcruipts in the tried and trusted manner Returntype: arrayref Exceptions: throws if passed nothing or not an arrayref Example : |
Returntype: Bio::EnsEMBL::Analysis::Tools::ExonerateTranscriptFilter Exceptions: none Example : |
Methods code
_get_transcript_coverage | description | prev | next | Top |
sub _get_transcript_coverage
{ my ($self,$tran) = @_;
if (@{$tran->get_all_supporting_features} and
defined $tran->get_all_supporting_features->[0]->hcoverage) {
my ($evi) = @{$tran->get_all_supporting_features};
return $evi->hcoverage;
} else {
my @exons = @{$tran->get_all_Exons};
my ($evi) = @{$exons[0]->get_all_supporting_features};
return $evi->score;
}
}
} |
sub _get_transcript_evidence_id
{ my ($self,$tran) = @_;
my ($sf);
if (@{$tran->get_all_supporting_features}) {
($sf) = @{$tran->get_all_supporting_features};
} else {
my @exons = @{$tran->get_all_Exons};
($sf) = @{$exons[0]->get_all_supporting_features};
}
return $sf->hseqname;
}
} |
sub _get_transcript_percent_id
{ my ($self,$tran) = @_;
my ($sf);
if (@{$tran->get_all_supporting_features}) {
($sf) = @{$tran->get_all_supporting_features};
} else {
my @exons = @{$tran->get_all_Exons};
($sf) = @{$exons[0]->get_all_supporting_features};
}
return $sf->percent_id;
}
} |
sub _transcript_is_spliced
{ my ($self, $tran) = @_;
my @exons = sort { $a->start <=> $b->start } @{$tran->get_all_Exons};
if ( scalar (@exons) > 1 ){
for(my $i=0; $i < @exons - 1; $i++){
my $intron_len = $exons[$i+1]->start - $exons[$i]->end - 1;
if ( $intron_len > 9 ){
return 1;
}
}
}
return 0;
}
} |
sub best_in_genome
{ my $self = shift;
$self->{'_best_in_genome'} = shift if(@_);
return exists($self->{'_best_in_genome'}) ? $self->{'_best_in_genome'} : 0; } |
sub filter_results
{ my ($self, $transcripts) = @_;
my @good_matches;
my %matches;
TRAN:
foreach my $transcript (@$transcripts ){
my $coverage = $self->_get_transcript_coverage($transcript);
my $percent_id = $self->_get_transcript_percent_id($transcript);
my $id = $self->_get_transcript_evidence_id($transcript);
push @{$matches{$id}}, {
transcript => $transcript,
coverage => $coverage,
percent_id => $percent_id,
num_exons => scalar(@{$transcript->get_all_Exons}),
is_spliced => $self->_transcript_is_spliced($transcript),
};
}
my %matches_sorted_by_coverage;
my %selected_matches;
QUERY:
foreach my $query_id ( keys( %matches ) ){
@{$matches_sorted_by_coverage{$query_id}} =
sort { $b->{coverage} <=> $a->{coverage} or
$b->{num_exons} <=> $a->{num_exons} or
$b->{percent_id} <=> $a->{percent_id} } @{$matches{$query_id}};
my $max_coverage;
my $perc_id_of_best;
my $count = 0;
my $splices_elsewhere = 0;
my $best_has_been_seen = 0;
my $best_transcript = ${$matches_sorted_by_coverage{$query_id}}[0]->{transcript};
my $best_start = $best_transcript->start;
my $best_end = $best_transcript->end;
my $best_slice;
if (!defined $best_transcript->slice) {
$best_slice = $best_transcript->start_Exon->seqname;
} else {
$best_slice = $best_transcript->slice;
}
TRANSCRIPT:
foreach my $hit ( @{$matches_sorted_by_coverage{$query_id}} ){
$count++;
my ($accept, $label);
my $transcript = $hit->{transcript};
my $strand = $transcript->strand;
my $coverage = $hit->{coverage};
my $percent_id = $hit->{percent_id};
my $is_spliced = $hit->{is_spliced};
my $transcript_slice;
if (!defined $transcript->slice) {
$transcript_slice = $transcript->start_Exon->seqname;
} else {
$transcript_slice = $transcript->slice;
}
unless ($max_coverage){
$max_coverage = $coverage;
}
unless ( $perc_id_of_best ){
$perc_id_of_best = $percent_id;
}
if ( $count == 1 ){
$label = 'best_match';
} elsif ( $count > 1 &&
$splices_elsewhere &&
! $is_spliced) {
$label = 'potential_processed_pseudogene';
} else{
$label = $count;
}
if ( $count == 1 && $is_spliced ){
$splices_elsewhere = 1;
}
if ( $self->best_in_genome ){
if ($coverage == $max_coverage &&
(($coverage >= $self->min_coverage &&
$percent_id >= $self->min_percent)
||
($coverage >= (1 + 5/100) * $self->min_coverage && $percent_id >= (1 - 3/100) * $self->min_percent))) {
if ( $self->reject_processed_pseudos
&& $count > 1
&& $splices_elsewhere
&& ! $is_spliced) {
$accept = 'NO';
}
elsif ($best_slice eq $transcript_slice &&
$best_start > $transcript->start &&
$best_end < $transcript->end){
$accept = 'NO';
}
else {
$accept = 'YES';
push( @good_matches, $transcript);
}
}
else{
$accept = 'NO';
}
}
else{
if ($coverage >= (0.98 * $max_coverage) &&
(($coverage >= $self->min_coverage &&
$percent_id >= $self->min_percent)
||
($coverage >= (1 + 5/100) * $self->min_coverage && $percent_id >= (1 - 3/100) * $self->min_percent))) {
if ( $self->reject_processed_pseudos &&
$count > 1 &&
$splices_elsewhere &&
! $is_spliced) {
$accept = 'NO';
}
else{
$accept = 'YES';
push( @good_matches, $transcript);
}
}
else{
$accept = 'NO';
}
}
}
}
return\@ good_matches;
}
} |
sub min_coverage
{ my $self = shift;
$self->{'_min_coverage'} = shift if(@_);
return exists($self->{'_min_coverage'}) ? $self->{'_min_coverage'} : undef; } |
sub min_percent
{ my $self = shift;
$self->{'_min_percent'} = shift if(@_);
return exists($self->{'_min_percent'}) ? $self->{'_min_percent'} : undef; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
&verbose('WARNING');
my ($min_coverage,
$min_percent,
$best_in_genome,
$rpp) =
rearrange([
'COVERAGE',
'PERCENT_ID',
'BEST_IN_GENOME',
'REJECT_PROCESSED_PSEUDOS',], @args);
if (defined ($min_coverage)) {
$self->min_coverage($min_coverage);
} elsif ( !defined($self->min_coverage) ) {
warn("\n\tmin_coverage not set, setting it to zero (0)!\n\n$!");
$self->min_coverage(0);
}
if (defined ($min_percent)) {
$self->min_percent($min_percent);
} elsif (!defined($self->min_percent)) {
warn("\n\tmin_percent not set, setting it to zero (0)!\n\n$!");
$self->min_percent(0);
}
if (defined ($best_in_genome)) {
$self->best_in_genome($best_in_genome);
} elsif (!defined($self->best_in_genome)) {
warn("\n\tbest_in_genome not set, setting it to one (1)!\n\n$!");
$self->best_in_genome(1);
}
if (defined ($rpp)) {
$self->reject_processed_pseudos($rpp);
} elsif (!defined($self->reject_processed_pseudos)) {
warn("\n\treject_processed_pseudos, setting it to one (1)!\n\n$!");
$self->reject_processed_pseudos(1);
}
return $self;
}
} |
reject_processed_pseudos | description | prev | next | Top |
sub reject_processed_pseudos
{ my $self = shift;
$self->{'_reject_processed_pseudos'} = shift if(@_);
return exists($self->{'_reject_processed_pseudos'}) ? $self->{'_reject_processed_pseudos'} : 0;
}
1; } |
General documentation
No general documentation available.