# Raw content of Bio::EnsEMBL::Compara::Filter::Greedy
package Bio::EnsEMBL::Compara::Filter::Greedy;
use vars qw(@ISA);
use strict;
#use Bio::EnsEMBL::DnaDnaAlignFeature;
=head2 filter
Title : filter
Usage : filter(\@)
Function: Clean the Array of Bio::EnsEMBL::FeaturePairs in three steps.
First, determine the highest scored hit, and fix the expected hit strand.
Second, hits on the expected strand are kept if they do not overlap,
either on query or subject sequence, previously stored, higher scored hits.
If a hit goes through the second step, the third test makes sure that the hit
is in a coherent position relative to the previous ones.
Returns : Array reference of Bio::EnsEMBL::FeaturePairs
Args : Array reference of Bio::EnsEMBL::FeaturePairs
=cut
# filter
#
# Greedily selects a mutually consistent subset of alignment features.
#
# Features are visited from highest to lowest score. The best-scoring feature
# is always kept and its hit strand becomes the reference strand. Every later
# feature is kept only if it
#   1. lies on the reference hit strand,
#   2. does not overlap any already kept feature, on either the query or the
#      hit sequence, and
#   3. is ordered consistently with every kept feature (same order on both
#      sequences for hit strand 1, opposite order for hit strand -1).
#
# Args    : $self                - invocant (not used)
#           $DnaDnaAlignFeatures - array reference of feature objects that
#                                  provide score, strand, hstrand, start, end,
#                                  hstart, hend and reverse_complement
# Returns : array reference of the kept features, highest score first
# Note    : features whose query strand is negative are modified in place by
#           calling reverse_complement on them.
sub filter {
    my ($self, $DnaDnaAlignFeatures) = @_;

    # Sort into a fresh lexical array: "my @{...}" is not valid Perl, and
    # this way the order of the caller's array is left untouched.
    my @sorted_features =
        sort { $b->score <=> $a->score } @{$DnaDnaAlignFeatures};

    my @DnaDnaAlignFeatures_filtered;
    my $ref_strand;

    FEATURE:
    foreach my $fp (@sorted_features) {

        # Put every feature on a non-negative query strand so that only the
        # hit strand has to be compared below (presumably reverse_complement
        # flips both strands - confirm against the feature class).
        if ($fp->strand < 0) {
            $fp->reverse_complement;
        }

        # The highest scored feature is always kept and fixes the hit strand
        # expected from all the others.
        if (!@DnaDnaAlignFeatures_filtered) {
            push @DnaDnaAlignFeatures_filtered, $fp;
            $ref_strand = $fp->hstrand;
            next FEATURE;
        }

        next FEATURE if ($fp->hstrand != $ref_strand);

        foreach my $feature_filtered (@DnaDnaAlignFeatures_filtered) {
            my ($start, $end, $hstart, $hend) = (
                $feature_filtered->start,
                $feature_filtered->end,
                $feature_filtered->hstart,
                $feature_filtered->hend,
            );

            # Reject any intersection with a kept feature. A plain interval
            # intersection test also catches a candidate that completely
            # encloses the kept feature, which an endpoint-inside test misses.
            next FEATURE if ($fp->start  <= $end  && $fp->end  >= $start);
            next FEATURE if ($fp->hstart <= $hend && $fp->hend >= $hstart);

            # Past this point the candidate is strictly before or strictly
            # after the kept feature on each sequence; check that the two
            # orders agree with the reference strand.
            if ($ref_strand == 1) {
                # Forward hit: same relative order on query and hit.
                unless (($fp->start > $end   && $fp->hstart > $hend) ||
                        ($fp->end   < $start && $fp->hend   < $hstart)) {
                    next FEATURE;
                }
            } elsif ($ref_strand == -1) {
                # Reverse hit: opposite relative order on query and hit.
                unless (($fp->start > $end   && $fp->hend   < $hstart) ||
                        ($fp->end   < $start && $fp->hstart > $hend)) {
                    next FEATURE;
                }
            }
        }

        push @DnaDnaAlignFeatures_filtered, $fp;
    }

    return \@DnaDnaAlignFeatures_filtered;
}
1;