This can be used as a default filter for ProteinAnnotation. It
removes overlaps in a "sensible" way.
sub cut
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_cut} = $val;
}
return $self->{_cut}; } |
sub filter_results
{ my ($self, $res) = @_;
my $sort_routine = $self->sort_function;
my (%res_by_name, @all_fps);
foreach my $f (@$res) {
push @{$res_by_name{$f->seqname}->{$f->hseqname}}, $f;
}
foreach my $seqname (keys %res_by_name) {
my @kept_hits;
foreach my $fam (keys %{$res_by_name{$seqname}}) {
my @hits_so_far;
my @hits = sort $sort_routine @{$res_by_name{$seqname}->{$fam}};
foreach my $h (@hits) {
my $overlap = 0;
foreach my $kh (@hits_so_far) {
if ($h->start <= $kh->end and
$h->end >= $kh->start) {
$overlap = 1;
last;
}
}
if (not $overlap) {
push @hits_so_far, $h;
}
}
push @kept_hits, @hits_so_far;
}
@kept_hits = sort $sort_routine @kept_hits;
if (not $self->cut) {
my @hits_so_far;
foreach my $h (@kept_hits) {
my $overlap = 0;
foreach my $kh (@hits_so_far) {
if ($h->start <= $kh->end and
$h->end >= $kh->start) {
$overlap = 1;
last;
}
}
if (not $overlap) {
push @hits_so_far, $h;
}
}
push @all_fps, @hits_so_far;
} else {
my @hit_string;
foreach my $h (reverse @kept_hits) {
foreach my $idx ($h->start .. $h->end) {
$hit_string[$idx] = $h;
}
}
my $max = scalar(@hit_string) - 1;
my @domains;
foreach my $idx(1 .. $max) {
if (defined($hit_string[$idx])) {
if (not @domains or
$domains[-1]->{end} < $idx - 1 or
$hit_string[$idx] != $domains[-1]->{domain}) {
push @domains, {
start => $idx,
end => $idx,
domain => $hit_string[$idx],
};
} elsif ($hit_string[$idx] == $domains[-1]->{domain}) {
$domains[-1]->{end} = $idx;
}
}
}
my @fps;
foreach my $dom (@domains) {
my $f = $dom->{domain};
my $st = $dom->{start};
my $en = $dom->{end};
if ($st == $f->start and
$en == $f->end) {
push @fps, $f;
} else {
my $newfp = Bio::EnsEMBL::ProteinFeature->new(-seqname => $f->seqname,
-start => $st,
-end => $en,
-hseqname => $f->hseqname,
-hstart => $f->hstart,
-hend => $f->hend,
-score => $f->score,
-percent_id => $f->percent_id,
-analysis => $f->analysis);
push @fps, $newfp;
}
}
push @all_fps, @fps;
}
}
return\@ all_fps; } |
sub new
{ my ($caller, @args) = @_;
my $class = ref($caller) || $caller;
my $self = bless({}, $class);
my ($sort_func,
$cut) = rearrange(['sort',
'cut',
], @args);
if (defined $sort_func) {
$self->sort_function($sort_func);
}
if (defined $cut) {
$self->cut($cut);
}
return $self; } |
sub sort_function
{ my ($self, $func) = @_;
if (defined $func) {
$self->{_sort_func} = $func;
}
if (defined $self->{_sort_func}) {
return $self->{_sort_func};
} else {
my $ref = sub { $b->score <=> $a->score };
return $ref;
}
}
1; } |