Bio::EnsEMBL::Analysis::Tools::Pmatch
First_PMF
Toolbar
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
No synopsis!
Description
No description!
Methods
Methods description
Title : make_coord_pair Usage : Function: Example : Returns : Args : |
Title : make_mergelist Usage : Function: Example : Returns : Args : |
Title : merge_hits Usage : Function: Example : Returns : Args : |
Title : new_merged_hit Usage : Function: Example : Returns : Args : |
Title : prune Usage : Function: Example : Returns : Args : |
Methods code
sub extend_hits
{ my ($self, $hits) = @_;
my @fhits;
my @rhits;
my @newhits;
foreach my $mh(@$hits){
if($mh->strand == 1){
push (@fhits, $mh);
}
else {
push (@rhits, $mh);
}
}
@fhits = sort {$a->qstart <=> $b->qstart} @fhits;
@rhits = sort {$b->qstart <=> $a->qstart} @rhits;
while(scalar(@fhits)){
my $hit = shift(@fhits);
push (@newhits, $hit);
foreach my $sh(@fhits){
die ("Argh!") unless $hit->strand == $sh->strand;
last if ($hit->qend+$self->{_maxintronlen} < $sh->qstart);
if ($sh->tstart > $hit->tend &&
$sh->tend > $hit->tend &&
abs($sh->tstart - $hit->tend) <= 3 &&
$sh->qstart >= $hit->qend
) {
$hit->subsume_MergedHit($sh);
}
}
}
while(scalar(@rhits)){
my $hit = shift(@rhits);
push (@newhits, $hit);
foreach my $sh(@rhits){
die ("Argh!") unless $hit->strand == $sh->strand;
last if ($hit->qend-$self->{_maxintronlen} > $sh->qstart);
if ($sh->tstart > $hit->tend &&
$sh->tend > $hit->tend &&
abs($sh->tstart - $hit->tend) <= 3 &&
$hit->qend >= $sh->qstart
) {
$hit->subsume_MergedHit($sh);
}
}
}
return (\@newhits); } |
sub make_coord_pair
{ my ($self) = @_;
my @cols = split();
my $protid = $cols[5];
if($protid =~ /\w+\|\w+\|\w+\|([\w\.]+)\|/) {
$protid = $1;
}
my $strand = 1;
if ( $cols[3] < $cols[2] ) { $strand=-1; }
my $cp = new Bio::EnsEMBL::Analysis::Tools::Pmatch::CoordPair(
-query => $cols[1],
-target => $protid,
-qstart => $cols[2],
-qend => $cols[3],
-tstart => $cols[6],
-tend => $cols[7],
-percent=> $cols[8],
-strand => $strand,
);
my $proteins = $self->proteins;
my $ph = $$proteins{$protid};
if (!defined $ph) {
$ph = new Bio::EnsEMBL::Analysis::Tools::Pmatch::ProteinHit(-id=>$protid);
$$proteins{$protid} = $ph;
}
$self->proteins($proteins);
my $ch = $ph->get_ContigHit($cols[1]);
if (!defined $ch) {
$ch = Bio::EnsEMBL::Analysis::Tools::Pmatch::ContigHit->new(-id => $cols[1]);
$ph->add_ContigHit($ch);
}
$ch->add_CoordPair($cp); } |
sub make_mergelist
{ my ($self, $ph) = @_;
my @hits = ();
foreach my $ch($ph->each_ContigHit()) {
my @cps = @{$ch->each_ForwardPair()};
@cps = sort {$a->qstart <=> $b->qstart} @cps;
my @mps = @{$ch->each_ReversePair()};
@mps = sort {$b->qstart <=> $a->qstart} @mps;
push (@cps,@mps);
my $first = shift(@cps);
my $mh = $self->new_merged_hit($first);
push(@hits,$mh);
my $prev = $hits[$#hits];
my $prev_cp = $first;
CP:
foreach my $cp(@cps) {
my $strand = 1;
if ($cp->qend() < $cp->qstart()) { $strand = -1; }
if( $strand == $prev->strand &&
( ($cp->tstart >= $prev_cp->tend) ||
(($prev_cp->tend - $cp->tstart) <= 3)) &&
$cp->qstart*$strand >= $prev_cp->qend*$strand
)
{
my $coverage = $cp->tend - $cp->tstart + 1;
$coverage += $prev->coverage();
my $diff = $prev_cp->tend - $cp->tstart;
if($diff >=0) {
$diff++;
$coverage -= $diff;
}
$prev->coverage($coverage);
$prev->add_CoordPair($cp);
$prev_cp = $cp;
}
else {
my $mh = $self->new_merged_hit($cp);
push(@hits,$mh);
$prev = $hits[$#hits];
$prev_cp = $cp;
}
}
}
@hits = @{$self->extend_hits(\@hits)};
my $plengths = $self->plengths;
my $protlen = $$plengths{$ph->id};
warn "No length for " . $ph->id . "\n" unless $protlen;
return unless $protlen;
foreach my $hit(@hits) {
my $percent = $hit->coverage;
$percent *= 100;
$percent /= $protlen; $percent=sprintf("%.1f",$percent);
$hit->coverage($percent);
}
return\@ hits; } |
sub maxintronlen
{ my ($self, $maxintronlen) = @_;
if(defined($maxintronlen)){
if ($maxintronlen =~ /\d+/) {
$self->{_maxintronlen} = $maxintronlen;
}
else {
throw("[$maxintronlen] is not numeric.");
}
}
return $self->{_maxintronlen}; } |
sub merge_hits
{ my ($self) = @_;
my @merged;
my $protref = $self->proteins;
foreach my $p_hit(values %$protref) {
my @allhits = @{$self->make_mergelist($p_hit)};
my @chosen = @{$self->prune(\@allhits)};
push(@merged,@chosen);
}
return\@ merged; } |
sub min_coverage
{ my ($self, $min_coverage) = @_;
if(defined($min_coverage)){
if ($min_coverage =~ /\d+/) {
$self->{_min_coverage} = $min_coverage;
}
else {
throw("[$min_coverage] is not numeric.");
}
}
return $self->{_min_coverage}; } |
sub new
{ my ($class, @args) = @_;
my $self = bless {}, $class;
my ($plengths, $maxintronlen, $min_coverage) =
rearrange(['PLENGTHS','MAXINTRONLEN', 'MIN_COVERAGE'], @args);
$self->min_coverage(25);
$self->maxintronlen(50000);
$self->plengths($plengths);
$self->maxintronlen($maxintronlen);
$self->min_coverage($min_coverage);
throw("No protlengths data") unless $self->plengths;
throw("No maximum intron length") unless defined $self->maxintronlen;
return $self; } |
sub new_merged_hit
{ my ($self,$cp) = @_;
my $coverage = $cp->tend - $cp->tstart + 1;
my $mh = new Bio::EnsEMBL::Analysis::Tools::Pmatch::MergedHit( -query => $cp->query(),
-target => $cp->target(),
-strand => $cp->strand(),
-coverage => $coverage,
);
$mh->add_CoordPair($cp);
return $mh; } |
sub plengths
{ my ($self, $plengths) = @_;
if(defined($plengths)){
if (ref($plengths) eq "HASH") {
$self->{_plengths} = $plengths;
}
else {
throw("[$plengths] is not a hash ref.");
}
}
return $self->{_plengths}; } |
sub proteins
{ my ($self, $proteins) = @_;
if(!$self->{_proteins}){
$self->{_proteins} = {};
}
if(defined($proteins)){
if (ref($proteins) eq "HASH") {
$self->{_proteins} = $proteins;
}
else {
throw("[$proteins] is not a hash ref.");
}
}
return $self->{_proteins}; } |
sub prune
{ my ($self, $all) = @_;
my @chosen = ();
my $lower_threshold = $self->min_coverage;
my @all = sort {$b->coverage <=> $a->coverage} @$all;
my $first = shift(@all);
if($first->coverage < $lower_threshold){
return\@ chosen;
}
push (@chosen,$first);
my $curr_pc = $first->coverage() - 2;
PRUNE:
foreach my $hit(@all) {
last PRUNE if $hit->coverage < $lower_threshold;
last PRUNE if $hit->coverage < $curr_pc;
push (@chosen,$hit);
}
return\@ chosen;
}
1; } |
General documentation
No general documentation available.