Raw content of Bio::EnsEMBL::Analysis::Tools::BPlite::HSP
###############################################################################
# Bio::EnsEMBL::Analysis::Tools::BPlite::HSP
###############################################################################
# HSP = High Scoring Pair (to all non-experts as I am)
#
# The original BPlite.pm module has been written by Ian Korf !
# see http://sapiens.wustl.edu/~ikorf
#
# You may distribute this module under the same terms as perl itself
package Bio::EnsEMBL::Analysis::Tools::BPlite::HSP;
use vars qw(@ISA);
use strict;
# to disable overloading comment this out:
#use overload '""' => '_overload';
# Object preamble - inheriets from Bio::SeqFeature::SimilarityPair
use Bio::SeqFeature::SimilarityPair;
use Bio::SeqFeature::Similarity;
@ISA = qw(Bio::SeqFeature::SimilarityPair);
sub new {
my ($class, @args) = @_;
# workaround to make sure frame is not set before strand is
# interpreted from query/subject info
# this workaround removes the key from the hash
# so the superclass does not try and work with it
# we'll take care of setting it in this module later on
my %newargs = @args;
foreach ( keys %newargs ) {
if( /frame$/i ) {
delete $newargs{$_};
}
}
# done with workaround
my $self = $class->SUPER::new(%newargs);
my ($score,$bits,$match,$positive,$percent,$p,$qb,$qe,$sb,$se,$qs,
$ss,$hs,$qname,$sname,$qlength,$slength, $frame) =
$self->_rearrange([qw(SCORE
BITS
MATCH
POSITIVE
PERCENT
P
QUERYBEGIN
QUERYEND
SBJCTBEGIN
SBJCTEND
QUERYSEQ
SBJCTSEQ
HOMOLOGYSEQ
QUERYNAME
SBJCTNAME
QUERYLENGTH
SBJCTLENGTH
FRAME
)],@args);
# Store the aligned query as sequence feature
if ($qe > $qb) { # normal query: start < end
$self->query( Bio::SeqFeature::Similarity->new
(-start=>$qb, -end=>$qe, -strand=>1,
-source=>"BLAST" ) ) }
else { # reverse query (i dont know if this is possible, but feel free to correct)
$self->query( Bio::SeqFeature::Similarity->new
(-start=>$qe, -end=>$qb, -strand=>-1,
-source=>"BLAST" ) ) }
# store the aligned subject as sequence feature
if ($se > $sb) { # normal subject
$self->subject( Bio::SeqFeature::Similarity->new
(-start=>$sb, -end=>$se, -strand=>1,
-source=>"BLAST" ) ) }
else { # reverse subject: start bigger than end
$self->subject( Bio::SeqFeature::Similarity->new
(-start=>$se, -end=>$sb, -strand=>-1,
-source=>"BLAST" ) ) }
# name the sequences
if($self->query->can('seq_id')){
$self->query->seq_id($qname); # query
$self->subject->seq_id($sname); # subject
}else{
$self->query->seqname($qname); # query
$self->subject->seqname($sname); # subject
}
# set lengths
$self->query->seqlength($qlength); # query
$self->subject->seqlength($slength); # subject
# set object vars
$self->score($score);
$self->bits($bits);
$self->significance($p);
$self->query->frac_identical($match);
$self->subject->frac_identical($match);
# $self->{'PERCENT'} = int( (1000 * $match) / $self->query->length ) / 10;
# $self->{'PERCENT'} = 100 * ($match / $self->subject->length);
$self->{'PERCENT'} = defined($percent) ? $percent : 100 * ($match / $self->subject->length);
$self->{'POSITIVE'} = $positive;
$self->{'QS'} = $qs;
$self->{'SS'} = $ss;
$self->{'HS'} = $hs;
defined $frame && $self->frame($frame);
return $self; # success - we hope!
}
# to disable overloading comment this out:
sub _overload {
my $self = shift;
return $self->start."..".$self->end." ".$self->bits;
}
=head2 P
Title : P
Usage : $hsp->P();
Function : returns the P (significance) value for a HSP
Example :
Returns : (double) significance value
Args :
=cut
sub P {
my ($self, @args) = @_;
my $float = $self->significance(@args);
my $match = '([+-]?)(?=\d|\.\d)\d*(\.\d*)?([Ee]([+-]?\d+))?'; # Perl Cookbook 2.1
if ($float =~ /^$match$/) {
# Is a C float
return $float;
} elsif ("1$float" =~ /^$match$/) {
# Almost C float, Jitterbug 974
return "1$float";
} else {
$self->warn("[HSP::P()] '$float' is not a known number format. Returning zero (0) instead.");
return 0;
}
}
=head2 percent
Title : percent
Usage : $hsp->percent();
Function : returns the percent matching
Returns : (double) percent matching
Args : none
=cut
sub percent {shift->{'PERCENT'}}
=head2 match
Title : match
Usage : $hsp->match();
Function : returns the match
Example :
Returns : (double) frac_identical
Args :
=cut
sub match {shift->query->frac_identical(@_)}
=head2 positive
Title : positive
Usage : $hsp->positive();
Function : returns the number of positive hits
Returns : (int) number of positive residue hits
Args : none
=cut
sub positive {shift->{'POSITIVE'}}
=head2 querySeq
Title : querySeq
Usage : $hsp->querySeq();
Function : returns the query sequence
Returns : (string) the Query Sequence
Args : none
=cut
sub querySeq {shift->{'QS'}}
=head2 sbjctSeq
Title : sbjctSeq
Usage : $hsp->sbjctSeq();
Function : returns the Sbjct sequence
Returns : (string) the Sbjct Sequence
Args : none
=cut
sub sbjctSeq {shift->{'SS'}}
=head2 homologySeq
Title : homologySeq
Usage : $hsp->homologySeq();
Function : returns the homologous sequence
Returns : (string) homologous sequence
Args : none
=cut
sub homologySeq {shift->{'HS'}}
=head2 qs
Title : qs
Usage : $hsp->qs();
Function : returns the Query Sequence (same as querySeq)
Returns : (string) query Sequence
Args : none
=cut
sub qs {shift->{'QS'}}
=head2 ss
Title : ss
Usage : $hsp->ss();
Function : returns the subject sequence ( same as sbjctSeq)
Returns : (string) Sbjct Sequence
Args : none
=cut
sub ss {shift->{'SS'}}
=head2 hs
Title : hs
Usage : $hsp->hs();
Function : returns the Homologous Sequence (same as homologySeq )
Returns : (string) Homologous Sequence
Args : none
=cut
sub hs {shift->{'HS'}}
sub frame {
my ($self, $frame) = @_;
if( defined $frame ) {
if( $frame == 0 ) {
$frame = undef;
} elsif( $frame !~ /^([+-])?([1-3])/ ) {
$self->warn("Specifying an invalid frame ($frame)");
$frame = undef;
} else {
# JB 949 - Creates too many warnings for blastx report.
# Future enhancement to BPLite::_parseHeader needed to set report type
# so that subject strand is used with tblastn and query strand with blastx
# if( ($1 eq '-' && $self->subject->strand >= 0) ||
# ($1 eq '+' && $self->subject->strand <= 0) ) {
# $self->warn("Frame ($frame) did not match strand of query match (".
# $self->subject->strand().")");
# }
# Set frame to GFF [0-2]
$frame = $2 - 1;
}
}
return $self->SUPER::frame($frame);
}
1;