See
Bio::Tools::BPlite for more detailed information about the
BPlite BLAST parsing objects.
The original BPlite.pm module was written by Ian Korf!
See
http://sapiens.wustl.edu/~ikorf

The Sbjct object encapsulates a Hit in a Blast database
search. The Subjects are the "Hits" for a particular query. A
Subject may be made up of multiple High Scoring Pairs (HSP), which are
accessed through the nextHSP method.
If you are searching for the P-value or percent identity that is
specific to each HSP, you will need to use the nextHSP method to
get access to that data.
# nextHSP
#
# Parse the next High Scoring Pair (HSP) of this subject from the report
# stream and return it as a Bio::Tools::BPlite::HSP object.
#
# The method reads the two-line score header, extracts score, bits,
# identities, positives, gaps, frame(s), P-value and Expect value, then
# collects the alignment lines in triplets (query / match line / subject)
# until it sees either the next "Score" line (pushed back for the next call)
# or a line that starts another section of the report (pushed back, and
# HSP_ALL_PARSED is set so that later calls return immediately).
#
# Args    : none
# Returns : a Bio::Tools::BPlite::HSP object, or undef when all HSPs of this
#           subject have been parsed or the stream ends before a complete
#           score header could be read.
# Throws  : via $self->throw when the score header or an alignment line
#           cannot be parsed.
sub nextHSP {
    my ($self) = @_;

    # 'return undef' (rather than a bare 'return') is kept on purpose so
    # that callers in list context see exactly what they saw before.
    return undef if $self->{'HSP_ALL_PARSED'};

    # ---- score header (two physical lines joined into one string) ----
    my $scoreline = $self->_readline();
    my $nextline  = $self->_readline();
    return undef if not defined $nextline;
    $scoreline .= $nextline;

    # Two header layouts are accepted: "Score = N (B bits)" and
    # "Score = B bits (N)".
    my ($score, $bits);
    if ($scoreline =~ /\d bits\)/) {
        ($score, $bits) = $scoreline =~
            /Score = (\d+) \((\S+) bits\)/;
    }
    else {
        ($bits, $score) = $scoreline =~
            /Score =\s+(\S+) bits \((\d+)/;
    }

    my ($match, $hsplength) = ($scoreline =~ /Identities = (\d+)\/(\d+)/);
    my ($positive)          = ($scoreline =~ /Positives = (\d+)/);
    my ($gaps)              = ($scoreline =~ /Gaps = (\d+)/);

    my $report_type = $self->report_type();
    my ($qframe, $sframe);
    if ($report_type eq 'TBLASTX') {
        ($qframe, $sframe) = $scoreline =~ /Frame =\s+([+-]\d)\s+\/\s+([+-]\d)/;
    }
    elsif ($report_type eq 'TBLASTN') {
        ($sframe) = $scoreline =~ /Frame =\s+([+-]\d)/;
    }
    else {
        ($qframe) = $scoreline =~ /Frame =\s+([+-]\d)/;
    }

    $positive = $match if not defined $positive;
    $gaps     = '0'    if not defined $gaps;

    # P-value: prefer an explicit "P = x" / "Sum P(n) = x" field and fall
    # back to the Expect value when the header has none.
    my ($p) = ($scoreline =~ /[Sum ]*P[\(\d+\)]* = (\S+)/);
    unless (defined $p) {
        ($p) = $scoreline =~ /Expect(?:\(\d+\))? =\s+([^\s,]+)/;
    }
    # A field followed by another one ("Expect = 2e-60, Method: ...") would
    # otherwise leave the separating comma glued to the value.
    $p =~ s/,+\z// if defined $p;

    my ($exp) = ($scoreline =~ /Expect(?:\(\d+\))? =\s+([^\s,]+)/);
    $exp = -1 unless defined $exp;

    $self->throw("Unable to parse '$scoreline'") unless defined $score;

    # ---- collect alignment lines ----
    # A lexical is used instead of $_ so the caller's $_ is left untouched.
    my @hspline;
    while (defined(my $line = $self->_readline())) {
        if ($line =~ /^WARNING:|^NOTE:/) {
            # Skip the rest of the WARNING/NOTE paragraph (up to a blank line).
            while (defined($line = $self->_readline())) {
                last if $line !~ /\S/;
            }
        }
        elsif ($line !~ /\S/) {
            next;
        }
        elsif ($line =~ /Strand HSP/) {
            next;    # strand annotation, carries no alignment data
        }
        elsif ($line =~ /^\s*Strand/) {
            next;    # strand annotation, carries no alignment data
        }
        elsif ($line =~ /^\s*Score/) {
            # Header of the next HSP: leave it for the next call.
            $self->_pushback($line);
            last;
        }
        elsif ($line =~ /^>|^Histogram|^Searching|^Parameters|^\s+Database:|^CPU\stime|^\s*Lambda/)
        {
            # Start of another report section: this subject has no more HSPs.
            $self->_pushback($line);
            $self->{'HSP_ALL_PARSED'} = 1;
            last;
        }
        elsif ($line =~ /^\s*Frame/) {
            # Frame given on its own line instead of inside the score header.
            if ($report_type eq 'TBLASTX') {
                ($qframe, $sframe) = $line =~ /Frame = ([\+-]\d)\s+\/\s+([\+-]\d)/;
            }
            elsif ($report_type eq 'TBLASTN') {
                ($sframe) = $line =~ /Frame = ([\+-]\d)/;
            }
            else {
                ($qframe) = $line =~ /Frame = ([\+-]\d)/;
            }
        }
        else {
            # Query line, then match line, then subject line.  A "pattern"
            # line between query and match line is skipped.
            my $midline = $self->_readline();
            if (defined $midline and $midline =~ /^\s*pattern/) {
                $midline = $self->_readline();
            }
            my $sbjctline = $self->_readline();
            push @hspline, $line, $midline, $sbjctline;
        }
    }

    # ---- parse alignment triplets ----
    my ($qb, $qe, $sb, $se) = (0, 0, 0, 0);
    my (@QL, @SL, @AS);
    for (my $i = 0; $i < @hspline; $i += 3) {
        my ($qline, $midline, $sbjctline) = @hspline[$i .. $i + 2];

        # Capture into lexicals and verify the match: after a failed match
        # $1..$3 would still hold values from an earlier successful one.
        my ($qstart, $qseq, $qend) = defined $qline
            ? $qline =~ /^(?:Query|Trans):\s+(\d+)\s*([\D\S]+)\s+(\d+)/
            : ();
        unless (defined $qend) {
            my $shown = defined $qline ? $qline : '';
            $self->throw("Unable to parse query alignment line '$shown'");
        }

        my ($sstart, $sseq, $send) = defined $sbjctline
            ? $sbjctline =~ /^Sbjct:\s+(\d+)\s*([\D\S]+)\s+(\d+)/
            : ();
        unless (defined $send) {
            my $shown = defined $sbjctline ? $sbjctline : '';
            $self->throw("Unable to parse subject alignment line '$shown'");
        }

        $qb = $qstart unless $qb;    # keep the start of the first segment
        $qe = $qend;                 # and the end of the last one
        $sb = $sstart unless $sb;
        $se = $send;

        # The match line has no label; cut the columns under the query
        # residues.  A match line that is shorter than that window (blank
        # or with trailing blanks removed) is padded with spaces so the
        # homology string stays in register with the sequences.
        my $offset = index($qline, $qseq);
        my $width  = CORE::length($qseq);
        $midline = '' unless defined $midline;
        $midline =~ s/[\r\n]+\z//;
        my $missing = $offset + $width - CORE::length($midline);
        $midline .= ' ' x $missing if $missing > 0;

        push @QL, $qseq;
        push @SL, $sseq;
        push @AS, substr($midline, $offset, $width);
    }

    my $ql = join("", @QL);
    my $sl = join("", @SL);
    my $as = join("", @AS);

    # Query name/length come from the parent object when it provides them.
    my ($qname, $qlength) = ('unknown', 'unknown');
    my $parent = $self->{'PARENT'};
    if (defined $parent and $parent->can('query')) {
        $qname   = $parent->query;
        $qlength = $parent->qlength;
    }

    my $hsp = Bio::Tools::BPlite::HSP->new(
        '-score'       => $score,
        '-bits'        => $bits,
        '-match'       => $match,
        '-positive'    => $positive,
        '-gaps'        => $gaps,
        '-hsplength'   => $hsplength,
        '-p'           => $p,
        '-exp'         => $exp,
        '-queryBegin'  => $qb,
        '-queryEnd'    => $qe,
        '-sbjctBegin'  => $sb,
        '-sbjctEnd'    => $se,
        '-querySeq'    => $ql,
        '-sbjctSeq'    => $sl,
        '-homologySeq' => $as,
        '-queryName'   => $qname,
        '-sbjctName'   => $self->{'NAME'},
        '-queryLength' => $qlength,
        '-sbjctLength' => $self->{'LENGTH'},
        '-queryFrame'  => $qframe,
        '-sbjctFrame'  => $sframe,
        '-blastType'   => $report_type,
    );
    return $hsp;
}
The rest of the documentation details each of the object methods.
Internal methods are usually prefixed with an underscore (_).