# Constructor.
#
# May be invoked on a class name or on an existing object (in which case the
# object's class is reused).  The remaining arguments are handed to
# rearrange(), which is defined outside this block and is expected to return
# the values for NAME, LENGTH, FH, LASTLINE and PARENT in that order; they are
# stored under those keys.  HSP_ALL_PARSED starts out as 0 and is switched on
# by nextHSP once the last HSP of this subject has been consumed.
#
# Returns the newly blessed object.
sub new {
    my ($caller, @args) = @_;

    my $class = ref($caller) || $caller;
    my $self  = bless({}, $class);

    # One key list drives both the argument lookup and the hash slice, so the
    # two can never get out of step.  rearrange() receives its own copy.
    my @fields = qw(NAME LENGTH FH LASTLINE PARENT);
    @{$self}{@fields} = rearrange([@fields], @args);

    $self->{'HSP_ALL_PARSED'} = 0;

    return $self;
}
# Parse the next HSP of this subject from the report and return it.
#
# Reading starts from the score line remembered in $self->{'LASTLINE'} and
# continues on the filehandle in $self->{'FH'}.  The method stops when it
# meets either the next "Score" line (stored in LASTLINE for the following
# call) or a line that ends this subject's section ('>', 'Parameters',
# 'Database:' or 'CPU time'); in the latter case the line is also handed to
# the PARENT object's LASTLINE and HSP_ALL_PARSED is set so that later calls
# return immediately.
#
# Returns: a Bio::EnsEMBL::Analysis::Tools::BPlite::HSP object, or undef when
#          all HSPs have been parsed or the filehandle is exhausted.
# Throws:  if the score line cannot be parsed, if the input ends in the
#          middle of an alignment block, or if a Query:/Sbjct: alignment
#          line does not have the expected layout.
sub nextHSP {
    my ($self) = @_;

    # undef (rather than an empty list) is returned on purpose: it keeps the
    # return value identical for existing callers in any context.
    return undef if $self->{'HSP_ALL_PARSED'};

    my $FH = $self->{'FH'};

    # The HSP header is the remembered score line plus the line after it.
    my $scoreline = $self->{'LASTLINE'};
    my $nextline  = <$FH>;
    return undef if not defined $nextline;
    $scoreline .= $nextline;

    # Two header layouts are accepted: "Score = N (B bits)" and
    # "Score = B bits (N".
    my ($score, $bits);
    if ($scoreline =~ /\d bits\)/) {
        ($score, $bits) = $scoreline =~ /Score = (\d+) \((\S+) bits\)/;
    }
    else {
        ($bits, $score) = $scoreline =~ /Score =\s+(\S+) bits \((\d+)/;
    }
    throw("Unable to parse '$scoreline'") if not defined $score;

    # $percent stays undef when the header carries no "(NN%)" part.
    my ($match, $length, $percent) =
        $scoreline =~ /Identities = (\d+)\/(\d+)(?:\s*\((\d+)\%\))?/;
    my ($positive) = $scoreline =~ /Positives = (\d+)/;
    $positive = $match if not defined $positive;

    # Prefer a "P = ..." value; fall back to "Expect = ...".
    my ($p) = $scoreline =~ /[Sum ]*P[\(\d+\)]* = ([^\,\s]+)/;
    if (not defined $p) {
        (undef, $p) = $scoreline =~ /Expect(\(\d+\))? =\s+([^\,\s]+)/;
    }

    my $frame = '0';

    # Alignment lines are collected in triplets: query line, the line below
    # it, subject line.  A lexical loop variable is used so that the caller's
    # $_ is not overwritten by the read loop.
    my @hspline;
    LINE: while (my $line = <$FH>) {
        if ($line =~ /^WARNING:|^NOTE:/) {
            # Discard the rest of the message, up to the next blank line.
            while (my $skipped = <$FH>) {
                last if $skipped !~ /\S/;
            }
        }
        elsif ($line !~ /\S/)         { next LINE }
        elsif ($line =~ /Strand HSP/) { next LINE }
        elsif ($line =~ /^\s*Strand/) { next LINE }
        elsif ($line =~ /^\s*Score/) {
            # Header of the following HSP: remember it for the next call.
            $self->{'LASTLINE'} = $line;
            last LINE;
        }
        elsif ($line =~ /^>|^Parameters|^\s+Database:|^CPU\stime/) {
            # End of this subject's section.
            $self->{'LASTLINE'}             = $line;
            $self->{'PARENT'}->{'LASTLINE'} = $line;
            $self->{'HSP_ALL_PARSED'}       = 1;
            last LINE;
        }
        elsif ($line =~ /^\s*Frame\s*=\s*([-\+]\d+)/) {
            $frame = $1;
        }
        else {
            # A "pattern" line directly after the query line is skipped.
            my $match_line = <$FH>;
            if (defined $match_line and $match_line =~ /^\s*pattern/) {
                $match_line = <$FH>;
            }
            my $sbjct_line = <$FH>;

            # Input ended inside a triplet: report it instead of building an
            # HSP from undefined lines.
            if (not defined $match_line or not defined $sbjct_line) {
                throw("Truncated alignment after '$line'");
            }
            push @hspline, $line, $match_line, $sbjct_line;
        }
    }

    my ($qb, $qe, $sb, $se) = (0, 0, 0, 0);
    my (@QL, @SL, @AS);
    for (my $i = 0; $i < @hspline; $i += 3) {
        my ($query_line, $match_line, $sbjct_line) = @hspline[$i .. $i + 2];

        # Captures are taken from the match's return list and checked; after
        # a failed match $1..$3 would still hold values from an earlier match.
        my ($q_begin, $q_seq, $q_end) =
            $query_line =~ /^Query:\s+(\d+)\s*([\D\S]+)\s+(\d+)/
            or throw("Unable to parse query alignment line '$query_line'");
        $qb = $q_begin unless $qb;    # begin comes from the first segment
        $qe = $q_end;                 # end comes from the last segment

        # The middle line is cut to the columns occupied by the query
        # residues.  CORE::length is spelled out as in the original code.
        my $offset = index($query_line, $q_seq);
        my $middle = substr($match_line, $offset, CORE::length($q_seq));

        my ($s_begin, $s_seq, $s_end) =
            $sbjct_line =~ /^Sbjct:\s+(\d+)\s*([\D\S]+)\s+(\d+)/
            or throw("Unable to parse subject alignment line '$sbjct_line'");
        $sb = $s_begin unless $sb;
        $se = $s_end;

        push @QL, $q_seq;
        push @SL, $s_seq;
        push @AS, $middle;
    }

    my $ql = join("", @QL);
    my $sl = join("", @SL);
    my $as = join("", @AS);

    # Arrow call instead of "new Class(...)": this package defines its own
    # new(), which makes the indirect-object form ambiguous to the parser.
    my $hsp = Bio::EnsEMBL::Analysis::Tools::BPlite::HSP->new(
        '-score'       => $score,
        '-bits'        => $bits,
        '-match'       => $match,
        '-positive'    => $positive,
        '-percent'     => $percent,
        '-p'           => $p,
        '-queryBegin'  => $qb,
        '-queryEnd'    => $qe,
        '-sbjctBegin'  => $sb,
        '-sbjctEnd'    => $se,
        '-querySeq'    => $ql,
        '-sbjctSeq'    => $sl,
        '-homologySeq' => $as,
        '-queryName'   => $self->{'PARENT'}->query,
        '-sbjctName'   => $self->{'NAME'},
        '-queryLength' => $self->{'PARENT'}->qlength,
        '-sbjctLength' => $self->{'LENGTH'},
        '-frame'       => $frame,
    );
    return $hsp;
}
1; } |