Raw content of Bio::EnsEMBL::Analysis::Tools::BPlite::Sbjct # $Id: Sbjct.pm,v 1.2 2005/12/19 09:50:07 ba1 Exp $ ############################################################################### # Bio::EnsEMBL::Analysis::Tools::BPlite::Sbjct ############################################################################### # # The original BPlite.pm module has been written by Ian Korf ! # see http://sapiens.wustl.edu/~ikorf # # You may distribute this module under the same terms as perl itself package Bio::EnsEMBL::Analysis::Tools::BPlite::Sbjct; use strict; use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::Utils::Argument qw( rearrange ); use Bio::EnsEMBL::Analysis::Tools::BPlite::HSP; # we want to use HSP #use overload '""' => 'name'; use vars qw(@ISA); @ISA = qw(); sub new { my ($caller, @args) = @_; my $class = ref($caller) || $caller; my $self = bless({}, $class); ($self->{'NAME'},$self->{'LENGTH'},$self->{'FH'}, $self->{'LASTLINE'},$self->{'PARENT'}) = rearrange([qw(NAME LENGTH FH LASTLINE PARENT )],@args); $self->{'HSP_ALL_PARSED'} = 0; return $self; } =head2 name Title : name Usage : $name = $obj->name(); Function : returns the name of the Sbjct Example : Returns : name of the Sbjct Args : =cut sub name {shift->{'NAME'}} =head2 nextFeaturePair Title : nextFeaturePair Usage : $name = $obj->nextFeaturePair(); Function : same as the nextHSP function Example : Returns : next FeaturePair Args : =cut sub nextFeaturePair {shift->nextHSP}; # just another name =head2 nextHSP Title : nextHSP Usage : $hsp = $obj->nextHSP(); Function : returns the next available High Scoring Pair Example : Returns : Bio::Tools::HSP or null if finished Args : =cut sub nextHSP { my ($self) = @_; return undef if $self->{'HSP_ALL_PARSED'}; ############################ # get and parse scorelines # ############################ my $scoreline = $self->{'LASTLINE'}; my $FH = $self->{'FH'}; my $nextline = <$FH>; return undef if not defined $nextline; $scoreline .= $nextline; my ($score, $bits); if ($scoreline =~ /\d bits\)/) { ($score, $bits) = $scoreline =~ /Score = (\d+) \((\S+) bits\)/; # WU-BLAST } else { ($bits, $score) = $scoreline =~ /Score =\s+(\S+) bits \((\d+)/; # NCBI-BLAST } my ($match, $length, $percent) = $scoreline =~ /Identities = (\d+)\/(\d+)(?:\s*\((\d+)\%\))?/; my ($positive) = $scoreline =~ /Positives = (\d+)/; my $frame = '0'; $positive = $match if not defined $positive; my ($p) = $scoreline =~ /[Sum ]*P[\(\d+\)]* = ([^\,\s]+)/; if (not defined $p) {(undef, $p) = $scoreline =~ /Expect(\(\d+\))? =\s+([^\,\s]+)/} throw("Unable to parse '$scoreline'") if not defined $score; ####################### # get alignment lines # ####################### my @hspline; while(<$FH>) { if ($_ =~ /^WARNING:|^NOTE:/) { while(<$FH>) {last if $_ !~ /\S/} } elsif ($_ !~ /\S/) {next} elsif ($_ =~ /Strand HSP/) {next} # WU-BLAST non-data elsif ($_ =~ /^\s*Strand/) {next} # NCBI-BLAST non-data elsif ($_ =~ /^\s*Score/) {$self->{'LASTLINE'} = $_; last} elsif ($_ =~ /^>|^Parameters|^\s+Database:|^CPU\stime/) { $self->{'LASTLINE'} = $_; $self->{'PARENT'}->{'LASTLINE'} = $_; $self->{'HSP_ALL_PARSED'} = 1; last; } elsif( $_ =~ /^\s*Frame\s*=\s*([-\+]\d+)/ ) { $frame = $1; } else { push @hspline, $_; # store the query line $nextline = <$FH> ; # Skip "pattern" line when parsing PHIBLAST reports, otherwise store the alignment line my $l1 = ($nextline =~ /^\s*pattern/) ? <$FH> : $nextline; # my $l1 = push @hspline, $l1; # grab/store the alignment line push @hspline, $l1; # store the alignment line my $l2 = <$FH>; push @hspline, $l2; # grab/store the sbjct line } } ######################### # parse alignment lines # ######################### my ($ql, $sl, $as) = ("", "", ""); my ($qb, $qe, $sb, $se) = (0,0,0,0); my (@QL, @SL, @AS); # for better memory management for(my $i=0;$i<@hspline;$i+=3) { # warn $hspline[$i], $hspline[$i+2]; $hspline[$i] =~ /^Query:\s+(\d+)\s*([\D\S]+)\s+(\d+)/; $ql = $2; $qb = $1 unless $qb; $qe = $3; my $offset = index($hspline[$i], $ql); $as = substr($hspline[$i+1], $offset, CORE::length($ql)); $hspline[$i+2] =~ /^Sbjct:\s+(\d+)\s*([\D\S]+)\s+(\d+)/; $sl = $2; $sb = $1 unless $sb; $se = $3; push @QL, $ql; push @SL, $sl; push @AS, $as; } ################## # the HSP object # ################## $ql = join("", @QL); $sl = join("", @SL); $as = join("", @AS); my $hsp = new Bio::EnsEMBL::Analysis::Tools::BPlite::HSP('-score'=>$score, '-bits'=>$bits, '-match'=>$match, '-positive'=>$positive, '-percent'=>$percent, '-p'=>$p, '-queryBegin'=>$qb, '-queryEnd'=>$qe, '-sbjctBegin'=>$sb, '-sbjctEnd'=>$se, '-querySeq'=>$ql, '-sbjctSeq'=>$sl, '-homologySeq'=>$as, '-queryName'=>$self->{'PARENT'}->query, '-sbjctName'=>$self->{'NAME'}, '-queryLength'=>$self->{'PARENT'}->qlength, '-sbjctLength'=>$self->{'LENGTH'}, '-frame' => $frame); return $hsp; } 1;