Bio::Tools::BPlite Iteration
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::Tools::BPlite::Iteration - object for parsing single iteration
of a PSIBLAST report
Package variables
No package variables defined.
Included modules
Bio::Root::Root
Bio::Tools::BPlite
Bio::Tools::BPlite::Sbjct
Inherit
Bio::Root::Root
Synopsis
   use Bio::Tools:: BPpsilite;
open FH, "t/psiblastreport.out"; $report = Bio::Tools::BPpsilite->new(-fh=>\*FH); # determine number of iterations executed by psiblast $total_iterations = $report->number_of_iterations; $last_iteration = $report->round($total_iterations); # Process only hits found in last iteration ... $oldhitarray_ref = $last_iteration->oldhits; HIT: while($sbjct = $last_iteration->nextSbjct) { $id = $sbjct->name; $is_old = grep /\Q$id\E/, @$oldhitarray_ref; if ($is_old ){next HIT;} # do something with new hit... }
  # This assumed that you have $db pointing to a database, $out to an output file
# $slxdir to a directory and $psiout
# note the alignments can only be obtained if the flag "-m 6" is run.
# It might also be necessary to use the flag -v to get all alignments
#
my @psiparams = ('database' => $db , 'output' => $out, 'j' => 3, 'm' => 6,
'h' => 1.e-3 , 'F' => 'T' , 'Q' => $psiout );
my $factory = Bio::Tools::Run::StandAloneBlast->new(@psiparams);
my $report = $factory->blastpgp($seq);
my $total_iterations = $report->number_of_iterations();
my $last_iteration = $report->round($total_iterations);
my $align=$last_iteration->Align;
my $slxfile=$slxdir.$id.".slx";
my $slx = Bio::AlignIO->new('-format' => 'selex','-file' => ">".$slxfile );
$slx->write_aln($align);
Description
See the documentation for BPpsilite.pm for a description of the
Iteration.pm module.
Methods
AlignDescriptionCode
_fastForward
No description
Code
_parseHeader
No description
Code
_pushbackDescriptionCode
_readlineDescriptionCode
new
No description
Code
newhitsDescriptionCode
nextSbjctDescriptionCode
oldhitsDescriptionCode
qlengthDescriptionCode
queryDescriptionCode
Methods description
Aligncode    nextTop
 Title    : Align
Usage : $SimpleAlign = $obj->Align();
Function : Method to obtain a simpleAlign object from psiblast
Example : $SimpleAlign = $obj->Align();
Returns : SimpleAlign object or undef if not found.
BUG : Only works if psiblast has been run with m 6 flag
Args :
_pushbackcodeprevnextTop
 Title   : _pushback
Usage : $obj->_pushback($newvalue)
Function: puts a line previously read with _readline back into a buffer
Example :
Returns :
Args : newvalue
_readlinecodeprevnextTop
 Title   : _readline
Usage : $obj->_readline
Function: Reads a line of input.
Note that this method implicitely uses the value of $/ that is in effect when called. Note also that the current implementation does not handle pushed back input correctly unless the pushed back input ends with the value of $/. Example : Returns :
newhitscodeprevnextTop
 Title    :  newhits
Usage : $newhits = $obj->newhits();
Returns : reference to an array listing all the hits
from the current iteration which were not identified
in the previous iteration
Args : none
nextSbjctcodeprevnextTop
 Title    : nextSbjct
Usage : $sbjct = $obj->nextSbjct();
Function : Method of iterating through all the Sbjct retrieved
from parsing the report
#Example : while ( my $sbjct = $obj->nextSbjct ) {}
Returns : next Sbjct object or undef if finished
Args :
oldhitscodeprevnextTop
 Title    :  oldhits
Usage : $oldhits = $obj->oldhits();
Returns : reference to an array listing all the hits from
the current iteration which were identified and
above threshold in the previous iteration
Args : none
qlengthcodeprevnextTop
 Title    : qlength
Usage : $len = $obj->qlength();
Returns : length of query
Args : none
querycodeprevnextTop
 Title    : query
Usage : $query = $obj->query();
Function : returns the query object
Example :
Returns : query object
Args :
Methods code
AligndescriptionprevnextTop
sub Align {
    use Bio::SimpleAlign;
    my ($self) = @_;
    $self->_fastForward or return undef;
    my $lastline = $self->_readline();
    return undef unless $lastline =~ /^QUERY/; # If psiblast not run correctly
my (%sequence,%first,%last,$num); if ( $lastline =~ /^QUERY\s+(\d*)\s*([-\w]+)\s*(\d*)\s*$/){ my $name='QUERY'; my $start=$1; my $seq=$2; my $stop=$3; $seq =~ s/-/\./g; $start =~ s/ //g; $stop =~ s/ //g; $sequence{$name} .= $seq; if ($first{$name} eq ''){$first{$name}=$start;} if ($stop ne ''){$last{$name}=$stop;} # print "FOUND:\t$seq\t$start\t$stop\n";
$num=0; } while(defined($_ = $self->_readline()) ){ chomp($_); if ( $_ =~ /^QUERY\s+(\d+)\s*([\-A-Z]+)\s*(\+)\s*$/){ my $name='QUERY'; my $start=$1; my $seq=$2; my $stop=$3; $seq =~ s/-/\./g; $start =~ s/ //g; $stop =~ s/ //g; $sequence{$name} .= $seq; if ($first{$name} eq '') { $first{$name} = $start;} if ($stop ne '') { $last{$name}=$stop;} $num=0; } elsif ( $_ =~ /^(\d+)\s+(\d+)\s*([\-A-Z]+)\s*(\d+)\s*$/ ){ my $name=$1.".".$num; my $start=$2; my $seq=$3; my $stop=$4; $seq =~ s/-/\./g; $start =~ s/ //g; $stop =~ s/ //g; $sequence{$name} .= $seq; if ($first{$name} eq ''){$first{$name}=$start;} if ($stop ne ''){$last{$name}=$stop;} $num++; } } my $align = new Bio::SimpleAlign(); my @keys=sort keys(%sequence); foreach my $name (@keys){ my $nse = $name."/".$first{$name}."-".$last{$name}; my $seqobj = Bio::LocatableSeq->new( -seq => $sequence{$name}, -id => $name, -name => $nse, -start => $first{$name}, -end => $last{$name} ); $align->add_seq($seqobj); } return $align; } # Start of internal subroutines.
}
_fastForwarddescriptionprevnextTop
sub _fastForward {
  my ($self) = @_;
  return 0 if $self->{'REPORT_DONE'}; # empty report
while(defined($_ = $self->_readline()) ) { if( $_ =~ /^>/ || $_ =~ /^QUERY|^\d+ .* \d+$/ ) { # Changed to also handle "-m 6" /AE
$self->_pushback($_); return 1; } # print "FASTFORWARD",$_,"\n";
if ($_ =~ /^>|^Parameters|^\s+Database:/) { $self->_pushback($_); return 1; } } $self->warn("Possible error (2) while parsing BLAST report!");
}
_parseHeaderdescriptionprevnextTop
sub _parseHeader {
  my ($self) = @_;
  my (@old_hits, @new_hits);

  my $newhits_true = ($self->{'ROUND'} < 2) ? 1  : 0 ;
  while(defined($_ = $self->_readline()) ) {
    if ($_ =~ /(\w\w|.*|\w+.*)\s\s+(\d+)\s+([-\.e\d]+)$/)    {
	my $id = $1;
	my $score= $2;	#not used currently
my $evalue= $3; #not used currently
if ($newhits_true) { push ( @new_hits, $id);} else { push (@old_hits, $id);} } elsif ($_ =~ /^Sequences not found previously/) {$newhits_true = 1 ;} # This is changed for "-m 6" option /AE
elsif ($_ =~ /^>/ || $_ =~ /^QUERY/) { $self->_pushback($_); $self->{'OLDHITS'} =\@ old_hits; $self->{'NEWHITS'} =\@ new_hits; return 1; } elsif ($_ =~ /^Parameters|^\s+Database:|^\s*Results from round\s+(d+)/) { $self->_pushback($_); return 0; # no sequences found in this iteration
} } return 0; # no sequences found in this iteration
}
_pushbackdescriptionprevnextTop
sub _pushback {
   my ($self, $arg) = @_;   
   return $self->{'PARENT'}->_pushback($arg);    
}
1;
__END__
}
_readlinedescriptionprevnextTop
sub _readline {
   my ($self) = @_;
   return $self->{'PARENT'}->_readline();
}
newdescriptionprevnextTop
sub new {
    my ($class, @args) = @_;
    my $self = $class->SUPER::new(@args);

    ($self->{'PARENT'},$self->{'ROUND'}) =
	$self->_rearrange([qw(PARENT
			      ROUND
			      )],@args);
    
    $self->{'QUERY'} = $self->{'PARENT'}->{'QUERY'};
    $self->{'LENGTH'} = $self->{'PARENT'}->{'LENGTH'};

    if($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} # there are alignments
else {$self->{'REPORT_DONE'} = 1} # empty report
return $self; # success - we hope!
}
newhitsdescriptionprevnextTop
sub newhits {
shift->{'NEWHITS'}
}
nextSbjctdescriptionprevnextTop
sub nextSbjct {
  my ($self) = @_;
  $self->_fastForward or return undef;
 
  #######################
# get all sbjct lines #
#######################
my $def = $self->_readline(); while(defined ($_ = $self->_readline) ) { if ($_ !~ /\w/) {next} elsif ($_ =~ /Strand HSP/) {next} # WU-BLAST non-data
elsif ($_ =~ /^\s{0,2}Score/) {$self->_pushback( $_); last} elsif ($_ =~ /^(\d+) .* \d+$/) { # This is not correct at all
$self->_pushback($_); # 1: HSP does not work for -m 6 flag
$def = $1; # 2: length/name are incorrect
my $length = undef; # 3: Names are repeated many times.
my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def, '-length'=>$length, '-parent'=>$self); return $sbjct; } # m-6
elsif ($_ =~ /^Parameters|^\s+Database:|^\s+Posted date:/) { $self->_pushback( $_); last; } else {$def .= $_} } $def =~ s/\s+/ /g; $def =~ s/\s+$//g; $def =~ s/Length = ([\d,]+)$//g; my $length = $1; return 0 unless $def =~ /^>/; $def =~ s/^>//; ####################
# the Sbjct object #
####################
my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def, '-length'=>$length, '-parent'=>$self); return $sbjct; } # This is added by /AE
}
oldhitsdescriptionprevnextTop
sub oldhits {
shift->{'OLDHITS'}
}
qlengthdescriptionprevnextTop
sub qlength {
shift->{'LENGTH'}
}
querydescriptionprevnextTop
sub query {
shift->{'QUERY'}
}
General documentation
AUTHORS - Peter SchattnerTop
Email: schattner@alum.mit.edu
CONTRIBUTORSTop
Jason Stajich, jason@cgt.mc.duke.edu
ACKNOWLEDGEMENTSTop
Based on work of:
Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
Lorenz Pollak (lorenz@ist.org, bioperl port)
COPYRIGHTTop
BPlite.pm is copyright (C) 1999 by Ian Korf.
DISCLAIMERTop
This software is provided "as is" without warranty of any kind.