Bio::Tools::BPlite
Iteration
Toolbar
Summary
Bio::Tools::BPlite::Iteration - object for parsing single iteration
of a PSIBLAST report
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
use Bio::Tools:: BPpsilite;
open FH, "t/psiblastreport.out";
$report = Bio::Tools::BPpsilite->new(-fh=>\*FH);
# determine number of iterations executed by psiblast
$total_iterations = $report->number_of_iterations;
$last_iteration = $report->round($total_iterations);
# Process only hits found in last iteration ...
$oldhitarray_ref = $last_iteration->oldhits;
HIT: while($sbjct = $last_iteration->nextSbjct) {
$id = $sbjct->name;
$is_old = grep /\Q$id\E/, @$oldhitarray_ref;
if ($is_old ){next HIT;}
# do something with new hit...
}
# This assumed that you have $db pointing to a database, $out to an output file
# $slxdir to a directory and $psiout
# note the alignments can only be obtained if the flag "-m 6" is run.
# It might also be necessary to use the flag -v to get all alignments
#
my @psiparams = ('database' => $db , 'output' => $out, 'j' => 3, 'm' => 6,
'h' => 1.e-3 , 'F' => 'T' , 'Q' => $psiout );
my $factory = Bio::Tools::Run::StandAloneBlast->new(@psiparams);
my $report = $factory->blastpgp($seq);
my $total_iterations = $report->number_of_iterations();
my $last_iteration = $report->round($total_iterations);
my $align=$last_iteration->Align;
my $slxfile=$slxdir.$id.".slx";
my $slx = Bio::AlignIO->new('-format' => 'selex','-file' => ">".$slxfile );
$slx->write_aln($align);
Description
See the documentation for BPpsilite.pm for a description of the
Iteration.pm module.
Methods
Methods description
Title : Align Usage : $SimpleAlign = $obj->Align(); Function : Method to obtain a simpleAlign object from psiblast Example : $SimpleAlign = $obj->Align(); Returns : SimpleAlign object or undef if not found. BUG : Only works if psiblast has been run with m 6 flag Args : |
Title : _pushback Usage : $obj->_pushback($newvalue) Function: puts a line previously read with _readline back into a buffer Example : Returns : Args : newvalue |
Title : _readline Usage : $obj->_readline Function: Reads a line of input.
Note that this method implicitely uses the value of $/ that is
in effect when called.
Note also that the current implementation does not handle pushed
back input correctly unless the pushed back input ends with the
value of $/.
Example :
Returns : |
Title : newhits Usage : $newhits = $obj->newhits(); Returns : reference to an array listing all the hits from the current iteration which were not identified in the previous iteration Args : none |
Title : nextSbjct Usage : $sbjct = $obj->nextSbjct(); Function : Method of iterating through all the Sbjct retrieved from parsing the report #Example : while ( my $sbjct = $obj->nextSbjct ) {} Returns : next Sbjct object or undef if finished Args : |
Title : oldhits Usage : $oldhits = $obj->oldhits(); Returns : reference to an array listing all the hits from the current iteration which were identified and above threshold in the previous iteration Args : none |
Title : qlength Usage : $len = $obj->qlength(); Returns : length of query Args : none |
Title : query Usage : $query = $obj->query(); Function : returns the query object Example : Returns : query object Args : |
Methods code
sub Align
{ use Bio::SimpleAlign;
my ($self) = @_;
$self->_fastForward or return undef;
my $lastline = $self->_readline();
return undef unless $lastline =~ /^QUERY/; my (%sequence,%first,%last,$num);
if ( $lastline =~ /^QUERY\s+(\d*)\s*([-\w]+)\s*(\d*)\s*$/){
my $name='QUERY';
my $start=$1;
my $seq=$2;
my $stop=$3;
$seq =~ s/-/\./g;
$start =~ s/ //g;
$stop =~ s/ //g;
$sequence{$name} .= $seq;
if ($first{$name} eq ''){$first{$name}=$start;}
if ($stop ne ''){$last{$name}=$stop;}
$num=0;
}
while(defined($_ = $self->_readline()) ){
chomp($_);
if ( $_ =~ /^QUERY\s+(\d+)\s*([\-A-Z]+)\s*(\+)\s*$/){
my $name='QUERY';
my $start=$1;
my $seq=$2;
my $stop=$3;
$seq =~ s/-/\./g;
$start =~ s/ //g;
$stop =~ s/ //g;
$sequence{$name} .= $seq;
if ($first{$name} eq '') { $first{$name} = $start;}
if ($stop ne '') { $last{$name}=$stop;}
$num=0;
} elsif ( $_ =~ /^(\d+)\s+(\d+)\s*([\-A-Z]+)\s*(\d+)\s*$/ ){
my $name=$1.".".$num;
my $start=$2;
my $seq=$3;
my $stop=$4;
$seq =~ s/-/\./g;
$start =~ s/ //g;
$stop =~ s/ //g;
$sequence{$name} .= $seq;
if ($first{$name} eq ''){$first{$name}=$start;}
if ($stop ne ''){$last{$name}=$stop;}
$num++;
}
}
my $align = new Bio::SimpleAlign();
my @keys=sort keys(%sequence);
foreach my $name (@keys){
my $nse = $name."/".$first{$name}."-".$last{$name};
my $seqobj = Bio::LocatableSeq->new( -seq => $sequence{$name},
-id => $name,
-name => $nse,
-start => $first{$name},
-end => $last{$name}
);
$align->add_seq($seqobj);
}
return $align;
}
} |
sub _fastForward
{ my ($self) = @_;
return 0 if $self->{'REPORT_DONE'};
while(defined($_ = $self->_readline()) ) {
if( $_ =~ /^>/ ||
$_ =~ /^QUERY|^\d+ .* \d+$/ ) { $self->_pushback($_);
return 1;
}
if ($_ =~ /^>|^Parameters|^\s+Database:/) {
$self->_pushback($_);
return 1;
}
}
$self->warn("Possible error (2) while parsing BLAST report!"); } |
sub _parseHeader
{ my ($self) = @_;
my (@old_hits, @new_hits);
my $newhits_true = ($self->{'ROUND'} < 2) ? 1 : 0 ;
while(defined($_ = $self->_readline()) ) {
if ($_ =~ /(\w\w|.*|\w+.*)\s\s+(\d+)\s+([-\.e\d]+)$/) {
my $id = $1;
my $score= $2; my $evalue= $3; if ($newhits_true) { push ( @new_hits, $id);}
else { push (@old_hits, $id);}
}
elsif ($_ =~ /^Sequences not found previously/) {$newhits_true = 1 ;}
elsif ($_ =~ /^>/ || $_ =~ /^QUERY/)
{
$self->_pushback($_);
$self->{'OLDHITS'} =\@ old_hits;
$self->{'NEWHITS'} =\@ new_hits;
return 1;
}
elsif ($_ =~ /^Parameters|^\s+Database:|^\s*Results from round\s+(d+)/) {
$self->_pushback($_);
return 0; }
}
return 0;
} |
sub _pushback
{ my ($self, $arg) = @_;
return $self->{'PARENT'}->_pushback($arg);
}
1;
__END__ } |
sub _readline
{ my ($self) = @_;
return $self->{'PARENT'}->_readline(); } |
sub new
{ my ($class, @args) = @_;
my $self = $class->SUPER::new(@args);
($self->{'PARENT'},$self->{'ROUND'}) =
$self->_rearrange([qw(PARENT
ROUND
)],@args);
$self->{'QUERY'} = $self->{'PARENT'}->{'QUERY'};
$self->{'LENGTH'} = $self->{'PARENT'}->{'LENGTH'};
if($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} else {$self->{'REPORT_DONE'} = 1}
return $self;
} |
sub newhits
{shift->{'NEWHITS'} } |
sub nextSbjct
{ my ($self) = @_;
$self->_fastForward or return undef;
my $def = $self->_readline();
while(defined ($_ = $self->_readline) ) {
if ($_ !~ /\w/) {next}
elsif ($_ =~ /Strand HSP/) {next} elsif ($_ =~ /^\s{0,2}Score/) {$self->_pushback( $_); last}
elsif ($_ =~ /^(\d+) .* \d+$/) { $self->_pushback($_); $def = $1; my $length = undef; my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
'-length'=>$length,
'-parent'=>$self);
return $sbjct;
} elsif ($_ =~ /^Parameters|^\s+Database:|^\s+Posted date:/) {
$self->_pushback( $_);
last;
} else {$def .= $_}
}
$def =~ s/\s+/ /g;
$def =~ s/\s+$//g;
$def =~ s/Length = ([\d,]+)$//g;
my $length = $1;
return 0 unless $def =~ /^>/;
$def =~ s/^>//;
my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
'-length'=>$length,
'-parent'=>$self);
return $sbjct;
}
} |
sub oldhits
{shift->{'OLDHITS'} } |
sub qlength
{shift->{'LENGTH'} } |
sub query
{shift->{'QUERY'} } |
General documentation
AUTHORS - Peter Schattner | Top |
BPlite.pm is copyright (C) 1999 by Ian Korf.
This software is provided "as is" without warranty of any kind.