# Raw content of Bio::EnsEMBL::Analysis::Tools::BPlite::Iteration
# $Id: Iteration.pm,v 1.2 2005/12/19 09:51:30 ba1 Exp $
# Bioperl module Bio::EnsEMBL::Analysis::Tools::BPlite::Iteration
# based closely on the Bio::EnsEMBL::Analysis::Tools::BPlite modules
# Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
# Lorenz Pollak (lorenz@ist.org, bioperl port)
#
# Copyright Peter Schattner
#
# You may distribute this module under the same terms as perl itself
# _history
# October 20, 2000
# POD documentation - main docs before the code
=head1 NAME
Bio::EnsEMBL::Analysis::Tools::BPlite::Iteration - object for parsing a
single iteration of a PSIBLAST report
=head1 SYNOPSIS
use Bio::Tools::BPpsilite;
open FH, "t/psiblastreport.out";
$report = Bio::Tools::BPpsilite->new(-fh=>\*FH);
# determine number of iterations executed by psiblast
$total_iterations = $report->number_of_iterations;
$last_iteration = $report->round($total_iterations);
# Process only hits found in last iteration ...
$oldhitarray_ref = $last_iteration->oldhits;
HIT: while($sbjct = $last_iteration->nextSbjct) {
$id = $sbjct->name;
$is_old = grep /\Q$id\E/, @$oldhitarray_ref;
if ($is_old ){next HIT;}
# do something with new hit...
}
=head1 DESCRIPTION
See the documentation for BPpsilite.pm for a description of the
Iteration.pm module.
=head1 AUTHORS - Peter Schattner
Email: schattner@alum.mit.edu
=head1 ACKNOWLEDGEMENTS
Based on work of:
Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
Lorenz Pollak (lorenz@ist.org, bioperl port)
=head1 COPYRIGHT
BPlite.pm is copyright (C) 1999 by Ian Korf.
=head1 DISCLAIMER
This software is provided "as is" without warranty of any kind.
=cut
package Bio::EnsEMBL::Analysis::Tools::BPlite::Iteration;
use strict;
use vars qw(@ISA);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Analysis::Tools::BPlite; #
use Bio::EnsEMBL::Analysis::Tools::BPlite::Sbjct;
@ISA = qw();
# Constructor.
#
# Named arguments (unpacked with rearrange):
#   -FH     : GLOB reference or IO::Handle-derived object to read the report from
#   -PARENT : object whose {'QUERY'} and {'LENGTH'} entries are copied into
#             this iteration
#   -ROUND  : iteration number; read by _parseHeader
#
# Throws if -FH is not a GLOB reference or an IO::Handle object.
# Calls _parseHeader immediately, so construction consumes lines from the
# handle; REPORT_DONE is set to 1 when the header parse finds no alignments.
sub new {
    my ($caller, @args) = @_;
    my $class = ref($caller) || $caller;
    my $self  = bless({}, $class);

    ($self->{'FH'}, $self->{'PARENT'}, $self->{'ROUND'}) =
        rearrange([qw(FH
                      PARENT
                      ROUND
                     )], @args);

    # Only call ->isa on blessed references: invoking a method on an
    # unblessed HASH/ARRAY/CODE reference dies inside perl itself and would
    # bypass the descriptive throw() below.
    require Scalar::Util;
    my $fh        = $self->{'FH'};
    my $is_handle = 0;
    if (ref($fh)) {
        if (ref($fh) eq 'GLOB') {
            $is_handle = 1;
        }
        elsif (Scalar::Util::blessed($fh) && $fh->isa('IO::Handle')) {
            $is_handle = 1;
        }
    }
    if (!$is_handle) {
        throw("Expecting a GLOB reference, not $self->{'FH'} !");
    }

    $self->{'LASTLINE'} = "";
    $self->{'QUERY'}    = $self->{'PARENT'}->{'QUERY'};
    $self->{'LENGTH'}   = $self->{'PARENT'}->{'LENGTH'};

    # _parseHeader returns true when an alignment ('>' line) follows the
    # hit summary, false for an empty iteration.
    $self->{'REPORT_DONE'} = $self->_parseHeader ? 0 : 1;

    return $self;
}
=head2 query
Title : query
Usage : $query = $obj->query();
Function : returns the query object
Example :
Returns : query object
Args :
=cut
sub query {
    my ($self) = @_;
    # Value stored under 'QUERY' on this object.
    return $self->{'QUERY'};
}
=head2 qlength
Title : qlength
Usage : $len = $obj->qlength();
Returns : length of query
Args : none
=cut
sub qlength {
    my ($self) = @_;
    # Value stored under 'LENGTH' on this object.
    return $self->{'LENGTH'};
}
=head2 newhits
Title : newhits
Usage : $newhits = $obj->newhits();
Returns : reference to an array listing all the hits
from the current iteration which were not identified
in the previous iteration
Args : none
=cut
sub newhits {
    my ($self) = @_;
    # Value stored under 'NEWHITS' on this object.
    return $self->{'NEWHITS'};
}
=head2 oldhits
Title : oldhits
Usage : $oldhits = $obj->oldhits();
Returns : reference to an array listing all the hits from
the current iteration which were identified and
above threshold in the previous iteration
Args : none
=cut
sub oldhits {
    my ($self) = @_;
    # Value stored under 'OLDHITS' on this object.
    return $self->{'OLDHITS'};
}
=head2 nextSbjct
Title : nextSbjct
Usage : $sbjct = $obj->nextSbjct();
Function : Method of iterating through all the Sbjct retrieved
from parsing the report
Example : while ( my $sbjct = $obj->nextSbjct ) {}
Returns : next Sbjct object, or a false value (undef or 0) if finished
Args :
=cut
sub nextSbjct {
    my ($self) = @_;

    # Nothing (more) to read: _fastForward reports false.
    $self->_fastForward or return undef;

    #######################
    # get all sbjct lines #
    #######################
    # Start from the line left in LASTLINE and append everything up to the
    # first "Score" line, which is stored back into LASTLINE.
    # A lexical line variable is used so the caller's $_ is not overwritten.
    my $def = $self->{'LASTLINE'};
    my $FH  = $self->{'FH'};
    while (my $line = <$FH>) {
        if ($line !~ /\w/) {
            next;                               # no word characters
        }
        elsif ($line =~ /Strand HSP/) {
            next;                               # WU-BLAST non-data
        }
        elsif ($line =~ /^\s{0,2}Score/) {
            $self->{'LASTLINE'} = $line;
            last;
        }
        else {
            $def .= $line;
        }
    }

    # Collapse the definition onto one line and split off the trailing
    # "Length = N" field.
    $def =~ s/\s+/ /g;
    $def =~ s/\s+$//g;

    # Read $1 only when the substitution matched; otherwise the length
    # stays undef instead of depending on an earlier, unrelated match.
    # NOTE(review): the captured length keeps any thousands separators
    # ("1,234") exactly as they appear in the report - confirm that the
    # Sbjct class copes with that.
    my $length;
    if ($def =~ s/Length = ([\d,]+)$//g) {
        $length = $1;
    }

    # Whatever was collected is not a hit definition (e.g. the trailing
    # "Parameters"/"Database:" section): signal the end with a false value.
    return 0 unless $def =~ /^>/;
    $def =~ s/^>//;

    ####################
    # the Sbjct object #
    ####################
    my $sbjct = Bio::EnsEMBL::Analysis::Tools::BPlite::Sbjct->new(
        '-name'     => $def,
        '-length'   => $length,
        '-fh'       => $self->{'FH'},
        '-lastline' => $self->{'LASTLINE'},
        '-parent'   => $self,
    );
    return $sbjct;
}
# _parseHeader
#
# Reads the one-line hit summaries of this iteration from $self->{'FH'} and
# sorts the hit ids into two lists, stored as array references under
# 'OLDHITS' and 'NEWHITS'.  Ids go to the new-hit list when ROUND is below 2
# or once a "Sequences not found previously" line has been seen; otherwise
# they go to the old-hit list.
#
# Returns 1 when a line starting with '>' is reached (alignments follow),
# 0 when a "Parameters", "Database:" or "Results from round N" line or the
# end of input is reached first.  The terminating line is kept in
# 'LASTLINE'.
sub _parseHeader {
    my ($self) = @_;
    my (@old_hits, @new_hits);
    my $FH = $self->{'FH'};
    my $newhits_true = ($self->{'ROUND'} < 2) ? 1 : 0;

    # Publish the lists before reading so that every exit path - including
    # an empty iteration - leaves oldhits()/newhits() returning array
    # references; ids pushed below are visible through these references.
    $self->{'OLDHITS'} = \@old_hits;
    $self->{'NEWHITS'} = \@new_hits;

    # A lexical line variable keeps the caller's $_ untouched.
    while (my $line = <$FH>) {
        # Summary line: "<id/description>  <score>  <e-value>"
        if ($line =~ /(\w\w|.*|\w+.*)\s\s+(\d+)\s+([-\.e\d]+)$/) {
            my $id = $1;    # score ($2) and e-value ($3) are not used
            if ($newhits_true) {
                push @new_hits, $id;
            }
            else {
                push @old_hits, $id;
            }
        }
        elsif ($line =~ /^Sequences not found previously/) {
            $newhits_true = 1;
        }
        elsif ($line =~ /^>/) {
            $self->{'LASTLINE'} = $line;
            return 1;
        }
        # \d+ (not a literal "d+") so the round header actually matches.
        elsif ($line =~ /^Parameters|^\s+Database:|^\s*Results from round\s+(\d+)/) {
            $self->{'LASTLINE'} = $line;
            return 0;    # no sequences found in this iteration
        }
    }
    return 0;    # no sequences found in this iteration
}
# _fastForward
#
# Positions the parser at the next line of interest.
# Returns 0 when REPORT_DONE is set, or when the end of input is reached
# without finding a marker line (a warning is issued in that case).
# Returns 1 when LASTLINE already starts with '>', or after reading forward
# to the next line starting with '>' or "Parameters", or containing a
# leading-whitespace "Database:"; that line is stored in LASTLINE.
sub _fastForward {
    my ($self) = @_;
    return 0 if $self->{'REPORT_DONE'};         # empty report
    return 1 if $self->{'LASTLINE'} =~ /^>/;

    my $FH = $self->{'FH'};
    # A lexical line variable keeps the caller's $_ untouched.
    while (my $line = <$FH>) {
        if ($line =~ /^>|^Parameters|^\s+Database:/) {
            $self->{'LASTLINE'} = $line;
            return 1;
        }
    }

    warning("Possible error while parsing BLAST report!");
    # Explicit false result: without it the sub would return whatever
    # warning() happens to return.
    return 0;
}
1;
__END__