Raw content of Bio::Tools::BPbl2seq
# $Id: BPbl2seq.pm,v 1.21.2.2 2003/06/03 14:38:18 jason Exp $
#
# Bioperl module Bio::Tools::BPbl2seq
# based closely on the Bio::Tools::BPlite modules
# Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
# Lorenz Pollak (lorenz@ist.org, bioperl port)
#
#
# Copyright Peter Schattner
#
# You may distribute this module under the same terms as perl itself
# _history
# October 20, 2000
# May 29, 2001
# Fixed bug which prevented reading of more than one HSP / hit.
# This fix required changing calling syntax as described below. (PS)
# POD documentation - main docs before the code
=head1 NAME
Bio::Tools::BPbl2seq - Lightweight BLAST parser for pair-wise sequence
alignment using the BLAST algorithm.
=head1 SYNOPSIS
use Bio::Tools::BPbl2seq;
my $report = Bio::Tools::BPbl2seq->new(-file => 't/bl2seq.out');
$report->sbjctName;
$report->sbjctLength;
while(my $hsp = $report->next_feature) {
$hsp->score;
$hsp->bits;
$hsp->percent;
$hsp->P;
$hsp->match;
$hsp->positive;
$hsp->length;
$hsp->querySeq;
$hsp->sbjctSeq;
$hsp->homologySeq;
$hsp->query->start;
$hsp->query->end;
$hsp->sbjct->start;
$hsp->sbjct->end;
$hsp->sbjct->seq_id;
$hsp->sbjct->overlaps($exon);
}
=head1 DESCRIPTION
BPbl2seq is a package for parsing BLAST bl2seq reports. BLAST bl2seq is a
program for comparing and aligning two sequences using BLAST. Although
the report format is similar to that of a conventional BLAST, there are a
few differences so that BPlite is unable to read bl2seq reports directly.
From the user's perspective, one difference between bl2seq and
other blast reports is that the bl2seq report does not print out the
name of the first of the two aligned sequences. (The second sequence
name is given in the report as the name of the "hit"). Consequently,
BPbl2seq has no way of identifying the name of the initial sequence
unless it is passed to constructor as a second argument as in:
my $report = Bio::Tools::BPbl2seq->new(\*FH, "ALEU_HORVU");
If the name of the first sequence (the "query") is not passed to
BPbl2seq.pm in this manner, the name of the first sequence will be
left as "unknown". (Note that to preserve a common interface with the
other BLAST programs the two sequences being compared are referred to
in bl2seq as "query" and "subject" although this is perhaps a bit
misleading when simply comparing 2 sequences as opposed to querying a
database.)
In addition, since there will only be (at most) one "subject" (hit) in
a bl2seq report, one should use the method $report-Enext_feature,
rather than $report-EnextSbjct-EnextHSP to obtain the next
high scoring pair.
One should note that the previous (0.7) version of BPbl2seq used
slightly different syntax. That version had a bug and consequently the
old syntax has been eliminated. Attempts to use the old syntax will
return error messages explaining the (minor) recoding required to use
the current syntax.
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bio.perl.org/MailList.html - About the mailing lists
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution. Bug reports can be submitted via
email or the web:
bioperl-bugs@bio.perl.org
http://bugzilla.bioperl.org/
=head1 AUTHOR - Peter Schattner
Email: schattner@alum.mit.edu
=head1 ACKNOWLEDGEMENTS
Based on work of:
Ian Korf (ikorf@sapiens.wustl.edu, http://sapiens.wustl.edu/~ikorf),
Lorenz Pollak (lorenz@ist.org, bioperl port)
=head1 CONTRIBUTORS
Jason Stajich, jason@cgt.mc.duke.edu
=cut
#'
package Bio::Tools::BPbl2seq;
use strict;
use vars qw(@ISA);
use Bio::Tools::BPlite;
use Bio::Root::Root;
use Bio::Root::IO;
use Bio::Tools::BPlite::Sbjct; # we want to use Sbjct
use Bio::SeqAnalysisParserI;
use Symbol;
@ISA = qw(Bio::Root::Root Bio::SeqAnalysisParserI Bio::Root::IO);
#@ISA = qw(Bio::Tools::BPlite);
=head2 new
Title : new
Function: Create a new Bio::Tools::BPbl2seq object
Returns : Bio::Tools::BPbl2seq
Args : -file input file (alternative to -fh)
-fh input stream (alternative to -file)
-queryname name of query sequence
=cut
sub new {
my ($class, @args) = @_;
my $self = $class->SUPER::new(@args);
# initialize IO
$self->_initialize_io(@args);
my ($queryname,$rt) = $self->_rearrange([qw(QUERYNAME
REPORT_TYPE)], @args);
$queryname = 'unknown' if( ! defined $queryname );
if( $rt && $rt =~ /BLAST/i ) {
$self->{'BLAST_TYPE'} = uc($rt);
} else {
$self->warn("Must provide which type of BLAST was run (blastp,blastn, tblastn, tblastx, blastx) if you want strand information to get set properly for DNA query or subjects");
}
my $sbjct = $self->getSbjct();
$self->{'_current_sbjct'} = $sbjct;
$self->{'_query'}->{'NAME'} = $queryname;
return $self;
}
=head2 getSbjct
Title :
Usage : $sbjct = $obj->getSbjct();
Function : Method of obtaining single "subject" of a bl2seq report
Example : my $sbjct = $obj->getSbjct ) {}
Returns : Sbjct object or null if finished
Args :
=cut
sub getSbjct {
my ($self) = @_;
# $self->_fastForward or return undef;
#######################
# get bl2seq "sbjct" name and length #
#######################
my $length;
my $def;
READLOOP: while(defined ($_ = $self->_readline) ) {
if ($_ =~ /^>(.+)$/) {
$def = $1;
next READLOOP;
}
elsif ($_ =~ /^\s*Length\s.+\D(\d+)/i) {
$length = $1;
next READLOOP;
}
elsif ($_ =~ /^\s{0,2}Score/) {
$self->_pushback($_);
last READLOOP;
}
}
return undef if ! defined $def;
$def =~ s/\s+/ /g;
$def =~ s/\s+$//g;
####################
# the Sbjct object #
####################
my $sbjct = new Bio::Tools::BPlite::Sbjct('-name'=>$def,
'-length'=>$length,
'-parent'=>$self);
return $sbjct;
}
=head2 next_feature
Title : next_feature
Usage : while( my $feat = $res->next_feature ) { # do something }
Function: calls next_feature function from BPlite.
Example :
Returns : A Bio::SeqFeatureI compliant object, in this case a
Bio::Tools::BPlite::HSP object, and FALSE if there are no more
HSPs.
Args : None
=cut
sub next_feature{
my ($self) = @_;
my ($sbjct, $hsp);
$sbjct = $self->{'_current_sbjct'};
unless( defined $sbjct ) {
$self->debug(" No hit object found for bl2seq report \n ") ;
return undef;
}
$hsp = $sbjct->nextHSP;
return $hsp || undef;
}
=head2 queryName
Title :
Usage : $name = $report->queryName();
Function : get /set the name of the query
Example :
Returns : name of the query
Args :
=cut
sub queryName {
my ($self, $queryname) = @_;
if( $queryname ) {
$self->{'_query'}->{'NAME'} = $queryname;
}
$self->{'_query'}->{'NAME'};
}
=head2 sbjctName
Title :
Usage : $name = $report->sbjctName();
Function : returns the name of the Sbjct
Example :
Returns : name of the Sbjct
Args :
=cut
sub sbjctName {
my $self = shift;
# unless( defined $self->{'_current_sbjct'} ) {
# my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct;
# return undef unless defined $sbjct;
# }
$self->{'_current_sbjct'}->{'NAME'} || '';
}
=head2 sbjctLength
Title : sbjctLength
Usage : $length = $report->sbjctLength();
Function : returns the length of the Sbjct
Example :
Returns : name of the Sbjct
Args :
=cut
sub sbjctLength {
my $self = shift;
# unless( defined $self->{'_current_sbjct'} ) {
# my $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct;
# return undef unless defined $sbjct;
# }
$self->{'_current_sbjct'}->{'LENGTH'};
}
=head2 P
Title : P
Usage :
Function : Syntax no longer supported, error message only
=cut
sub P {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ");
}
=head2 percent
Title : percent
Usage : $hsp->percent();
Function : Syntax no longer supported, error message only
=cut
sub percent {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ");
}
=head2 match
Title : match
Usage : $hsp->match();
Function : Syntax no longer supported, error message only
=cut
sub match {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ");
}
=head2 positive
Title : positive
Usage : $hsp->positive();
Function : Syntax no longer supported, error message only
=cut
sub positive {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
}
=head2 querySeq
Title : querySeq
Usage : $hsp->querySeq();
Function : Syntax no longer supported, error message only
=cut
sub querySeq {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
}
=head2 sbjctSeq
Title : sbjctSeq
Usage : $hsp->sbjctSeq();
Function : Syntax no longer supported, error message only
=cut
sub sbjctSeq {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
}
=head2 homologySeq
Title : homologySeq
Usage : $hsp->homologySeq();
Function : Syntax no longer supported, error message only
=cut
sub homologySeq {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
}
=head2 qs
Title : qs
Usage : $hsp->qs();
Function : Syntax no longer supported, error message only
=cut
sub qs {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
}
=head2 ss
Title : ss
Usage : $hsp->ss();
Function : Syntax no longer supported, error message only
=cut
sub ss {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
}
=head2 hs
Title : hs
Usage : $hsp->hs();
Function : Syntax no longer supported, error message only
=cut
sub hs {
my $self = shift;
$self->throw("Syntax used is no longer supported.\n See BPbl2seq.pm documentation for current syntax.\n ") ;
}
sub _fastForward {
my ($self) = @_;
return 0 if $self->{'REPORT_DONE'}; # empty report
while(defined( $_ = $self->_readline() ) ) {
if ($_ =~ /^>|^Parameters|^\s+Database:|^\s+Posted date:|^\s*Lambda/) {
$self->_pushback($_);
return 1;
}
}
$self->warn("Possible error (1) while parsing BLAST report!");
}
1;
__END__