# Raw content of Bio::Search::Result::BlastResult
#-----------------------------------------------------------------
# $Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $
#
# BioPerl module Bio::Search::Result::BlastResult
#
# Cared for by Steve Chervitz
#
# You may distribute this module under the same terms as perl itself
#-----------------------------------------------------------------
# POD documentation - main docs before the code
=head1 NAME
Bio::Search::Result::BlastResult - A top-level BLAST Report object
=head1 SYNOPSIS
The construction of BlastResult objects is performed by
the B<Bio::SearchIO::psiblast> parser.
Therefore, you do not need to
use B<Bio::Search::Result::BlastResult> directly. If you need to construct
BlastResult objects directly, see the new() function for details.
For B<Bio::SearchIO> BLAST parsing usage examples, see the
B<examples/search-blast> directory of the Bioperl distribution.
=head1 DESCRIPTION
This module supports BLAST versions 1.x and 2.x, gapped and ungapped,
and PSI-BLAST.
=head1 DEPENDENCIES
Bio::Search::Result::BlastResult.pm is a concrete class that inherits from B<Bio::Root::Root> and B<Bio::Search::Result::ResultI>. It relies on two other modules:
=over 4
=item B<Bio::Search::Hit::BlastHit>
Encapsulates a single BLAST hit.
=item B<Bio::Search::DatabaseI>
Provides an interface to BLAST database metadata.
=back
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bio.perl.org/MailList.html - About the mailing lists
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via email
or the web:
bioperl-bugs@bio.perl.org
http://bugzilla.bioperl.org/
=head1 AUTHOR
Steve Chervitz E<lt>sac@bioperl.orgE<gt>
See L<the FEEDBACK section|/FEEDBACK> for where to send bug reports and comments.
=head1 ACKNOWLEDGEMENTS
This software was originally developed in the Department of Genetics
at Stanford University. I would also like to acknowledge my
colleagues at Affymetrix for useful feedback.
=head1 COPYRIGHT
Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
=cut
=head1 DISCLAIMER
This software is provided "as is" without warranty of any kind.
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
# Let the code begin...
package Bio::Search::Result::BlastResult;
use strict;
# Interface implemented by this class and the root class it extends (see @ISA).
use Bio::Search::Result::ResultI;
use Bio::Root::Root;
# Stringification ("$result") is routed to the to_string() method.
use overload
'""' => \&to_string;
# Package globals: the inheritance list and the RCS revision string.
use vars qw(@ISA $Revision );
$Revision = '$Id: BlastResult.pm,v 1.13 2002/12/24 15:48:41 jason Exp $'; #'
# Bio::Root::Root is listed first, so its methods take precedence over ResultI's.
@ISA = qw( Bio::Root::Root Bio::Search::Result::ResultI);
#----------------
sub new {
#----------------
    # Constructor. Every argument is handed, unchanged, to the parent class
    # constructor (resolved through @ISA); the object it builds is returned.
    my $class     = shift;
    my @ctor_args = @_;
    my $obj = $class->SUPER::new(@ctor_args);
    return $obj;
}
#sub DESTROY {
# my $self = shift;
# print STDERR "->DESTROYING $self\n";
#}
#=================================================
# Begin Bio::Search::Result::ResultI implementation
#=================================================
=head2 next_hit

 Usage    : while( my $hit = $result->next_hit ) { ... }
 Purpose  : Iterate over the hits of this result, one hit per call.
 Returns  : The next hit in the internal queue, or undef once the queue
            is empty.
 Argument : n/a
 Comments : The queue is filled from hits() the first time this method is
            called and is not refilled after it has been drained.
            See L<Bio::Search::Result::ResultI> for the interface
            documentation.

=cut

#----------------
sub next_hit {
#----------------
    my $self = shift;

    # Take a snapshot of the hit list the first time the iterator is used.
    defined $self->{'_hit_queue'}
        or $self->{'_hit_queue'} = [ $self->hits() ];

    return shift @{ $self->{'_hit_queue'} };
}
=head2 query_name

 Usage    : $name = $result->query_name;          (get)
            $result->query_name($name);           (set)
 Purpose  : Get/set the name of the query sequence.
 Returns  : The stored query name (undef if never set).
 Argument : string (when setting). Leading whitespace and trailing
            whitespace or a trailing comma are removed before storing.
 Comments : See L<Bio::Search::Result::ResultI> for the interface
            documentation.

=cut

#----------------
sub query_name {
#----------------
    my $self = shift;

    # Plain getter when called without a value.
    return $self->{'_query_name'} unless @_;

    my $cleaned = shift;
    $cleaned =~ s/^\s+|(\s+|,)$//g;
    $self->{'_query_name'} = $cleaned;

    return $self->{'_query_name'};
}
=head2 query_length

 Usage    : $len = $result->query_length;         (get)
            $result->query_length($len);          (set)
 Purpose  : Get/set the length of the query sequence.
 Returns  : The stored length (undef if never set).
 Argument : new length (when setting); stored as given, without validation.
 Comments : See L<Bio::Search::Result::ResultI> for the interface
            documentation.

=cut

#----------------
sub query_length {
#----------------
    my ($self, @new_value) = @_;

    # Any supplied argument, even undef, replaces the stored value.
    $self->{'_query_length'} = $new_value[0] if @new_value;

    return $self->{'_query_length'};
}
=head2 query_description

 Usage    : $desc = $result->query_description;       (get)
            $result->query_description($desc);        (set)
 Purpose  : Get/set the description of the query sequence.
 Returns  : The stored description; '' when an undefined value was set.
 Argument : string (when setting). Surrounding whitespace is trimmed and
            a copy of the query name at the very start of the string is
            removed (whitespace following the removed name is kept).
 Comments : Set query_name() before calling the setter, otherwise no
            duplicated ID can be removed.
            See L<Bio::Search::Result::ResultI> for the interface
            documentation.

=cut

#----------------
sub query_description {
#----------------
    my $self = shift;
    if(@_) {
        my $desc = shift;
        if( defined $desc ) {
            $desc =~ s/(^\s+|\s+$)//g;

            # Remove duplicated ID at beginning of description string.
            # The name is matched literally (\Q..\E) because sequence IDs
            # routinely contain regex metacharacters such as '|' and '.',
            # and the pattern is rebuilt on every call (no /o) so that each
            # result object strips its own query name.
            my $name = $self->{'_query_name'};
            if( defined $name and length $name ) {
                $desc =~ s/^\Q$name\E//;
            }
        }
        # Only an undefined description collapses to ''; a literal "0"
        # is preserved.
        $self->{'_query_query_desc'} = defined $desc ? $desc : '';
    }
    return $self->{'_query_query_desc'};
}
=head2 analysis_method

 Usage    : $prog = $result->analysis_method;         (get)
            $result->analysis_method('BLASTP');       (set)
 Purpose  : Get/set the name of the program that produced the report.
 Returns  : The stored program name (undef if never set).
 Argument : string (when setting); a false value is treated as "no argument".
 Throws   : An exception (via throw()) if the new name does not
            match /blast/i.
 Comments : See L<Bio::Search::Result::ResultI> for the interface
            documentation.

=cut

#----------------
sub analysis_method {
#----------------
    my $self      = shift;
    my $candidate = shift;

    # Nothing (or a false value) supplied: behave as a getter.
    return $self->{'_analysis_prog'} unless $candidate;

    if( $candidate !~ /blast/i ) {
        $self->throw("method $candidate not supported in " . ref($self));
    }
    else {
        $self->{'_analysis_prog'} = $candidate;
    }
    return $self->{'_analysis_prog'};
}
=head2 analysis_method_version

 Usage    : $ver = $result->analysis_method_version;        (get)
            $result->analysis_method_version('2.2.4');      (set)
 Purpose  : Get/set the version of the program that produced the report.
 Returns  : The stored version string (undef if never set).
 Argument : string (when setting); a false value is treated as "no argument".
 Comments : See L<Bio::Search::Result::ResultI> for the interface
            documentation.

=cut

#----------------
sub analysis_method_version {
#----------------
    my $self        = shift;
    my $new_version = shift;

    # Only a true value replaces what is stored.
    $self->{'_analysis_progVersion'} = $new_version if $new_version;

    return $self->{'_analysis_progVersion'};
}
=head2 analysis_query

 Usage    : $seq = $result->analysis_query;
 Purpose  : Get a sequence object describing the query.
 Returns  : A Bio::PrimarySeq object built from query_name(),
            query_description() and query_length(). The molecule type is
            'protein' when analysis_method() matches /blastp|tblastn/i
            and 'dna' otherwise.
 Argument : n/a
 Comments : The object is created on the first call and cached; later
            calls return the same object.
            See L<Bio::Search::Result::ResultI> for the interface
            documentation.

=cut

#----------------
sub analysis_query {
#----------------
    my $self = shift;

    # Already built on an earlier call.
    return $self->{'_analysis_query'} if defined $self->{'_analysis_query'};

    # Loaded on demand so the dependency is only paid for when needed.
    require Bio::PrimarySeq;

    my $moltype = 'dna';
    $moltype = 'protein' if $self->analysis_method =~ /blastp|tblastn/i;

    my $query_seq = Bio::PrimarySeq->new( -display_id => $self->query_name,
                                          -desc       => $self->query_description,
                                          -moltype    => $moltype
                                        );
    $self->{'_analysis_query'} = $query_seq;
    $query_seq->length( $self->query_length );

    return $self->{'_analysis_query'};
}
=head2 analysis_subject

 Usage     : $blastdb = $result->analysis_subject();          (get)
             $result->analysis_subject($blastdb);             (set)
 Purpose   : Get/set the Bio::Search::DatabaseI object containing
             information about the database used in the BLAST analysis.
 Returns   : The stored Bio::Search::DatabaseI object (undef if never set).
 Argument  : A Bio::Search::DatabaseI object (when setting); a false value
             is treated as "no argument".
 Throws    : Bio::Root::BadParameter if the argument is true but is not a
             Bio::Search::DatabaseI object. This includes plain strings and
             unblessed references.

=cut

#---------------
sub analysis_subject {
#---------------
    my ($self, $blastdb) = @_;
    if($blastdb) {
        # ->isa() is called inside eval so that an unblessed reference
        # (e.g. a plain hash ref) is reported through throw() below instead
        # of aborting with Perl's "Can't call method on unblessed reference".
        my $is_database = ref($blastdb)
            && do { local $@; eval { $blastdb->isa('Bio::Search::DatabaseI') } };

        if( $is_database ) {
            $self->{'_analysis_sbjct'} = $blastdb;
        }
        else {
            $self->throw(-class =>'Bio::Root::BadParameter',
                         -text => "Can't set BlastDB: not a Bio::Search::DatabaseI $blastdb"
                        );
        }
    }
    return $self->{'_analysis_sbjct'};
}
=head2 next_feature

 Title   : next_feature
 Usage   : while( my $feat = $blast_result->next_feature ) { # do something }
 Function: Returns the next feature available in the analysis result, or
           undef if there are no more features.
 Returns : The next value produced by next_hsp() on the current hit
           (expected to be a Bio::SeqFeatureI compliant HSP object).
           Hits are taken one after another from next_hit(); a hit whose
           HSPs are exhausted is skipped.
 Args    : None

=cut

#---------------
sub next_feature{
#---------------
    my $self = shift;

    # Walk hit by hit until one of them yields an HSP or the hits run out.
    while( 1 ) {
        my $hit = $self->{'_current_hit'};
        if( not defined $hit ) {
            $hit = $self->{'_current_hit'} = $self->next_hit;
            return undef unless defined $hit;
        }

        my $hsp = $hit->next_hsp;
        return $hsp || undef if defined $hsp;

        # Current hit is exhausted: forget it and move on to the next one.
        $self->{'_current_hit'} = undef;
    }
}
# Alias for analysis_method(): passes its arguments on and returns the result.
sub algorithm {
    my ($self, @args) = @_;
    return $self->analysis_method(@args);
}
# Alias for analysis_method_version(): passes its arguments on and returns
# the result.
sub algorithm_version {
    my ($self, @args) = @_;
    return $self->analysis_method_version(@args);
}
=head2 available_parameters

 Title   : available_parameters
 Usage   : my @params = $report->available_parameters
 Function: Returns the names of the available parameters.
 Returns : In this implementation always the empty list.
 Args    : none

=cut

sub available_parameters{
    # Placeholder: this class does not expose any parameter names.
    my @names = ();
    return @names[0 .. $#names];
}
=head2 get_parameter

 Title   : get_parameter
 Usage   : my $gap_ext = $report->get_parameter('gapext')
 Function: Returns the value for a specific parameter used
           when running this report.
 Returns : In this implementation always the empty string, whatever
           name is requested.
 Args    : name of parameter (string); currently ignored

=cut

sub get_parameter{
    # Placeholder: no parameter values are stored by this class.
    my $no_value = '';
    return $no_value;
}
=head2 get_statistic

 Title   : get_statistic
 Usage   : my $kappa = $report->get_statistic('kappa')
 Function: Returns the value for a specific statistic available
           from this report.
 Returns : In this implementation always the empty string, whatever
           name is requested.
 Args    : name of statistic (string); currently ignored

=cut

sub get_statistic{
    # Placeholder: no statistic values are stored by this class.
    my $no_value = '';
    return $no_value;
}
=head2 available_statistics

 Title   : available_statistics
 Usage   : my @statnames = $report->available_statistics
 Function: Returns the names of the available statistics.
 Returns : In this implementation always the empty list.
 Args    : none

=cut

sub available_statistics{
    # Placeholder: this class does not expose any statistic names.
    my @names = ();
    return @names[0 .. $#names];
}
#=================================================
# End Bio::Search::Result::ResultI implementation
#=================================================
=head2 to_string

 Title   : to_string
 Usage   : print $blast->to_string;
 Function: Returns a string representation for the Blast result.
           Primarily intended for debugging purposes.
 Example : see usage
 Returns : A string of the form:
           [BlastResult] <analysis_method> query=<name> <description>, db=<database>
 Args    : None
 Comments: This method is also what the overloaded "" (stringification)
           operator of this class calls.

=cut

#---------------
sub to_string {
#---------------
    my $self = shift;
    # Each piece comes from the corresponding accessor of this object.
    my $str = "[BlastResult] " . $self->analysis_method . " query=" . $self->query_name . " " . $self->query_description .", db=" . $self->database_name;
    return $str;
}
=head2 database_name

 Usage    : $name = $result->database_name;
 Purpose  : Get the name of the database that was searched.
 Returns  : The value of name() called on the analysis_subject() object,
            or the empty string when analysis_subject() is not a reference.
 Argument : n/a

=cut

#---------------
sub database_name {
#---------------
    my $self = shift;

    # Without a database object there is nothing to ask.
    return '' unless ref $self->analysis_subject;

    my $db_name = $self->analysis_subject->name;
    return $db_name;
}
=head2 database_entries

 Title   : database_entries
 Usage   : $num_entries = $result->database_entries()
 Function: Used to obtain the number of entries contained in the database.
 Returns : The value of entries() called on the analysis_subject() object,
           or the empty string when analysis_subject() is not a reference.
 Args    : none (this implementation is read-only; arguments are ignored)

=cut

#---------------
sub database_entries {
#---------------
    my $self = shift;

    # Without a database object there is nothing to ask.
    return '' unless ref $self->analysis_subject;

    my $entry_count = $self->analysis_subject->entries;
    return $entry_count;
}
=head2 database_letters

 Title   : database_letters
 Usage   : $size = $result->database_letters()
 Function: Used to obtain the size of database that was searched against.
 Returns : The value of letters() called on the analysis_subject() object,
           or the empty string when analysis_subject() is not a reference.
 Args    : none (this implementation is read-only; arguments are ignored)

=cut

#---------------
sub database_letters {
#---------------
    my $self = shift;

    # Without a database object there is nothing to ask.
    return '' unless ref $self->analysis_subject;

    my $letter_count = $self->analysis_subject->letters;
    return $letter_count;
}
=head2 hits

 Usage    : @hits = $result->hits;
 Purpose  : Get all hits that have been added to this result.
 Returns  : A list (copy) of the stored hit objects, in the order they were
            added; the empty list when no hits are stored. In scalar
            context, the number of hits.
 Argument : n/a

=cut

#---------------
sub hits {
#---------------
    my ($self) = @_;

    # Copy into a fresh array so callers cannot alter the internal list.
    my @stored = ref $self->{'_hits'} ? @{ $self->{'_hits'} } : ();

    return @stored;
}
=head2 add_hit

 Usage     : $blast->add_hit( $hit );
 Purpose   : Adds a hit object to the collection of hits in this BLAST result.
 Returns   : n/a
 Argument  : A Bio::Search::Hit::HitI object
 Throws    : Bio::Root::BadParameter if the argument is not a
             Bio::Search::Hit::HitI object. This includes undef, plain
             strings and unblessed references.
 Comments  : For PSI-BLAST, hits from all iterations are lumped together.
             For any given hit, you can determine the iteration in which it was
             found by checking $hit->iteration().

=cut

#---------------
sub add_hit {
#---------------
    my ($self, $hit) = @_;

    # ->isa() is called inside eval so that an unblessed reference
    # (e.g. a plain array ref) is reported through throw() below instead
    # of aborting with Perl's "Can't call method on unblessed reference".
    my $is_hit = ref($hit)
        && do { local $@; eval { $hit->isa('Bio::Search::Hit::HitI') } };

    unless( $is_hit ) {
        $self->throw(-class =>'Bio::Root::BadParameter',
                     -text => "Can't add hit: not a Bio::Search::Hit::HitI: $hit"
                    );
        return;   # only reached if throw() is overridden not to die
    }

    # NOTE: duplicate hits are NOT filtered out, even across multiple
    # PSI-BLAST iterations. The name list kept in '_hit_names' is what a
    # duplicate check would compare against.
    push @{$self->{'_hits'}}, $hit;
    push @{$self->{'_hit_names'}}, $hit->name;
}
=head2 is_signif

 Usage     : $blast->is_signif();
 Purpose   : Determine if the BLAST report contains significant hits.
 Returns   : Boolean (the stored significance flag; undef if never set)
 Argument  : n/a
 Comments  : BLAST reports without significant hits but with defined
           : significance criteria will throw exceptions during construction.
           : This obviates the need to check significant() for
           : such objects.

=cut

#------------
sub is_signif {
    my ($self) = @_;
    # Read-only view of the flag; this method never modifies it.
    return $self->{'_is_significant'};
}
#------------
=head2 matrix

 Usage     : $name = $blast_object->matrix();            (get)
             $blast_object->matrix($name);               (set)
 Purpose   : Get/set the name of the scoring matrix used.
 Argument  : string (when setting); stored as given
 Returns   : string or undef if not defined
 Comments  : TODO: Deprecate this and implement get_parameter('matrix').

=cut

#------------
sub matrix {
#------------
    my ($self, @new_value) = @_;

    # Any supplied argument, even undef, replaces the stored name.
    $self->{'_matrix'} = $new_value[0] if @new_value;

    return $self->{'_matrix'};
}
=head2 raw_statistics

 Usage     : @stats = $blast_result->raw_statistics();          (get)
             $blast_result->raw_statistics(\@lines);            (set)
 Purpose   : Get/set the raw, unparsed statistical parameter section of the
             Blast report. This is the section at the end after the last
             HSP alignment.
 Argument  : array reference (when setting); the reference itself is stored
 Returns   : Array of strings (empty if nothing has been stored)
 Throws    : Bio::Root::BadParameter if the argument is not an ARRAY ref.

=cut

#------------
sub raw_statistics {
#------------
    my ($self, @args) = @_;

    if( @args ) {
        my $lines = $args[0];
        if( ref($lines) ne 'ARRAY' ) {
            $self->throw(-class =>'Bio::Root::BadParameter',
                         -text => "Can't set statistical params: not an ARRAY ref: $lines"
                        );
        }
        else {
            $self->{'_raw_statistics'} = $lines;
        }
    }

    # Make sure there is always an array to dereference.
    $self->{'_raw_statistics'} = [] if not defined $self->{'_raw_statistics'};

    return @{ $self->{'_raw_statistics'} };
}
=head2 no_hits_found

 Usage     : $nohits = $blast->no_hits_found( [iteration_number] );
 Purpose   : Get boolean indicator indicating whether or not any hits
             were present in the report.
             This is NOT the same as determining the number of hits via
             the hits() method, which will return zero hits if there were no
             hits in the report or if all hits were filtered out during the parse.
             Thus, this method can be used to distinguish these possibilities
             for hitless reports generated when filtering.
 Returns   : Boolean. True means "no hits were found"; false (0 or undef)
             means hits were found.
 Argument  : (optional) integer indicating the iteration number (PSI-BLAST)
             If iteration number is not specified and this is a PSI-BLAST result,
             then this method will return true only if all iterations had
             no hits found.

=cut

#-----------
sub no_hits_found {
#-----------
    my ($self, $round) = @_;

    # Watch the double negative: a true result means NO hits were found.

    # A specific iteration was asked for: report its flag directly.
    return $self->{"_iteration_$round"}->{'_no_hits_found'} if defined $round;

    # Single-iteration report: the flag of iteration 1 is the answer.
    return $self->{"_iteration_1"}->{'_no_hits_found'}
        unless $self->{'_iterations'} > 1;

    # Several iterations: true only if every one of them is flagged.
    # The scan stops at the first iteration without the flag.
    foreach my $iter ( 1..$self->{'_iterations'} ) {
        return 0 if not defined $self->{"_iteration_$iter"}->{'_no_hits_found'};
    }
    return 1;
}
=head2 set_no_hits_found

 Usage     : $blast->set_no_hits_found( [iteration_number] );
 Purpose   : Record that no hits were present in the report for the given
             iteration.
 Returns   : n/a
 Argument  : (optional) integer indicating the iteration number (PSI-BLAST).
             Defaults to 1 when omitted or false.

=cut

#-----------
sub set_no_hits_found {
#-----------
    my $self  = shift;
    my $round = shift || 1;   # a missing or false iteration number means 1

    $self->{"_iteration_$round"}->{'_no_hits_found'} = 1;
}
=head2 iterations

 Usage     : $num_iterations = $blast->iterations;  (get)
             $blast->iterations($num_iterations);   (set)
 Purpose   : Set/get the number of iterations in the Blast Report (PSI-BLAST).
 Returns   : Total number of iterations in the report (undef if never set)
 Argument  : integer (when setting); any defined value is stored, including 0

=cut

#----------------
sub iterations {
#----------------
    my $self  = shift;
    my $count = shift;

    # undef means "no new value"; every defined value is accepted.
    $self->{'_iterations'} = $count if defined $count;

    return $self->{'_iterations'};
}
=head2 psiblast

 Usage     : if( $blast->psiblast ) { ... }
 Purpose   : Set/get a boolean indicator whether or not the report
             is a PSI-BLAST report.
 Returns   : 1 if PSI-BLAST, undef if not.
 Argument  : 1 (when setting). Any true value sets the flag; the flag
             cannot be cleared through this method.

=cut

#----------------
sub psiblast {
#----------------
    my $self = shift;
    my $flag = shift;

    # Whatever true value is passed, the stored flag is normalised to 1.
    $self->{'_psiblast'} = 1 if $flag;

    return $self->{'_psiblast'};
}
1;
__END__