Raw content of Bio::Factory::BlastHitFactory
#-----------------------------------------------------------------
# $Id: BlastHitFactory.pm,v 1.7 2002/10/22 09:38:09 sac Exp $
#
# BioPerl module for Bio::Factory::BlastHitFactory
#
# Cared for by Steve Chervitz
#
# You may distribute this module under the same terms as perl itself
#-----------------------------------------------------------------
# POD documentation - main docs before the code
=head1 NAME
Bio::Factory::BlastHitFactory - Factory for Bio::Search::Hit::BlastHit objects
=head1 SYNOPSIS
use Bio::Factory::BlastHitFactory;
my $hit_fact = Bio::Factory::BlastHitFactory->new();
my $hit = $hit_fact->create_hit( %parameters );
See documentation for create_hit() for information about C<%parameters>.
=head1 DESCRIPTION
This module encapsulates code for creating Bio::Search::Hit::BlastHit
and Bio::Search::HSP::BlastHSP objects from traditional BLAST report
data (i.e., non-XML formatted).
=head1 FEEDBACK
=head2 Mailing Lists
User feedback is an integral part of the evolution of this
and other Bioperl modules. Send your comments and suggestions preferably
to one of the Bioperl mailing lists.
Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/MailList.html - About the mailing lists
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution. Bug reports can be submitted via email
or the web:
bioperl-bugs@bio.perl.org
http://bugzilla.bioperl.org/
=head1 AUTHOR
Steve Chervitz Esac@bioperl.orgE
See L for where to send bug reports and comments.
=head1 COPYRIGHT
Copyright (c) 2001 Steve Chervitz. All Rights Reserved.
=head1 DISCLAIMER
This software is provided "as is" without warranty of any kind.
=head1 APPENDIX
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
=cut
#'
package Bio::Factory::BlastHitFactory;
use strict;
use Bio::Root::Root;
use Bio::Factory::HitFactoryI;
use Bio::Search::Hit::BlastHit;
use vars qw(@ISA);
@ISA = qw(Bio::Root::Root Bio::Factory::HitFactoryI);
sub new {
my ($class, @args) = @_;
my $self = $class->SUPER::new(@args);
return $self;
}
=head2 create_hit
Title : create_hit
Usage : $hit = $factory->create_hit( %params );
Function: Creates a new Bio::Search::Hit::BlastHit object given
raw BLAST report data, formatted in traditional BLAST report format.
Returns : A single Bio::Search::Hit::BlastHit object
Args : Named parameters to be passed to the BlastHit object.
Parameter keys are case-insensitive.
See Bio::Search::Hit::BlastHit::new() documentation for
details about these parameters.
The only additional parameter required is:
-RESULT => a Bio::Search::Result::BlastResult object.
From this result object, the program, query length,
and iteration are obtained and passed on to the BlastHit.
=cut
sub create_hit {
my ($self, @args) = @_;
my ($blast, $raw_data, $shallow_parse) =
$self->_rearrange( [qw(RESULT
RAW_DATA
SHALLOW_PARSE)], @args);
my %args = @args;
$args{'-PROGRAM'} = $blast->analysis_method;
$args{'-QUERY_LEN'} = $blast->query_length;
$args{'-ITERATION'} = $blast->iterations;
my $hit = Bio::Search::Hit::BlastHit->new( %args );
unless( $shallow_parse ) {
$self->_add_hsps( $hit,
$args{'-PROGRAM'},
$args{'-QUERY_LEN'},
$blast->query_name,
@{$raw_data} );
}
return $hit;
}
#=head2 _add_hsps
#
# Usage : Private method; called automatically by create_hit().
# Purpose : Creates BlastHSP.pm objects for each HSP in a BLAST hit alignment.
# : Also collects the full description of the hit from the
# : HSP alignment section.
# Returns : n/a
# Argument : (<$BlastHit_object>, <$program_name>, <$query_length>, <$query_name>, <@raw_data>
# 'raw data list' consists of traditional BLAST report
# format for a single HSP, supplied as a list of strings.
# Throws : Warnings for each BlastHSP.pm object that fails to be constructed.
# : Exception if no BlastHSP.pm objects can be constructed.
# : Exception if can't parse length data for hit sequence.
# Comments : Requires Bio::Search::HSP::BlastHSP.pm.
# : Sets the description using the full string present in
# : the alignment data.
#=cut
#--------------
sub _add_hsps {
#--------------
my( $self, $hit, $prog, $qlen, $qname, @data ) = @_;
my $start = 0;
my $hspCount = 0;
require Bio::Search::HSP::BlastHSP;
# printf STDERR "\nBlastHit \"$hit\" _process_hsps(). \nDATA (%d lines) =\n@data\n", scalar(@data);
my( @hspData, @hspList, @errs, @bad_names );
my($line, $set_desc, @desc);
$set_desc = 0;
my $hname = $hit->name;
my $hlen;
hit_loop:
foreach $line( @data ) {
if( $line =~ /^\s*Length = ([\d,]+)/ ) {
$hit->_set_description(@desc);
$set_desc = 1;
$hit->_set_length($1);
$hlen = $hit->length;
next hit_loop;
} elsif( !$set_desc) {
$line =~ s/^\s+|\s+$//g;
push @desc, $line;
next hit_loop;
} elsif( $line =~ /^\s*Score/ ) {
## This block is for setting multiple HSPs.
if( not scalar @hspData ) {
$start = 1;
push @hspData, $line;
next hit_loop;
} elsif( scalar @hspData) {
$hspCount++;
$self->verbose and do{ print STDERR +( $hspCount % 10 ? "+" : "+\n" ); };
# print STDERR "\nBlastHit: setting HSP #$hspCount \n@hspData\n";
my $hspObj = Bio::Search::HSP::BlastHSP->new
(-RAW_DATA => \@hspData,
-RANK => $hspCount,
-PROGRAM => $prog,
-QUERY_NAME => $qname,
-HIT_NAME => $hname,
);
push @hspList, $hspObj;
@hspData = ();
push @hspData, $line;
next;
} else {
push @hspData, $line;
}
} elsif( $start ) {
## This block is for setting the last HSP (which may be the first as well!).
if( $line =~ /^(end|>|Parameters|CPU|Database:)/ ) {
$hspCount++;
$self->verbose and do{ print STDERR +( $hspCount % 10 ? "+" : "+\n" ); };
# print STDERR "\nBlastHit: setting HSP #$hspCount \n@hspData";
my $hspObj = Bio::Search::HSP::BlastHSP->new
(-RAW_DATA => \@hspData,
-RANK => $hspCount,
-PROGRAM => $prog,
-QUERY_NAME => $qname,
-HIT_NAME => $hname,
);
push @hspList, $hspObj;
} else {
push @hspData, $line;
}
}
}
$hit->{'_length'} or $self->throw( "Can't determine hit sequence length.");
# Adjust logical length based on BLAST flavor.
if($prog =~ /TBLAST[NX]/) {
$hit->{'_logical_length'} = $hit->{'_length'} / 3;
}
$hit->{'_hsps'} = [ @hspList ];
# print STDERR "\n--------> Done building HSPs for $hit (total HSPS: ${\$hit->num_hsps})\n";
}
1;