This object implements the SearchWriterI interface which will produce
a set of Text for a specific Bio::Search::Report::ReportI interface.
You can also provide the argument -filters => \%hash to filter the output at
the HSP, hit, or result level. %hash is an associative array which
contains any or all of the keys (HSP, HIT, RESULT). The values
pointed to by these keys would be references to a subroutine which
expects to be passed an object - one of Bio::Search::HSP::HSPI,
Bio::Search::Hit::HitI, and Bio::Search::Result::ResultI respectively.
Each function needs to return a boolean value as to whether or not the
passed element should be included in the output report - true if it is to be included, false if it is to be omitted.
For example to filter on sequences in the database which are too short
for your criteria you would do the following.
Define a hit filter method
sub hit_filter {
my $hit = shift;
return $hit->length > 100; # test if the hit sequence is
# long enough
}
my $writer = new Bio::SearchIO::Writer::TextResultWriter(
-filters => { 'HIT' => \&hit_filter }
);
Another example would be to filter HSPs on percent identity; let's
only include HSPs which are more than 75% identical.
sub hsp_filter {
my $hsp = shift;
return $hsp->percent_identity > 75;
}
my $writer = new Bio::SearchIO::Writer::TextResultWriter(
-filters => { 'HSP' => \&hsp_filter }
);
See
Bio::SearchIO::SearchWriterI for more info on the filter method.
This module will use the module Text::Wrap if it is installed to wrap
the Query description line. If you do not have Text::Wrap installed
this module will work fine but you won't have the Query line wrapped.
You will see a warning about this when you first instantiate a
TextResultWriter - to avoid these warnings from showing up, simply set
the verbosity upon initialization to -1 like this: my $writer = new
Bio::SearchIO::Writer::TextResultWriter(-verbose => -1);
# Build the citation text for the program that produced a search result.
#
# Args:
#   $result - expected to be an object that isa Bio::Search::Result::ResultI
#
# Returns:
#   "<algorithm> <version>\n" followed by a reference blurb when the
#   algorithm name matches BLAST (the WashU or the Altschul et al. text,
#   chosen by whether the version string matches /WashU/i) or FAST.
#   Returns the empty string for any other algorithm and for an argument
#   that is undefined, not an object, or not a ResultI.
sub algorithm_reference {
    my ($self, $result) = @_;

    # blessed() guards the ->isa call: invoking a method on an unblessed
    # reference (e.g. a plain hashref) would die rather than return ''.
    require Scalar::Util;
    return ''
        unless defined $result
        && Scalar::Util::blessed($result)
        && $result->isa('Bio::Search::Result::ResultI');

    my $algorithm = $result->algorithm;

    if ( $algorithm =~ /BLAST/i ) {
        my $version = $result->algorithm_version;
        my $res     = $algorithm . ' ' . $version . "\n";

        # The qq{} bodies below are emitted verbatim, so they must stay
        # flush-left with the closing brace at the start of its line.
        if ( $version =~ /WashU/i ) {
            return $res . qq{
Copyright (C) 1996-2000 Washington University, Saint Louis, Missouri USA.
All Rights Reserved.
Reference: Gish, W. (1996-2000) http://blast.wustl.edu
};
        }
        return $res . qq{
Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer,
Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997),
"Gapped BLAST and PSI-BLAST: a new generation of protein database search
programs", Nucleic Acids Res. 25:3389-3402.
};
    }
    elsif ( $algorithm =~ /FAST/i ) {
        return $algorithm . " " . $result->algorithm_version . "\n"
            . "\nReference: Pearson et al, Genomics (1997) 46:24-36\n";
    }

    return '';
}
} |
# Render one search result as a BLAST-like plain-text report.
#
# Args:
#   $result - the result object to render
#   $num    - optional counter; the start_report callback output is only
#             emitted when this is <= 1 (or not given)
#
# Returns:
#   Nothing (bare return) when $result is undefined, '' when a RESULT
#   filter is set and rejects $result, otherwise the report text: header,
#   one summary line per hit, the per-HSP alignments, then database
#   details, parameters and statistics.
#
# Hits and HSPs rejected by the HIT / HSP filters are skipped.
sub to_string {
    my ($self, $result, $num) = @_;
    $num ||= 0;
    return unless defined $result;

    my ($resultfilter, $hitfilter, $hspfilter) = ( $self->filter('RESULT'),
                                                   $self->filter('HIT'),
                                                   $self->filter('HSP') );
    return '' if ( defined $resultfilter && !$resultfilter->($result) );

    # Classify the search by algorithm name.  Everything defaults to ''
    # so an unrecognised algorithm falls through to untranslated,
    # non-protein output without uninitialised-value warnings.
    my ($qtype, $dbtype, $type) = ('', '', '');
    my $alg = $result->algorithm;
    if ( $alg =~ /T(?:FAST|BLAST)[XY]/i ) {
        $qtype = $dbtype = 'translated';
        $type  = 'PROTEIN';
    }
    elsif ( $alg =~ /T(?:FAST|BLAST)N/i ) {
        $dbtype = 'translated';
        $type   = 'PROTEIN';
    }
    elsif ( $alg =~ /(?:FAST|BLAST)N/i || $alg =~ /(?:WABA|EXONERATE)/i ) {
        $type = 'NUCLEOTIDE';
    }
    elsif ( $alg =~ /(?:FAST|BLAST)P/i || $alg =~ /SSEARCH/i ) {
        $type = 'PROTEIN';
    }
    elsif ( $alg =~ /(?:FAST|BLAST)[XY]/i ) {
        $qtype  = 'translated';
        $dbtype = 'PROTEIN';
        $type   = 'PROTEIN';
    }
    else {
        print STDERR "algorithm was ", $alg, " couldn't match\n";
    }

    # Coordinate units advanced per alignment column: 3 for a translated
    # sequence, 1 otherwise.  Keyed by the row label used further down.
    my %baselens = ( 'Sbjct:' => ( $dbtype eq 'translated' ) ? 3 : 1,
                     'Query:' => ( $qtype  eq 'translated' ) ? 3 : 1 );

    my $str;
    if ( $num <= 1 ) {
        $str = $self->start_report->($result);
    }
    $str .= $self->title->($result);
    $str .= $result->algorithm_reference || $self->algorithm_reference($result);
    $str .= $self->introduction->($result);

    # The qq{} bodies in this sub are emitted verbatim and therefore stay
    # flush-left.
    $str .= qq{
Score E
Sequences producing significant alignments: (bits) value
};

    # Alignments are collected separately so that they follow the
    # complete hit summary table.
    my $hspstr = '';
    if ( $result->can('rewind') ) {
        $result->rewind();
    }

    while ( my $hit = $result->next_hit ) {
        next if ( defined $hitfilter && !$hitfilter->($hit) );

        my $nm = $hit->name();
        $self->debug( "no $nm for name (".$hit->description(). "\n")
            unless $nm;

        # The parsed gi/accession are not used below; the call is kept
        # in case the id_parser callback is relied upon by callers.
        my ($gi, $acc) = $self->id_parser->($nm);

        # Summary line: description padded, or truncated with "...", to
        # exactly $MaxDescLen characters so the score columns line up.
        my $p    = "%-$MaxDescLen" . "s";
        my $desc = sprintf("%s %s", $nm, $hit->description);
        my $descsub;
        if ( length($desc) > $MaxDescLen ) {
            $descsub = sprintf($p,
                               substr($desc, 0, $MaxDescLen - 3) . "...");
        }
        else {
            $descsub = sprintf($p, $desc);
        }
        $str .= sprintf("%s %-4s %s\n",
                        $descsub,
                        defined $hit->raw_score    ? $hit->raw_score    : ' ',
                        defined $hit->significance ? $hit->significance : '?');

        my @hsps = $hit->hsps;
        $hspstr .= sprintf(">%s %s\n%9sLength = %d\n\n",
                           $hit->name,
                           defined $hit->description ? $hit->description : '',
                           '', $hit->length);

        foreach my $hsp ( @hsps ) {
            next if ( defined $hspfilter && !$hspfilter->($hsp) );

            $hspstr .= sprintf(" Score = %4s bits (%s), Expect = %s",
                               $hsp->bits, $hsp->score, $hsp->evalue);
            if ( $hsp->pvalue ) {
                $hspstr .= ", P = ".$hsp->pvalue;
            }
            $hspstr .= "\n";

            # NOTE(review): %d truncates the fraction*length product, so
            # floating-point error could print a count one too low.
            $hspstr .= sprintf(" Identities = %d/%d (%d%%)",
                               ( $hsp->frac_identical('total') *
                                 $hsp->length('total') ),
                               $hsp->length('total'),
                               POSIX::floor($hsp->frac_identical('total')
                                            * 100));
            if ( $type eq 'PROTEIN' ) {
                $hspstr .= sprintf(", Positives = %d/%d (%d%%)",
                                   ( $hsp->frac_conserved('total') *
                                     $hsp->length('total') ),
                                   $hsp->length('total'),
                                   POSIX::floor($hsp->frac_conserved('total') * 100));
            }
            if ( $hsp->gaps ) {
                $hspstr .= sprintf(", Gaps = %d/%d (%d%%)",
                                   $hsp->gaps('total'),
                                   $hsp->length('total'),
                                   POSIX::floor(100 * $hsp->gaps('total') / $hsp->length('total')));
            }
            $hspstr .= "\n";

            my ($hframe, $qframe)   = ( $hsp->hit->frame,
                                        $hsp->query->frame );
            my ($hstrand, $qstrand) = ( $hsp->hit->strand, $hsp->query->strand );

            # A frame line is printed only when at least one side has a
            # strand; a side without a strand has its frame discarded.
            if ( $hstrand || $qstrand ) {
                $hspstr .= " Frame = ";
                my ($signq, $signh);
                if ( $hstrand ) {
                    $signh = $hstrand < 0 ? '-' : '+';
                }
                else {
                    $hframe = undef;
                }
                if ( $qstrand ) {
                    $signq = $qstrand < 0 ? '-' : '+';
                }
                else {
                    $qframe = undef;
                }

                # The frame accessor value is shown incremented by one.
                if ( defined $hframe && !defined $qframe ) {
                    $hspstr .= "$signh".($hframe+1);
                }
                elsif ( defined $qframe && !defined $hframe ) {
                    $hspstr .= "$signq".($qframe+1);
                }
                else {
                    $hspstr .= sprintf(" %s%d / %s%d",
                                       $signq, $qframe+1,
                                       $signh, $hframe+1);
                }
            }
            $hspstr .= "\n\n";

            # The three rows of each alignment block.  On a reverse
            # strand the displayed coordinates run from end to start.
            # The query row is oriented by the query strand.
            my @hspvals = ( { 'name'      => 'Query:',
                              'seq'       => $hsp->query_string,
                              'start'     => ( $qstrand >= 0 ?
                                               $hsp->query->start :
                                               $hsp->query->end ),
                              'end'       => ( $qstrand >= 0 ?
                                               $hsp->query->end :
                                               $hsp->query->start ),
                              'index'     => 0,
                              'direction' => $qstrand || 1
                            },
                            { 'name'      => ' 'x6,
                              'seq'       => $hsp->homology_string,
                              'start'     => undef,
                              'end'       => undef,
                              'index'     => 0,
                              'direction' => 1
                            },
                            { 'name'      => 'Sbjct:',
                              'seq'       => $hsp->hit_string,
                              'start'     => ( $hstrand >= 0 ?
                                               $hsp->hit->start : $hsp->hit->end ),
                              'end'       => ( $hstrand >= 0 ?
                                               $hsp->hit->end : $hsp->hit->start ),
                              'index'     => 0,
                              'direction' => $hstrand || 1
                            }
                          );

            # Width of the widest coordinate, used to pad the start column.
            my ($numwidth) = sort { $b <=> $a } ( length($hspvals[0]->{'start'}),
                                                  length($hspvals[0]->{'end'}),
                                                  length($hspvals[2]->{'start'}),
                                                  length($hspvals[2]->{'end'}) );

            # Emit the alignment in slices of $AlignmentLineWidth columns.
            # Strictly '<' so that a length which is an exact multiple of
            # the width does not produce a trailing empty block.
            my $aln_len = $hsp->length('total');
            for ( my $count = 0; $count < $aln_len; $count += $AlignmentLineWidth ) {
                foreach my $v ( @hspvals ) {
                    my $piece = substr($v->{'seq'}, $v->{'index'} + $count,
                                       $AlignmentLineWidth);

                    # tr with an empty replacement only counts; gap
                    # characters do not advance the coordinate.
                    my $plen = ( $piece =~ tr/\-// );

                    my ($start, $end) = ('', '');
                    if ( defined $v->{'start'} ) {
                        $start = $v->{'start'};
                        my $d = $v->{'direction'} * ( $AlignmentLineWidth - $plen ) *
                            $baselens{ $v->{'name'} };
                        # A short final slice advances by its own length.
                        if ( length($piece) < $AlignmentLineWidth ) {
                            $d = ( length($piece) - $plen ) * $v->{'direction'} *
                                $baselens{ $v->{'name'} };
                        }
                        $end = $v->{'start'} + $d - $v->{'direction'};
                        $v->{'start'} += $d;
                    }
                    $hspstr .= sprintf("%s %-".$numwidth."s %s %s\n",
                                       $v->{'name'},
                                       $start,
                                       $piece,
                                       $end
                                      );
                }
                $hspstr .= "\n";
            }
        }
        $hspstr .= "\n";
    }
    $str .= "\n\n".$hspstr;

    # Arguments follow the label order: letters first, then sequences.
    $str .= sprintf(qq{ Database: %s
Posted date: %s
Number of letters in database: %s
Number of sequences in database: %s
Matrix: %s
},
                    $result->database_name(),
                    $result->get_statistic('posted_date') ||
                    POSIX::strftime("%b %d, %Y %I:%M %p", localtime),
                    _numwithcommas($result->database_letters()),
                    _numwithcommas($result->database_entries()),
                    $result->get_parameter('matrix') || '');

    if ( defined (my $open = $result->get_parameter('gapopen')) ) {
        $str .= sprintf("Gap Penalties Existence: %d, Extension: %d\n",
                        $open || 0, $result->get_parameter('gapext') || 0);
    }

    # Remaining parameters; matrix and gap penalties were printed above.
    foreach my $param ( grep { ! /matrix|gapopen|gapext/i }
                        $result->available_parameters ) {
        $str .= "$param: ". $result->get_parameter($param) ."\n";
    }

    $str .= "Search Statistics\n";
    my $expect = $result->get_parameter('expect');
    foreach my $stat ( sort grep { ! /posted_date/ }
                       $result->available_statistics ) {
        my $v = $result->get_statistic($stat);
        if ( $v =~ /^\d+$/ ) {
            $v = _numwithcommas($v);
        }
        # When an expect cutoff is known it replaces "cutoff" in the label.
        if ( defined $expect &&
             $stat eq 'seqs_better_than_cutoff' ) {
            $str .= "seqs_better_than_$expect: $v\n";
        }
        else {
            $str .= "$stat: $v\n";
        }
    }
    $str .= "\n\n";
    return $str;
}
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _