Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods |
WebCvs | Raw content |
use Bio::Tools::BPlite;
my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);

# Walk every report in the stream: each pass of the bare block handles one
# report, and 'redo' re-enters the block for the next one.
{
    $report->query;
    $report->database;
    while (my $sbjct = $report->nextSbjct) {
        $sbjct->name;
        while (my $hsp = $sbjct->nextHSP) {
            $hsp->score;
            $hsp->bits;
            $hsp->percent;
            $hsp->P;
            $hsp->EXP;
            $hsp->match;
            $hsp->positive;
            $hsp->length;
            $hsp->querySeq;
            $hsp->sbjctSeq;
            $hsp->homologySeq;
            $hsp->query->start;
            $hsp->query->end;
            $hsp->hit->start;
            $hsp->hit->end;
            $hsp->hit->seq_id;
            $hsp->hit->overlaps($exon);   # $exon: a feature supplied by the caller
        }
    }

    # the following line takes you to the next report in the stream/file
    # it will return 0 if that report is empty,
    # but that is valid for an empty blast report.
    # Returns -1 for EOF.
    last if ($report->_parseHeader == -1);
    redo;
}
my $report = new Bio::Tools::BPlite(-fh=>\*STDIN); # or any other filehandle
The report has two attributes (query and database), and one method (nextSbjct).
$report->query; # access to the query name
$report->database; # access to the database name
$report->nextSbjct; # gets the next subject
while(my $sbjct = $report->nextSbjct) {
# canonical form of use is in a while loop
}
A subject is a BLAST hit, which should not be confused with an HSP (below). A [...]
$sbjct->name; # access to the subject name
$sbjct->nextHSP; # gets the next HSP from the sbjct
while(my $hsp = $sbjct->nextHSP) {
# canonical form is again a while loop
}
An HSP is a high scoring pair, or simply an alignment. HSP objects [...]
$hsp->score;
$hsp->bits;
$hsp->percent;
$hsp->P;
$hsp->match;
$hsp->positive;
$hsp->length;
$hsp->querySeq; $hsp->qs;
$hsp->sbjctSeq; $hsp->ss;
$hsp->homologySeq; $hsp->hs;
$hsp->query->start;
$hsp->query->end;
$hsp->query->seq_id;
$hsp->hit->primary_tag; # "similarity"
$hsp->hit->source_tag; # "BLAST"
$hsp->hit->start;
$hsp->hit->end;
...
So a very simple look into a BLAST report might look like this.
my $report = new Bio::Tools::BPlite(-fh=>\*STDIN);
while(my $sbjct = $report->nextSbjct) {
print ">",$sbjct->name,"\n";
while(my $hsp = $sbjct->nextHSP) {
print "\t",$hsp->start,"..",$hsp->end," ",$hsp->bits,"\n";
}
}
The output of such code might look like this:
>foo
100..155 29.5
268..300 20.1
>bar
100..153 28.5
265..290 22.1
_fastForward | No description | Code |
_parseHeader | No description | Code |
database | Description | Code |
new | Description | Code |
nextSbjct | Description | Code |
next_feature | Description | Code |
pattern | Description | Code |
qlength | Description | Code |
query | Description | Code |
query_pattern_location | Description | Code |
database | code | next | Top |
Title : database |
new | code | prev | next | Top |
Title : new |
nextSbjct | code | prev | next | Top |
Title : nextSbjct |
next_feature | code | prev | next | Top |
Title : next_feature |
pattern | code | prev | next | Top |
Title : pattern |
qlength | code | prev | next | Top |
Title : qlength |
query | code | prev | next | Top |
Title : query |
query_pattern_location | code | prev | next | Top |
Title : query_pattern_location |
_fastForward | description | prev | next | Top |
# _fastForward
#
# Purpose : Skip forward in the input until the start of the next subject,
#           i.e. a line beginning with '>'.
# Returns : 1 when a '>' line was found (the line is pushed back so the
#           caller can read it again);
#           0 when the report is already flagged as done, when a
#           footer/statistics line is reached first, or when the input
#           runs out.
# Args    : none
sub _fastForward {
    my ($self) = @_;
    return 0 if $self->{'REPORT_DONE'}; # empty report

    # A lexical buffer keeps the caller's $_ untouched.
    while ( defined( my $line = $self->_readline() ) ) {
        if ( $line =~ /^Histogram|^Searching|^Parameters|^\s+Database:|^\s+Posted date:/ ) {
            return 0;
        }
        elsif ( $line =~ /^>/ ) {
            $self->_pushback($line);
            return 1;
        }
    }

    unless ( $self->{'BTK'} ) { # Paracel BTK reports have no footer
        $self->warn("Possible error (1) while parsing BLAST report!");
    }

    # Input is exhausted. Return false explicitly: without this the sub's
    # value was whatever the 'unless' statement last evaluated, which is the
    # (true) BTK flag for Paracel BTK reports.
    return 0;
}

1;
__END__
_parseHeader | description | prev | next | Top |
# _parseHeader
#
# Purpose : Read the header of the next report from the stream and record
#           what it finds on the object: 'QUERY', 'LENGTH', 'BLAST_TYPE',
#           'BLAST_VERSION', 'BTK', 'DATABASE', 'PATTERN' and
#           'QPATLOCATION'. Resets 'REPORT_DONE' to 0 on entry.
# Returns : 1  when a '>' line is reached after a valid header element
#              (the line is pushed back);
#           0  when a "Parameters" / indented "Database:" line is reached
#              after a valid header element, i.e. the report has no hits
#              (the line is pushed back);
#           -1 at end of input.
# Args    : none
sub _parseHeader {
    my ($self) = @_;

    # normally, _parseHeader will break out of the parse as soon as it
    # reaches a new Subject (i.e. the first one after the header) if you
    # call _parseHeader twice in a row, with nothing in between, all you
    # accomplish is a ->nextSubject call..  so we need a flag to
    # indicate that we have *entered* a header, before we are allowed to
    # leave it!
    my $header_flag = 0; # here is the flag/ It is "false" at first, and
                         # is set to "true" when any valid header element
                         # is encountered

    $self->{'REPORT_DONE'} = 0; # reset this bit for a new report

    # Lexical line buffers keep the caller's $_ untouched.
    while ( defined( my $line = $self->_readline() ) ) {
        $line =~ s/\(\s*\)//;
        if ( $line =~ /^Query=(?:\s+([^\(]+))?/ ) {
            $header_flag = 1; # valid header element found

            # The capture is optional, so $1 is undefined when nothing
            # follows "Query="; start from an empty string in that case.
            my $query = defined $1 ? $1 : '';
            while ( defined( my $more = $self->_readline() ) ) {
                # Continue reading query name until encountering either
                # a line that starts with "Database" or a blank line.
                # The latter condition is needed in order to be able to
                # parse megablast output correctly, since Database comes
                # before (not after) the query.
                if ( ( $more =~ /^Database/ ) || ( $more =~ /^$/ ) ) {
                    $self->_pushback($more);
                    last;
                }
                $query .= $more;
            }
            $query =~ s/\s+/ /g;
            $query =~ s/^>//;

            my $length = 0;
            if ( $query =~ /\(([\d,]+)\s+\S+\)\s*$/ ) {
                $length = $1;
                $length =~ s/,//g;
            }
            else {
                $self->debug("length is 0 for '$query'\n");
            }
            $self->{'QUERY'}  = $query;
            $self->{'LENGTH'} = $length;
        }
        elsif ( $line =~ /^(<b>)?(T?BLAST[NPX])\s+([\w\.-]+)\s+(\[[\w-]*\])/ ) {
            $self->{'BLAST_TYPE'}    = $2;
            $self->{'BLAST_VERSION'} = $3;
        } # BLAST report type - not a valid header element # JB949
        # Support Paracel BTK output
        elsif ( $line =~ /(^[A-Z0-9_]+)\s+BTK\s+/ ) {
            $self->{'BLAST_TYPE'} = $1;
            $self->{'BTK'}        = 1;
        }
        elsif ( $line =~ /^Database:\s+(.+)/ ) {
            $header_flag = 1; # valid header element found
            $self->{'DATABASE'} = $1;
        }
        elsif ( $line =~ /^\s*pattern\s+(\S+).*position\s+(\d+)\D/ ) {
            # For PHIBLAST reports
            $header_flag = 1; # valid header element found
            $self->{'PATTERN'} = $1;
            push( @{ $self->{'QPATLOCATION'} }, $2 );
        }
        elsif ( ( $line =~ /^>/ ) && ( $header_flag == 1 ) ) {
            # only leave if we have actually parsed a valid header!
            $self->_pushback($line);
            return 1;
        }
        elsif ( ( $line =~ /^Parameters|^\s+Database:/ ) && ( $header_flag == 1 ) ) {
            # if we entered a header, and saw nothing before the stats at
            # the end, then it was empty
            $self->_pushback($line);
            return 0; # there's nothing in the report
        }

        # bug fix suggested by MI Sadowski via Martin Lomas
        # see bug report #1118
        # NOTE(review): method names are case-sensitive, so can('EOF') looks
        # like it would be false for ordinary IO handle objects - confirm
        # before relying on this check.
        if (   ref( $self->_fh() ) !~ /GLOB/
            && $self->_fh()->can('EOF')
            && eof( $self->_fh() ) )
        {
            $self->warn("unexpected EOF in file\n");
            return -1;
        }
    }
    return -1; # EOF
}
database | description | prev | next | Top |
# database
#
# Returns the value stored under 'DATABASE' on the object (recorded by
# _parseHeader from the report's "Database:" line).
sub database {
    my ($self) = @_;
    return $self->{'DATABASE'};
}
new | description | prev | next | Top |
# new
#
# Purpose : Construct the parser, set up its I/O from the given arguments
#           and parse the header of the first report.
# Returns : the new object. 'REPORT_DONE' is 0 when the first report has
#           alignments and 1 otherwise.
# Args    : passed unchanged to the parent constructor and to
#           _initialize_io.
sub new {
    my ( $class, @args ) = @_;
    my $self = $class->SUPER::new(@args);

    # initialize IO
    $self->_initialize_io(@args);

    $self->{'QPATLOCATION'} = []; # Anonymous array of query pattern locations for PHIBLAST

    # _parseHeader returns 1 (alignments follow), 0 (empty report) or
    # -1 (end of input). -1 is a true value, so test for 1 explicitly;
    # otherwise an exhausted stream is flagged as having alignments.
    $self->{'REPORT_DONE'} = ( $self->_parseHeader == 1 ) ? 0 : 1;

    return $self; # success - we hope!
}
# for SeqAnalysisParserI compliance
nextSbjct | description | prev | next | Top |
# nextSbjct
#
# Purpose : Read the definition lines of the next subject (hit) and wrap
#           them in a Bio::Tools::BPlite::Sbjct object.
# Returns : a Bio::Tools::BPlite::Sbjct built with -name (the definition
#           text without the leading '>'), -length (digits of the
#           "Length = N" field, undef when absent) and -parent (this
#           object); undef when there is no further subject.
# Args    : none
sub nextSbjct {
    my ($self) = @_;
    $self->_fastForward or return undef;

    #######################
    # get all sbjct lines #
    #######################
    my $def = $self->_readline();
    return undef unless defined $def;   # input ended right after the fast-forward

    # A lexical buffer keeps the caller's $_ untouched.
    while ( defined( my $line = $self->_readline() ) ) {
        next if $line !~ /\w/;
        next if $line =~ /Strand HSP/;  # WU-BLAST non-data

        # The first HSP or the report footer ends the definition; hand the
        # line back to whoever reads next.
        if (   $line =~ /^\s{0,2}Score/
            || $line =~ /^Histogram|^Searching|^Parameters|^\s+Database:|^\s+Posted date:/ )
        {
            $self->_pushback($line);
            last;
        }
        $def .= $line;
    }
    $def =~ s/\s+/ /g;
    $def =~ s/\s+$//g;

    # Read $1 only when the substitution matched: after a failed match $1
    # still holds the capture of an earlier, unrelated match.
    my $length;
    if ( $def =~ s/Length = ([\d,]+)$//g ) {
        $length = $1;
        $length =~ s/,//g;   # drop thousands separators, as _parseHeader does for the query length
    }

    return undef unless $def =~ /^>/;
    $def =~ s/^>//;

    ####################
    # the Sbjct object #
    ####################
    my $sbjct = Bio::Tools::BPlite::Sbjct->new(
        '-name'   => $def,
        '-length' => $length,
        '-parent' => $self,
    );
    return $sbjct;
}
# begin private routines
next_feature | description | prev | next | Top |
# next_feature
#
# Purpose : Return HSPs one at a time across all subjects of the report,
#           moving on to the next subject whenever the current one has no
#           HSP left. The subject being read is kept in '_current_sbjct'.
# Returns : the next HSP, or undef when nextSbjct yields no further
#           subject.
# Args    : none
sub next_feature {
    my ($self) = @_;

    # Loop rather than recurse: one recursive call per subject without
    # HSPs can nest arbitrarily deep on long runs of such subjects.
    while (1) {
        my $sbjct = $self->{'_current_sbjct'};
        unless ( defined $sbjct ) {
            $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct;
            return undef unless defined $sbjct;
        }

        my $hsp = $sbjct->nextHSP;
        if ( defined $hsp ) {
            return $hsp || undef;
        }

        # Current subject is exhausted; forget it and try the next one.
        $self->{'_current_sbjct'} = undef;
    }
}
pattern | description | prev | next | Top |
# pattern
#
# Returns the value stored under 'PATTERN' on the object (recorded by
# _parseHeader from the "pattern ... position ..." line of a PHIBLAST
# report).
sub pattern {
    my ($self) = @_;
    return $self->{'PATTERN'};
}
qlength | description | prev | next | Top |
# qlength
#
# Returns the value stored under 'LENGTH' on the object (the query length
# recorded by _parseHeader; 0 when no length could be read from the query
# line).
sub qlength {
    my ($self) = @_;
    return $self->{'LENGTH'};
}
query | description | prev | next | Top |
# query
#
# Returns the value stored under 'QUERY' on the object (the query
# description recorded by _parseHeader from the "Query=" line and its
# continuation lines).
sub query {
    my ($self) = @_;
    return $self->{'QUERY'};
}
query_pattern_location | description | prev | next | Top |
# query_pattern_location
#
# Returns the value stored under 'QPATLOCATION' on the object: the array
# reference that new() initialises and to which _parseHeader appends the
# position from each PHIBLAST "pattern ... position N" line.
sub query_pattern_location {
    my ($self) = @_;
    return $self->{'QPATLOCATION'};
}
AUTHORS | Top |
ACKNOWLEDGEMENTS | Top |
CONTRIBUTORS | Top |
COPYRIGHT | Top |
DISCLAIMER | Top |