Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods |
WebCvs | Raw content |
use Bio::EnsEMBL::Analysis::Tools::BPlite;
my $report = new Bio::EnsEMBL::Analysis::Tools::BPlite(-fh=>\*STDIN);
{ $report->query; $report->database; while(my $sbjct = $report->nextSbjct) { $sbjct->name; while (my $hsp = $sbjct->nextHSP) { $hsp->score; $hsp->bits; $hsp->percent; $hsp->P; $hsp->match; $hsp->positive; $hsp->length; $hsp->querySeq; $hsp->sbjctSeq; $hsp->homologySeq; $hsp->query->start; $hsp->query->end; $hsp->subject->start; $hsp->subject->end; $hsp->subject->seqname; $hsp->subject->overlaps($exon); } } # the following line takes you to the next report in the stream/file # it will return 0 if that report is empty, # but that is valid for an empty blast report. # Returns -1 for EOF. last if ($report->_parseHeader == -1)); redo }
my $report = new Bio::EnsEMBL::Analysis::Tools::BPlite(-fh=>\*STDIN); # or any other filehandleThe report has two attributes (query and database), and one method (nextSbjct).
$report->query; # access to the query nameA subject is a BLAST hit, which should not be confused with an HSP (below). A
$report->database; # access to the database name
$report->nextSbjct; # gets the next subject
while(my $sbjct = $report->nextSbjct) {
# canonical form of use is in a while loop
}
$sbjct->name; # access to the subject nameAn HSP is a high scoring pair, or simply an alignment.
"$sbjct"; # overloaded to return name
$sbjct->nextHSP; # gets the next HSP from the sbjct
while(my $hsp = $sbjct->nextHSP) {
# canonical form is again a while loop
}
$hsp->score;I've included a little bit of overloading for double quote variable
$hsp->bits;
$hsp->percent;
$hsp->P;
$hsp->match;
$hsp->positive;
$hsp->length;
$hsp->querySeq; $hsp->qs;
$hsp->sbjctSeq; $hsp->ss;
$hsp->homologySeq; $hsp->hs;
$hsp->query->start;
$hsp->query->end;
$hsp->query->seqname;
$hsp->subject->primary_tag; # "similarity"
$hsp->subject->source_tag; # "BLAST"
$hsp->subject->start;
$hsp->subject->end;
...
"$hsp"; # overloaded for query->start..query->end bits
my $report = new Bio::EnsEMBL::Analysis::Tools::BPlite(-fh=>\*STDIN);The output of such code might look like this:
while(my $sbjct = $report->nextSbjct) {
print "$sbjct\n";
while(my $hsp = $sbjct->nextHSP) {
print "\t$hsp\n";
}
}
>foo
100..155 29.5
268..300 20.1
>bar
100..153 28.5
265..290 22.1
_fastForward | No description | Code |
_parseHeader | No description | Code |
database | Description | Code |
new | Description | Code |
nextSbjct | Description | Code |
next_feature | Description | Code |
pattern | Description | Code |
qlength | Description | Code |
query | Description | Code |
query_pattern_location | Description | Code |
database | code | next | Top |
Title : database |
new | code | prev | next | Top |
Title : new |
nextSbjct | code | prev | next | Top |
Title : nextSbjct |
next_feature | code | prev | next | Top |
Title : next_feature |
pattern | code | prev | next | Top |
Title : pattern |
qlength | code | prev | next | Top |
Title : qlength |
query | code | prev | next | Top |
Title : query |
query_pattern_location | code | prev | next | Top |
Title : query_pattern_location |
_fastForward | description | prev | next | Top |
my ($self) = @_; return 0 if $self->{'REPORT_DONE'}; # empty report}
return 0 if $self->{'LASTLINE'} =~ /^Parameters|^\s+Database:|^\s+Posted date:/; return 1 if $self->{'LASTLINE'} =~ /^>/; my $FH = $self->_fh(); my $capture; while(<$FH>) { if ($_ =~ /^>|^Parameters|^\s+Database:|^\s+Posted date:/) { $self->{'LASTLINE'} = $_; return 1; } } warning("Possible error while parsing BLAST report!"); } 1; __END__
_parseHeader | description | prev | next | Top |
my ($self) = @_; my $FH = $self->_fh(); # normally, _parseHeader will break out of the parse as soon as it reaches a new Subject (i.e. the first one after the header)}
# if you call _parseHeader twice in a row, with nothing in between, all you accomplish is a ->nextSubject call..
# so we need a flag to indicate that we have *entered* a header, before we are allowed to leave it!
my $header_flag = 0; # here is the flag/ It is "false" at first, and is set to "true" when any valid header element is encountered
$self->{'REPORT_DONE'} = 0; # reset this bit for a new report
while(<$FH>) { if ($_ =~ /^Query=(?:\s+([^\(]+))?/) { $header_flag = 1; # valid header element found
my $query = $1; while(<$FH>) { last if $_ !~ /\S/; $query .= $_; } $query =~ s/\s+/ /g; $query =~ s/^>//; $query =~ /\((\d+)\s+\S+\)\s*$/; my $length = $1; $self->{'QUERY'} = $query; $self->{'LENGTH'} = $length; } elsif ($_ =~ /^Database:\s+(.+)/) {$header_flag = 1;$self->{'DATABASE'} = $1} # valid header element found
elsif ($_ =~ /^\s*pattern\s+(\S+).*position\s+(\d+)\D/) { # For PHIBLAST reports
$header_flag = 1; # valid header element found
$self->{'PATTERN'} = $1; push (@{$self->{'QPATLOCATION'}}, $2); # $self->{'QPATLOCATION'} = $2;
} elsif (($_ =~ /^>/) && ($header_flag==1)) {$self->{'LASTLINE'} = $_; return 1} # only leave if we have actually parsed a valid header!
elsif (($_ =~ /^Parameters|^\s+Database:/) && ($header_flag==1)) { # if we entered a header, and saw nothing before the stats at the end, then it was empty
$self->{'LASTLINE'} = $_; return 0; # there's nothing in the report
} } return -1; # EOF
database | description | prev | next | Top |
shift->{'DATABASE'}}
new | description | prev | next | Top |
my ($caller, @args) = @_; my $class = ref($caller) || $caller; my $self = bless({}, $class); # initialize IO}
$self->_initialize_io(@args); $self->{'LASTLINE'} = ""; $self->{'QPATLOCATION'} = []; # Anonymous array of query pattern locations for PHIBLAST
if ($self->_parseHeader) {$self->{'REPORT_DONE'} = 0} # there are alignments
else {$self->{'REPORT_DONE'} = 1} # empty report
return $self; # success - we hope!
} # for SeqAnalysisParserI compliance
nextSbjct | description | prev | next | Top |
my ($self) = @_; $self->_fastForward or return undef; #######################}
# get all sbjct lines #
#######################
my $def = $self->{'LASTLINE'}; my $FH = $self->_fh(); while(<$FH>) { if ($_ !~ /\w/) {next} elsif ($_ =~ /Strand HSP/) {next} # WU-BLAST non-data
elsif ($_ =~ /^\s{0,2}Score/) {$self->{'LASTLINE'} = $_; last} elsif ($_ =~ /^Parameters|^\s+Database:|^\s+Posted date:/) {$self->{'LASTLINE'} = $_; last} else {$def .= $_} } $def =~ s/\s+/ /g; $def =~ s/\s+$//g; $def =~ s/Length = ([\d,]+)$//g; my $length = $1; return undef unless $def =~ /^>/; $def =~ s/^>//; ####################
# the Sbjct object #
####################
#print "creating Sbjct name = ".$def."\n length = ".$length."\n fh = ".$self->_fh()."\n lastline = ".$self->{'LASTLINE'}."\n parent = ".$self."\n";
my $sbjct = new Bio::EnsEMBL::Analysis::Tools::BPlite::Sbjct('-name'=>$def, '-length'=>$length, '-fh'=>$self->_fh(), '-lastline'=>$self->{'LASTLINE'}, '-parent'=>$self); return $sbjct; } # begin private routines
next_feature | description | prev | next | Top |
my ($self) = @_; my ($sbjct, $hsp); $sbjct = $self->{'_current_sbjct'}; unless( defined $sbjct ) { $sbjct = $self->{'_current_sbjct'} = $self->nextSbjct; return undef unless defined $sbjct; } $hsp = $sbjct->nextHSP; unless( defined $hsp ) { $self->{'_current_sbjct'} = undef; return $self->next_feature; } return $hsp || undef;}
pattern | description | prev | next | Top |
shift->{'PATTERN'}}
qlength | description | prev | next | Top |
shift->{'LENGTH'}}
query | description | prev | next | Top |
shift->{'QUERY'}}
query_pattern_location | description | prev | next | Top |
shift->{'QPATLOCATION'}}
AUTHORS | Top |
ACKNOWLEDGEMENTS | Top |
COPYRIGHT | Top |
DISCLAIMER | Top |