This is the parser for the output of Genewise. It takes either a file handle or
a file name and returns a Bio::SeqFeature::Gene::GeneStructure object.
# _get_strand
#
# Puts a coordinate pair into ascending order and reports which strand the
# original ordering implied.
#
# Args    : $start - first coordinate as it was read
#           $end   - second coordinate as it was read
# Returns : list ($start, $end, $strand) with $start <= $end; $strand is -1
#           when the incoming pair was descending, 1 otherwise (including
#           when both coordinates are equal).
# Throws  : via $self->throw when either coordinate is undefined.
sub _get_strand {
    my ($self, $start, $end) = @_;

    # Check definedness rather than truth, so that a coordinate of 0 is
    # treated as present instead of being reported as missing.
    defined $start or $self->throw("Need a start");
    defined $end   or $self->throw("Need an end");

    # A descending pair denotes the reverse strand; hand it back swapped.
    return ($end, $start, -1) if $start > $end;

    return ($start, $end, 1);
}
# next_prediction
#
# Reads the input in "//"-terminated records through $self->_readline and
# builds a gene structure from the first record that contains a "Gene <n>"
# line.
#
# For every "Exon <start> <end> phase <n>" entry in that record an exon
# feature is created and tagged with 'phase', 'Exon' (a running number
# starting at 1) and, when a protein id is known, 'Sequence'.  If the entry
# also carries a "Supporting" line with four coordinates, a FeaturePair
# (genomic feature, protein feature) is attached under the
# 'supporting_feature' tag.
#
# Returns : a Bio::SeqFeature::Gene::GeneStructure holding one transcript,
#           or nothing (undef in scalar context, the empty list in list
#           context) when no further record with a gene is available.
# Throws  : indirectly through _get_strand when exon or supporting
#           coordinates could not be extracted.
sub next_prediction {
    my ($self) = @_;

    my $genes      = Bio::SeqFeature::Gene::GeneStructure->new(-source => $Srctag);
    my $transcript = Bio::SeqFeature::Gene::Transcript->new(-source => $Srctag);

    # Read one "//"-terminated record per call to _readline instead of one
    # line; localised so the caller's record separator is restored.
    local $/ = "//";

    # A lexical record variable is used so the caller's $_ is left alone.
    while ( defined( my $record = $self->_readline ) ) {
        $self->debug($record) if $self->verbose > 0;

        # NOTE(review): score and ids are stored only while the accessor
        # still returns undef, so once set they are kept for later records
        # as well -- confirm this is intended for multi-record input.
        my ($score) = $record =~ m/Score\s+(\d+[\.][\d]+)/;
        $self->_score($score) unless defined $self->_score;

        my ($prot_id) = $record =~ m/Query protein:\s+(\S+)/;
        $self->_prot_id($prot_id) unless defined $self->_prot_id;

        my ($target_id) = $record =~ m/Target Sequence\s+(\S+)/;
        $self->_target_id($target_id) unless defined $self->_target_id;

        # Records without a gene section only contribute the values above.
        next unless $record =~ /Gene\s+\d+\n/;

        # Prefer exon entries that come with a "Supporting" line; fall back
        # to bare exon entries when there are none.
        my @exons = $record =~ m/(Exon .+\s+Supporting .+)/g;
        @exons = $record =~ m/(Exon .+\s+)/g unless @exons;

        my $nbr = 1;
        foreach my $e (@exons) {
            my ($e_start, $e_end, $phase) =
                $e =~ m/Exon\s+(\d+)\s+(\d+)\s+phase\s+(\d+)/;
            my $e_strand;
            ($e_start, $e_end, $e_strand) =
                $self->_get_strand($e_start, $e_end);

            # The transcript takes its strand from the first exon that
            # finds it unset (or 0).
            $transcript->strand($e_strand) unless $transcript->strand != 0;

            my $exon = Bio::SeqFeature::Gene::Exon->new(
                -seq_id => $self->_target_id,
                -source => $Srctag,
                -start  => $e_start,
                -end    => $e_end,
                -strand => $e_strand,
            );
            $exon->add_tag_value('phase', $phase);
            if ( $self->_prot_id ) {
                $exon->add_tag_value('Sequence', "Protein:" . $self->_prot_id);
            }
            $exon->add_tag_value("Exon", $nbr++);

            # Supporting line: genomic start/end followed by protein
            # start/end.
            if ( my ($geno_start, $geno_end, $prot_start, $prot_end) =
                 $e =~ m/Supporting\s+(\d+)\s+(\d+)\s+(\d+)\s+(\d+)/ )
            {
                my $prot_strand;
                ($prot_start, $prot_end, $prot_strand) =
                    $self->_get_strand($prot_start, $prot_end);
                my $pf = Bio::SeqFeature::Generic->new(
                    -start   => $prot_start,
                    -end     => $prot_end,
                    -seq_id  => $self->_prot_id,
                    -score   => $self->_score,
                    -strand  => $prot_strand,
                    -source  => $Srctag,
                    -primary => 'supporting_protein_feature',
                );

                my $geno_strand;
                ($geno_start, $geno_end, $geno_strand) =
                    $self->_get_strand($geno_start, $geno_end);
                my $gf = Bio::SeqFeature::Generic->new(
                    -start   => $geno_start,
                    -end     => $geno_end,
                    -seq_id  => $self->_target_id,
                    -score   => $self->_score,
                    -strand  => $geno_strand,
                    -source  => $Srctag,
                    -primary => 'supporting_genomic_feature',
                );

                my $fp = Bio::SeqFeature::FeaturePair->new(
                    -feature1 => $gf,
                    -feature2 => $pf,
                );
                $exon->add_tag_value('supporting_feature' => $fp);
            }

            $transcript->add_exon($exon);
        }

        $transcript->seq_id($self->_target_id);
        $genes->add_transcript($transcript);
        $genes->seq_id($self->_target_id);
        return $genes;
    }

    # Input exhausted without another gene record: return nothing explicitly
    # rather than relying on the unspecified value of a finished loop.
    return;
}

# A module file must end with a true value.
1;
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to one
of the Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bio.perl.org/MailList.html - About the mailing lists
The rest of the documentation details each of the object methods. Internal methods are usually prefixed with an underscore (_).