This is a driver for the SearchIO system for parsing Exonerate (Guy
Slater) output. You can get Exonerate at
http://cvsweb.sanger.ac.uk/cgi-bin/cvsweb.cgi/exonerate/?cvsroot=Ensembl
[until Guy puts up a Web reference or publication for it].
An optional parameter -min_intron is supported by the new()
initialization method. If you run Exonerate with a different
minimum intron length (the default is 30), this parameter lets the
parser tell the difference between a standard deletion and an intron.
There is still some room to play with here: cases that might cause a
deletion or an intron to be misinterpreted have not been fully tested
or explored.
# next_result
#
# Pulls lines from $self->_readline and translates one Exonerate report into
# parser events (start_document / start_element / element / end_element /
# end_document) issued on $self.
#
# Recognised lines:
#   Query:  <id> [description]  - opens an ExonerateOutput element; a second
#                                 Query: line closes the current report, is
#                                 pushed back for the next call, and ends
#                                 this call
#   Target: <id> [description]  - opens a Hit element
#   cigar:  ...                 - emits one Hsp per exon, closes the Hit and
#                                 the ExonerateOutput, and ends this call
# Every other line is ignored.
#
# Args    : none
# Returns : whatever $self->end_document() returns once a report has been
#           read; nothing (undef in scalar context) when the input runs out
#           before any Query: line is seen.
sub next_result {
    my ($self) = @_;

    $self->{'_last_data'} = '';
    $self->start_document();

    my $seentop;

    # Per-exon query / midline / hit strings.  Nothing in this method fills
    # them, so the shift() calls below hand undef to the Hsp_qseq, Hsp_hseq
    # and Hsp_midline elements.
    my (@q_ex, @m_ex, @h_ex);

    # A lexical line buffer is used so the caller's $_ is left untouched.
    while ( defined( my $line = $self->_readline ) ) {

        # The optional description group is non-capturing so that $2 is the
        # description itself, without the whitespace that separates it from
        # the identifier.
        if ( $line =~ /^Query:\s+(\S+)(?:\s+(.+))?/ ) {
            my ($nm, $desc) = ($1, $2);

            if ($seentop) {
                # A second Query: line starts the next report: close this
                # one and leave the line for the following call.
                $self->end_element({'Name' => 'ExonerateOutput'});
                $self->_pushback($line);
                return $self->end_document();
            }
            $seentop = 1;

            chomp($desc) if defined $desc;
            $self->{'_result_count'}++;
            $self->start_element({'Name' => 'ExonerateOutput'});
            $self->element({'Name' => 'ExonerateOutput_query-def',
                            'Data' => $nm });
            $self->element({'Name' => 'ExonerateOutput_query-desc',
                            'Data' => $desc });
            $self->element({'Name' => 'ExonerateOutput_program',
                            'Data' => 'Exonerate' });
        } elsif ( $line =~ /^Target:\s+(\S+)(?:\s+(.+))?/ ) {
            my ($nm, $desc) = ($1, $2);
            chomp($desc) if defined $desc;
            $self->start_element({'Name' => 'Hit'});
            $self->element({'Name' => 'Hit_id',
                            'Data' => $nm});
            $self->element({'Name' => 'Hit_desc',
                            'Data' => $desc});
        } elsif (
            # Strip the fixed cigar header; what remains in $line is the
            # list of "<state> <length>" pairs.  Under /x a "#" comment runs
            # to the end of the line, so every comment must sit on its own
            # line of the pattern.
            $line =~ s/^cigar:\s+
                       (\S+)\s+                      # query sequence id
                       (\d+)\s+(\d+)\s+([\-\+])\s+   # query start, end, strand
                       (\S+)\s+                      # target sequence id
                       (\d+)\s+(\d+)\s+([\-\+])\s+   # target start, end, strand
                       (\d+)\s+                      # score
                      //x
          ) {
            my ($qs, $q_end, $q_sign) = ($2, $3, $4);
            my ($hs, $h_end, $h_sign) = ($6, $7, $8);
            my $score = $9;
            my @ops   = split ' ', $line;

            # On the minus strand the walk starts from the reported end
            # coordinate instead of the reported start.
            my $qstrand = 1;
            if ( $q_sign eq '-' ) {
                $qstrand = -1;
                $qs      = $q_end - 1;
            }
            my $hstrand = 1;
            if ( $h_sign eq '-' ) {
                $hstrand = -1;
                $hs      = $h_end - 1;
            }

            # NOTE(review): presumably converts zero-based cigar coordinates
            # to one-based ones -- confirm against the Exonerate format.
            $qs++;
            $hs++;

            my ($aln_len, $inserts, $deletes) = (0, 0, 0);
            while ( @ops >= 2 ) {
                my ($state, $len) = splice @ops, 0, 2;

                if ( $state eq 'I' ) {
                    $inserts += $len;
                } elsif ( $state eq 'D' ) {
                    # A deletion of at least $MIN_INTRON is treated as an
                    # intron: everything accumulated so far is flushed as
                    # one Hsp before the deletion is counted.
                    if ( $len >= $MIN_INTRON ) {
                        $self->start_element({'Name' => 'Hsp'});
                        $self->element({'Name' => 'Hsp_score',
                                        'Data' => $score});

                        # NOTE(review): Hsp_align-len and Hsp_identity are
                        # emitted twice in this branch with different
                        # values; which pair takes effect depends on the
                        # event handler, so both are kept.  TODO confirm
                        # the intended values (the final Hsp below uses
                        # the formulas of this first pair).
                        $self->element({'Name' => 'Hsp_align-len',
                                        'Data' => $aln_len});
                        $self->element({'Name' => 'Hsp_identity',
                                        'Data' => $aln_len -
                                            ($inserts + $deletes)});

                        $self->element({'Name' => 'Hsp_query-from',
                                        'Data' => $qs});
                        $qs += $aln_len * $qstrand;
                        $self->element({'Name' => 'Hsp_query-to',
                                        'Data' => $qs - ($qstrand*1)});

                        # Deletions counted since the previous Hsp shift
                        # the hit start before the aligned span is added.
                        $hs += $deletes * $hstrand;
                        $self->element({'Name' => 'Hsp_hit-from',
                                        'Data' => $hs});
                        $hs += $aln_len * $hstrand;
                        $self->element({'Name' => 'Hsp_hit-to',
                                        'Data' => $hs-($hstrand*1)});

                        $self->element({'Name' => 'Hsp_align-len',
                                        'Data' => $aln_len + $inserts
                                            + $deletes});
                        $self->element({'Name' => 'Hsp_identity',
                                        'Data' => $aln_len });
                        $self->element({'Name' => 'Hsp_gaps',
                                        'Data' => $inserts + $deletes});
                        $self->element({'Name' => 'Hsp_querygaps',
                                        'Data' => $inserts});
                        $self->element({'Name' => 'Hsp_hitgaps',
                                        'Data' => $deletes});
                        $self->element({'Name' => 'Hsp_qseq',
                                        'Data' => shift @q_ex,
                                       });
                        $self->element({'Name' => 'Hsp_hseq',
                                        'Data' => shift @h_ex,
                                       });
                        $self->element({'Name' => 'Hsp_midline',
                                        'Data' => shift @m_ex,
                                       });
                        $self->end_element({'Name' => 'Hsp'});

                        $aln_len = $inserts = $deletes = 0;
                    }

                    # Intron or not, the deletion is added to $deletes; for
                    # an intron this is what carries the hit coordinate
                    # across it when the next Hsp is emitted.
                    $deletes += $len;
                } else {
                    # Any other state extends the aligned span.
                    $aln_len += $len;
                }
            }

            # Whatever is still accumulated forms the final Hsp.
            $self->start_element({'Name' => 'Hsp'});
            $self->element({'Name' => 'Hsp_qseq',
                            'Data' => shift @q_ex,
                           });
            $self->element({'Name' => 'Hsp_hseq',
                            'Data' => shift @h_ex,
                           });
            $self->element({'Name' => 'Hsp_midline',
                            'Data' => shift @m_ex,
                           });
            $self->element({'Name' => 'Hsp_score',
                            'Data' => $score});

            $self->element({'Name' => 'Hsp_query-from',
                            'Data' => $qs});
            $qs += $aln_len * $qstrand;
            $self->element({'Name' => 'Hsp_query-to',
                            'Data' => $qs - ($qstrand*1)});

            $hs += $deletes * $hstrand;
            $self->element({'Name' => 'Hsp_hit-from',
                            'Data' => $hs});
            $hs += $aln_len * $hstrand;
            $self->element({'Name' => 'Hsp_hit-to',
                            'Data' => $hs -($hstrand*1)});

            $self->element({'Name' => 'Hsp_align-len',
                            'Data' => $aln_len});
            $self->element({'Name' => 'Hsp_identity',
                            'Data' => $aln_len - ($inserts + $deletes)});
            $self->element({'Name' => 'Hsp_gaps',
                            'Data' => $inserts + $deletes});
            $self->element({'Name' => 'Hsp_querygaps',
                            'Data' => $inserts});
            $self->element({'Name' => 'Hsp_hitgaps',
                            'Data' => $deletes});
            $self->end_element({'Name' => 'Hsp'});

            # The cigar line is the last thing read for a report.
            $self->element({'Name' => 'Hit_score',
                            'Data' => $score});
            $self->end_element({'Name' => 'Hit'});
            $self->end_element({'Name' => 'ExonerateOutput'});
            return $self->end_document();
        }
    }

    # Input ran out.  If a report was opened but no cigar line followed,
    # close it the same way the second-Query: path above does before
    # finishing the document.
    if ($seentop) {
        $self->end_element({'Name' => 'ExonerateOutput'});
        return $self->end_document();
    }
    return;
}
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with an underscore (_).