Raw content of Bio::EnsEMBL::Analysis::Runnable::ExonerateAlignFeature
=pod
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::ExonerateAlignFeature
=head1 SYNOPSIS
my $runnable =
Bio::EnsEMBL::Analysis::Runnable::ExonerateAlignFeature->new(
-query_seqs => \@q_seqs,
-query_type => 'dna',
-target_seqs => \@t_seqs,
-options => $options,
);
$runnable->run; #create and fill Bio::Seq object
my @results = $runnable->output;
=head1 DESCRIPTION
This module handles a specific use of the Exonerate (G. Slater) program,
to align clone sequences with genomic sequences.
=head1 CONTACT
ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::ExonerateAlignFeature;
use vars qw(@ISA);
use strict;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Analysis::Runnable::BaseExonerate;
use Bio::EnsEMBL::DnaDnaAlignFeature;
use Bio::EnsEMBL::DnaPepAlignFeature;
use Bio::EnsEMBL::Feature;
use Bio::EnsEMBL::FeaturePair;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable::BaseExonerate);
sub new {
my ( $class, @args ) = @_;
my $self = $class->SUPER::new(@args);
return $self;
}
#
# Implementation of method in abstract superclass
#
sub parse_results {
my ( $self, $fh ) = @_;
my @features;
while (<$fh>){
next unless /^RESULT:/;
chomp;
my (
$tag, $q_id, $q_start, $q_end, $q_strand,
$t_id, $t_start, $t_end, $t_strand, $score,
$perc_id, $q_length, $t_length, $gene_orientation,
@vulgar_blocks, $total_match_length
) = split;
my $cigar_string='';
while (@vulgar_blocks){
throw("Something funny has happened to the input vulgar string." .
" Expecting components in multiples of three, but only have [" .
scalar @vulgar_blocks . "] items left to process.")
unless scalar @vulgar_blocks >= 3;
# check for introns if we are using exonerate2genes model
my $str = join(',',@vulgar_blocks);
# print "$str\n";
if ( $str =~ /^3,(\d+),(\d+),I,(\d+),(\d+),5,(\d+),(\d+)/ or
$str =~ /^5,(\d+),(\d+),I,(\d+),(\d+),3,(\d+),(\d+)/ ) {
splice(@vulgar_blocks,0,9);
my $query_match_length = $2+$4+$6 - ( $1+$3+$5 );
my $match_type = 'I';
$cigar_string .= $query_match_length.$match_type;
# print "INTRON $1 $2 $3 $4 $5 $6\n";
}
$str = join(',',@vulgar_blocks);
# print "$str\n";
my $match_type = shift @vulgar_blocks;
my $query_match_length = shift @vulgar_blocks;
my $target_match_length = shift @vulgar_blocks;
$total_match_length += $query_match_length;
if ($match_type eq "G"){
if ($query_match_length == 0){
$match_type="I";
$query_match_length = $target_match_length;
}elsif ($target_match_length == 0){
$match_type="D";
}
}
$cigar_string .= $query_match_length.$match_type;
}
my $hcoverage = $total_match_length / $q_length * 100;
my $feature =
$self->make_feature(
$q_id, $q_length, $q_start, $q_end, $q_strand,
$t_id, $t_length, $t_start, $t_end, $t_strand,
$score, $perc_id, $cigar_string, $hcoverage , $gene_orientation
);
if($feature){
push @features, $feature;
}else{
warn "Clone end feature from probe :$q_id doesnt match well enough\n";
}
}
return \@features;
}
#
# Create dna align feature objects:
#
sub make_feature{
my ($self,
$q_id, $q_len, $q_start, $q_end, $q_strand,
$t_id, $t_len, $t_start, $t_end, $t_strand,
$score, $perc_id, $cigar_string, $hcoverage, $gene_orientation) = @_;
if($q_strand eq '+'){
$q_strand = 1;
} elsif ($q_strand eq '-') {
$q_strand = -1;
} else {
throw "unrecognised target strand symbol: $q_strand\n";
}
if($t_strand eq '+'){
$t_strand = 1;
} elsif ($t_strand eq '-') {
$t_strand = -1;
} else {
throw "unrecognised target strand symbol: $t_strand\n";
}
if ($t_start > $t_end) {
($t_start, $t_end) = ($t_end, $t_start);
} elsif ($t_strand < 0) {
($t_start, $t_end) = ($t_len - $t_end, $t_len - $t_start);
}
# for reverse strand matches, Exonerate reports end => start
if ($q_start > $q_end) {
($q_start, $q_end) = ($q_end, $q_start);
} elsif ($q_strand < 0) {
# coordinates are in strand of reverse complemented sequence. Need to flip
($q_start, $q_end) = ($q_len - $q_end, $q_len - $q_start);
}
if ($gene_orientation eq '-') {
$t_strand *= -1;
$q_strand *= -1;
# need to reverse the cigar string
my @numbers = split(/\D+/,$cigar_string);
my @letters = split(/\d+/,$cigar_string);
$cigar_string = '';
for (my $i = $#numbers ; $i >= 0 ; $i-- ) {
$cigar_string .= $numbers[$i] . $letters[$i+1];
}
}
# correct for exonerate half-open coords
$q_start+=1;
$t_start+=1;
my $obj_name = $self->query_type =~ /dna/i
? "Bio::EnsEMBL::DnaDnaAlignFeature"
: "Bio::EnsEMBL::DnaPepAlignFeature";
my $feature =
$obj_name->new(
-seqname => $t_id,
-start => $t_start,
-end => $t_end,
-strand => $t_strand,
-hseqname => $q_id,
-hstart => $q_start,
-hend => $q_end,
-hstrand => $q_strand,
-score => $score,
-percent_id => $perc_id,
-cigar_string => $cigar_string,
-hcoverage => $hcoverage,
);
$feature->{"_intron"} = 1 unless $gene_orientation eq '.';
return $feature;
}
1;