Raw content of Bio::EnsEMBL::Analysis::Runnable::ExonerateCloneEnds
=pod
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::ExonerateCloneEnds
=head1 SYNOPSIS
my $runnable =
Bio::EnsEMBL::Analysis::Runnable::ExonerateCloneEnds->new(
-query_seqs => \@q_seqs,
-query_type => 'dna',
-target_seqs => \@t_seqs,
-options => $options,
);
$runnable->run; #create and fill Bio::Seq object
my @results = $runnable->output;
=head1 DESCRIPTION
This module handles a specific use of the Exonerate (G. Slater) program,
to align clone sequences with genomic sequences.
=head1 CONTACT
ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::ExonerateCloneEnds;
use vars qw(@ISA);
use strict;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Analysis::Runnable::BaseExonerate;
use Bio::EnsEMBL::DnaDnaAlignFeature;
use Bio::EnsEMBL::Feature;
use Bio::EnsEMBL::FeaturePair;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable::BaseExonerate);
sub new {
my ( $class, @args ) = @_;
my $self = $class->SUPER::new(@args);
return $self;
}
#
# Implementation of method in abstract superclass
#
sub parse_results {
my ( $self, $fh ) = @_;
my @features;
while (<$fh>){
next unless /^RESULT:/;
chomp;
my (
$tag, $q_id, $q_start, $q_end, $q_strand,
$t_id, $t_start, $t_end, $t_strand, $score,
$perc_id, $q_length, $t_length, $gene_orientation,
@vulgar_blocks
) = split;
my $cigar_string='';
while (@vulgar_blocks){
throw("Something funny has happened to the input vulgar string." .
" Expecting components in multiples of three, but only have [" .
scalar @vulgar_blocks . "] items left to process.")
unless scalar @vulgar_blocks >= 3;
my $match_type = shift @vulgar_blocks;
my $query_match_length = shift @vulgar_blocks;
my $target_match_length = shift @vulgar_blocks;
if ($match_type eq "G"){
if ($query_match_length == 0){
$match_type="D";
$query_match_length = $target_match_length;
}elsif ($target_match_length == 0){
$match_type="I";
}
}
$cigar_string .= $query_match_length.$match_type;
}
my $feature =
$self->make_feature(
$q_id, $q_start, $q_end, $q_strand,
$t_id, $t_start, $t_end, $t_strand, $score,
$perc_id, $q_length, $cigar_string
);
if($feature){
push @features, $feature;
}else{
warn "Clone end feature from probe :$q_id doesnt match well enough\n";
}
}
return \@features;
}
#
# Create dna align feature objects:
#
sub make_feature{
my ($self, @args) = @_;
my (
$tag, $q_id, $q_start, $q_end, $q_strand,
$t_id, $t_start, $t_end, $t_strand, $score,
$perc_id, $q_length, $cigar_string
) = @_;
if($q_strand eq '+'){
$q_strand = 1;
if($t_strand eq '+'){
$t_strand = 1;
}elsif($t_strand eq '-'){
$t_strand = -1;
}else{
throw "unrecognised target strand symbol: $t_strand\n";
}
}elsif($q_strand eq '-'){
$q_strand = -1;
if($t_strand eq '-'){
$t_strand = -1;
}elsif($t_strand eq '+'){
$t_strand = 1;
}else{
throw "unrecognised target strand symbol: $t_strand\n";
}
}else{
throw "unrecognised query strand symbol: $q_strand\n";
}
# Exonerate reports query start -1 so in some cases we get alignments with hit start 0
# we add 1 to avoid this situation.
$q_start+=1;
$t_start+=1;
# for reverse strand matches, Exonerate reports end => start
if ($q_start > $q_end) {
($q_start, $q_end) = ($q_end, $q_start);
}
if ($t_start > $t_end) {
($t_start, $t_end) = ($t_end, $t_start);
}
my $feature =
new Bio::EnsEMBL::DnaDnaAlignFeature(
-seqname => $t_id,
-start => $t_start,
-end => $t_end,
-strand => $t_strand,
-hseqname => $q_id,
-hstart => $q_start,
-hend => $q_end,
-hstrand => $q_strand,
-score => $score,
-percent_id => $perc_id,
-cigar_string => $cigar_string,
);
return $feature;
}
1;