This module handles a specific use of the Exonerate (G. Slater) program,
to align clone sequences with genomic sequences.
None available.
sub make_feature
{ my ($self,
$q_id, $q_len, $q_start, $q_end, $q_strand,
$t_id, $t_len, $t_start, $t_end, $t_strand,
$score, $perc_id, $cigar_string, $hcoverage, $gene_orientation) = @_;
if($q_strand eq '+'){
$q_strand = 1;
} elsif ($q_strand eq '-') {
$q_strand = -1;
} else {
throw "unrecognised target strand symbol: $q_strand\n";
}
if($t_strand eq '+'){
$t_strand = 1;
} elsif ($t_strand eq '-') {
$t_strand = -1;
} else {
throw "unrecognised target strand symbol: $t_strand\n";
}
if ($t_start > $t_end) {
($t_start, $t_end) = ($t_end, $t_start);
} elsif ($t_strand < 0) {
($t_start, $t_end) = ($t_len - $t_end, $t_len - $t_start);
}
if ($q_start > $q_end) {
($q_start, $q_end) = ($q_end, $q_start);
} elsif ($q_strand < 0) {
($q_start, $q_end) = ($q_len - $q_end, $q_len - $q_start);
}
if ($gene_orientation eq '-') {
$t_strand *= -1;
$q_strand *= -1;
my @numbers = split(/\D+/,$cigar_string);
my @letters = split(/\d+/,$cigar_string);
$cigar_string = '';
for (my $i = $#numbers ; $i >= 0 ; $i-- ) {
$cigar_string .= $numbers[$i] . $letters[$i+1];
}
}
$q_start+=1;
$t_start+=1;
my $obj_name = $self->query_type =~ /dna/i
? "Bio::EnsEMBL::DnaDnaAlignFeature"
: "Bio::EnsEMBL::DnaPepAlignFeature";
my $feature =
$obj_name->new(
-seqname => $t_id,
-start => $t_start,
-end => $t_end,
-strand => $t_strand,
-hseqname => $q_id,
-hstart => $q_start,
-hend => $q_end,
-hstrand => $q_strand,
-score => $score,
-percent_id => $perc_id,
-cigar_string => $cigar_string,
-hcoverage => $hcoverage,
);
$feature->{"_intron"} = 1 unless $gene_orientation eq '.';
return $feature;
}
1; } |
sub parse_results
{ my ( $self, $fh ) = @_;
my @features;
while (<$fh>){
next unless /^RESULT:/;
chomp;
my (
$tag, $q_id, $q_start, $q_end, $q_strand,
$t_id, $t_start, $t_end, $t_strand, $score,
$perc_id, $q_length, $t_length, $gene_orientation,
@vulgar_blocks, $total_match_length
) = split;
my $cigar_string='';
while (@vulgar_blocks){
throw("Something funny has happened to the input vulgar string." .
" Expecting components in multiples of three, but only have [" .
scalar @vulgar_blocks . "] items left to process.")
unless scalar @vulgar_blocks >= 3;
my $str = join(',',@vulgar_blocks);
if ( $str =~ /^3,(\d+),(\d+),I,(\d+),(\d+),5,(\d+),(\d+)/ or
$str =~ /^5,(\d+),(\d+),I,(\d+),(\d+),3,(\d+),(\d+)/ ) {
splice(@vulgar_blocks,0,9);
my $query_match_length = $2+$4+$6 - ( $1+$3+$5 );
my $match_type = 'I';
$cigar_string .= $query_match_length.$match_type;
}
$str = join(',',@vulgar_blocks);
my $match_type = shift @vulgar_blocks;
my $query_match_length = shift @vulgar_blocks;
my $target_match_length = shift @vulgar_blocks;
$total_match_length += $query_match_length;
if ($match_type eq "G"){
if ($query_match_length == 0){
$match_type="I";
$query_match_length = $target_match_length;
}elsif ($target_match_length == 0){
$match_type="D";
}
}
$cigar_string .= $query_match_length.$match_type;
}
my $hcoverage = $total_match_length / $q_length * 100;
my $feature =
$self->make_feature(
$q_id, $q_length, $q_start, $q_end, $q_strand,
$t_id, $t_length, $t_start, $t_end, $t_strand,
$score, $perc_id, $cigar_string, $hcoverage , $gene_orientation
);
if($feature){
push @features, $feature;
}else{
warn "Clone end feature from probe :$q_id doesnt match well enough\n";
}
}
return\@ features;
}
} |