my $runnable =
Bio::EnsEMBL::Analysis::Runnable::ExonerateAffy->new(
-query_seqs => \@q_seqs,
-query_type => 'dna',
-target_seqs => \@t_seqs,
-options => $options,
);
$runnable->run; #create and fill Bio::Seq object
my @results = $runnable->output;
This module handles a specific use of the Exonerate (G. Slater) program, to
align Affymetrix probes to a target genome. (The resulting alignments will
probably be stored in an ensembl db as Bio::EnsEMBL::AffyFeature objects.)
NOTE: the AffyFeature objects refer to AffyProbe id's, and they in turn
refer to AffyArray id's. Both the AffyProbes and the AffyArray's should
be pre-loaded into the ensembl db: there are separate RunnableDB
/RunnableDB's to do this from the Affymetrix data sets. This runnable
just creates fake affy probes in order to create reasonable-looking
affy features.
None available.
sub make_affy_feature
{ my ($self, @args) = @_;
my (
$tag, $q_id, $q_start, $q_end, $q_strand,
$t_id, $t_start, $t_end, $t_strand, $score,
$q_length, $query_match_length
) = @_;
$t_start += 1;
my $probe_internal_id = $q_id;
my $mismatch_count;
if(!($probe_internal_id =~ /\d+/)){
throw "Affy probe headers MUST be the internal db ids of the affy probes for this parser to work!\n";
}
if($query_match_length == $q_length){
if($score == 125){
$mismatch_count = 0;
}elsif ($score == 116) {
$mismatch_count = 1;
} else {
return undef;
}
}elsif($query_match_length == $q_length -1){
if ($score == 120) {
$mismatch_count = 1;
} else {
return undef;
}
}else{
return undef;
}
my $affy_probe =
new Bio::EnsEMBL::AffyProbe(
-dbID =>$q_id,
-name => "replace this probe name",
-arrayname => "replace this array name",
);
if($q_strand eq '+'){
if($t_strand eq '+'){
$t_strand = 1;
}elsif($t_strand eq '-'){
$t_strand = -1;
}else{
throw "unrecognised target strand symbol: $t_strand\n";
}
}elsif($q_strand eq '-'){
if($t_strand eq '-'){
$t_strand = 1;
}elsif($t_strand eq '+'){
$t_strand = -1;
}else{
throw "unrecognised target strand symbol: $t_strand\n";
}
}else{
throw "unrecognised query strand symbol: $q_strand\n";
}
my $affy_feature =
new Bio::EnsEMBL::AffyFeature(
-probe => $affy_probe,
-start => $t_start,
-end => $t_end,
-strand => $t_strand,
-mismatchcount => $mismatch_count
);
$affy_feature->seqname($t_id);
return $affy_feature;
}
1; } |
sub parse_results
{ my ( $self, $fh ) = @_;
my @affy_features;
while (<$fh>){
next unless /^RESULT:/;
chomp;
my (
$tag, $q_id, $q_start, $q_end, $q_strand,
$t_id, $t_start, $t_end, $t_strand, $score,
$perc_id, $q_length, $t_length, $gene_orientation,
@vulgar_blocks
) = split;
my($match_type, $query_match_length, $target_match_length,@rest_of_vulgar) = @vulgar_blocks;
if(@rest_of_vulgar){
throw (
"There is more than a simple match ('M') vulgar output: @vulgar_blocks for
affy tag $q_id mapped to region $t_id\n "
);
}
if(!($match_type eq 'M')){
throw "I have received a starting Vulgar symbol $match_type which is not a match!";
}
my $affy_feature =
$self->make_affy_feature(
$q_id, $q_start, $q_end, $q_strand,
$t_id, $t_start, $t_end, $t_strand, $score,
$q_length, $query_match_length
);
if($affy_feature){
push @affy_features, $affy_feature;
}else{
warn "Affy feature from probe :$q_id doesnt match well enough\n";
}
}
return\@ affy_features;
}
} |