# Raw content of Bio::EnsEMBL::Analysis::Runnable::ExonerateProbe
=pod
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::ExonerateProbe
=head1 SYNOPSIS
my $runnable =
Bio::EnsEMBL::Analysis::Runnable::ExonerateProbe->new(
-query_seqs => \@q_seqs,
-query_type => 'dna',
-target_seqs => \@t_seqs,
-options => $options,
);
$runnable->run; #create and fill Bio::Seq object
my @results = $runnable->output;
=head1 DESCRIPTION
This module handles a specific use of the Exonerate (G. Slater) program, to
align probes to a target genome. (The resulting alignments will be stored in an
an ensembl Funcgen db as Bio::EnsEMBL::Funcgen::ProbeFeature objects.)
NOTE: the ProbeFeature objects refer to Probe id's, and they in turn
refer to ArrayChip and Array id's. Hence, Arrays, ArrayChips and Probes
should be pre-loaded into the ensembl db: there are separate RunnableDBs
to do this from the Affymetrix data sets, or use the EFG Importer
to load other arrays, e.g. Nimblegen or Sanger.
This runnable just creates fake Probes in order to create reasonable-looking
affy features???????
=head1 CONTACT
ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::ExonerateProbe;
use vars qw(@ISA);
use strict;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Analysis::Runnable::BaseExonerate;
use Bio::EnsEMBL::Funcgen::Probe;
use Bio::EnsEMBL::Funcgen::ProbeFeature;
use Bio::EnsEMBL::Utils::Exception qw( throw warning );
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable::BaseExonerate);
# Constructor.
#
# Named arguments inspected here (the complete argument list is also handed
# on, unchanged, to the parent constructor):
#   -max_mismatches : required; stored on the object (e.g. 0, 1 or 2).
#   -mapping_type   : required; must match /genomic|transcript/.
#   -basic_options  : optional; when not supplied, a default exonerate
#                     option string is appended to the parent's arguments.
#
# Returns the new object.
# Throws when -max_mismatches is undefined, or when -mapping_type is
# undefined or does not match. Both checks run only after the parent
# constructor has returned.
sub new {
  my $class = shift;
  my @args  = @_;

  my ($max_mismatches, $mapping_type, $basic_options) =
    rearrange([ 'max_mismatches', 'mapping_type', 'basic_options' ], @args);

  # Extra key/value pairs appended after the caller's own arguments.
  # NOTE: mapping_type could become a same_strand hits filter, as that is
  # the only thing it is used for here.
  my @default_args;

  unless (defined $basic_options) {
    # parse_results depends on this output format, so only override
    # -basic_options if you also overload or rewrite parse_results.
    # The default switches off the sugar/vulgar/alignment output and emits a
    # custom RESULT line with per-base scores instead, e.g.
    # RESULT: 3020922 0 50 + ENSMUST00000111559 964 1014 + 250 100.00 50 3184 0 scores:0:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:5:0:
    push @default_args,
      '-basic_options',
      "--showsugar false --showvulgar false --showalignment false --ryo \"RESULT: %S %pi %ql %tl %em scores:{%Ps:}\\n\" ";
  }

  my $self = $class->SUPER::new(@args, @default_args);

  throw("Must provide a -max_mismatches parameter e.g. 0, 1 or 2")
    unless defined $max_mismatches;

  if (!defined($mapping_type) || $mapping_type !~ /genomic|transcript/) {
    throw("Must provide a valid -mapping_type parameter e.g. genomic or transcript");
  }

  # Fall back to the default exonerate binary when no program has been set.
  # This is an architecture specific build path.
  unless ($self->program) {
    $self->program('/lustre/work1/ensembl/gs2/local/x86_64/bin/exonerate');
  }

  # A configurable -filter_method (method name or CODEREF) used to be
  # validated and stored here; that support is currently disabled.

  @{$self}{ 'max_mismatches', 'mapping_type' } = ($max_mismatches, $mapping_type);

  return $self;
}
# Read-only accessor: returns the value stored under the 'max_mismatches'
# key of the object hash (set by the constructor).
sub max_mismatches{
  my ($self) = @_;
  return $self->{'max_mismatches'};
}
# Read-only accessor: returns the value stored under the 'mapping_type'
# key of the object hash (set by the constructor).
sub mapping_type{
  my ($self) = @_;
  return $self->{'mapping_type'};
}
#
# Implementation of method in abstract superclass
#
sub parse_results {
my ( $self, $fh ) = @_;
my @features;
#my $filter_method = $self->{'match_rules'};
#my $filter_method = $self->filter_method;
#No, now uses code ref to allow definition in config
#print "Parsing results from fh ".Data::Dumper::Dumper($fh)."\n";
my ($tag, $probe_id, $q_start, $q_end, $q_strand,
$t_id, $t_start, $t_end, $t_strand, $score, $tscore,
$perc_id, $q_length, $t_length, $mismatch_count, $scores,
$match_length, $align_mismatch, $total_mismatches, $tmp);
my $max_mismatches = $self->max_mismatches;
my $mapping_type = $self->mapping_type;
while (<$fh>){
#print STDERR $_ if $self->_verbose;
next unless /^RESULT:/;
chomp;
#Vulgar blocks are also report in in-between coords! So need to add 1(only to start???)
#Shows the alignments in "vulgar" format. Vulgar is Verbose Useful Labelled Gapped Alignment Report, This format also starts with the same 9 fields as sugar output (see above), and is followed by a series of