$database = a full path location for the directory containing the target (genomic usually) sequence,
@sequences = a list of Bio::Seq objects,
$exonerate = a location for the binary,
$options = a string with options ,
my $runnable = Bio::EnsEMBL::Pipeline::Runnable::ExonerateArray->new(
-db =>$db,
-query_seqs => \@sequences,
-program => $exonerate,
-options => $options,
);
$runnable->run; #create and fill Bio::Seq object
my $results = $runnable->output;
where $results is an arrayref of MiscFeatures.
sub _make_affy_features
{
my ($self,@h) = @_;
my @misc_feats;
my $feature_factory = $self->feature_factory;
foreach my $h (@h) {
my ($coord_system_name,$seq_region_name,$slice) ;
if ($h->{'q_id'} =~/Zebrafish/i) {
$coord_system_name = undef;
if ($h->{'t_id'} =~ /^(\S+)\-1\-.*$/) {
$seq_region_name = $1;
$seq_region_name =~ s/\-1$//;
}
elsif ($h->{'t_id'} =~ /^(\S+)\..*$/) {
$seq_region_name = $1;
}
}
else {
$coord_system_name = "chromosome";
($seq_region_name) = $h->{'t_id'} =~ /^(\S+)\..*$/;
}
$slice = $self->db->get_SliceAdaptor->fetch_by_region($coord_system_name,$seq_region_name);
if (!$slice) {
warning("In ExonerateArray::make_affy_features: Could not obtain slice for seq_region: $coord_system_name : $seq_region_name---try with coord_system_name = undef\n");
$coord_system_name = undef;
$slice = $self->db->get_SliceAdaptor->fetch_by_region($coord_system_name,$seq_region_name);
if (!$slice) {
warning("In ExonerateArray::make_affy_features: Could not obtain slice for seq_region: $coord_system_name : $seq_region_name");
next;
}
}
my $probe_name = $h->{'q_id'};
$probe_name =~ s/^probe\://;
my ($array_name,$composite_name) = split /\:/, $probe_name;
$composite_name =~ s/\;$//;
my $xref_name = $array_name;
$xref_name =~ s/-/_/g;
my $misc_feature = $feature_factory->create_misc_feature
($h->{'t_start'},$h->{'t_end'},$h->{'t_strand'},$slice);
$feature_factory->add_misc_feature_attribute
( $misc_feature, "probeName","Probe name","the name of the probe",$probe_name);
$feature_factory->add_misc_feature_attribute
($misc_feature, "compositeName","Composite name", "the name of the composite", $composite_name);
$feature_factory->add_misc_feature_attribute
($misc_feature,"probeLength", "Probe length", "the length of the probe", $h->{'probe_length'});
$feature_factory->add_misc_feature_attribute
($misc_feature, "matchLength", "Match length", "number of bases in matched alignment", $h->{'matching_length'});
$feature_factory->add_misc_feature_attribute
($misc_feature,"matchStatus", "Match status", "full_match or with_mismatch", $h->{'match_status'});
$feature_factory->add_misc_set
($misc_feature, "AFFY\_$xref_name", $array_name, "array set name", $self->max_length);
$feature_factory->add_misc_set
($misc_feature, "All_Affy", "All-Array-Sets", "all array sets", $self->max_length);
push (@misc_feats, $misc_feature);
}
$self->output(\@misc_feats);
}
} |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($db,$query_seqs) =
rearrange([qw(
DB
QUERY_SEQS
)
], @args);
$self->db($db) if $db;
unless( $query_seqs ){
throw("Exonerate needs a query_seqs: $query_seqs");
}
our (%length);
my $queryfile = $self->queryfile();
foreach my $query_seq (@{$query_seqs}) {
$length{$query_seq->display_id} = $query_seq->length;
$self->write_seq_file ($query_seq);
}
my @lengths = sort {$b<=>$a} values %length;
my $max_length = $lengths[0];
$self->max_length($max_length);
$self->length(\%length);
return $self;
}
} |
sub parse_results
{ my ($self, $results) = @_;
if(!$results){
$results = $self->resultsfile;
}
my %length = %{$self->length()};
open( EXO, $results ) || throw("FAILED to open ".$results." ExonerateArray::parse_results");
my (@pro_features);
while (<EXO>){
if (/^vulgar\:/) {
my $h={};
chomp;
my ( $tag, $q_id, $q_start, $q_end, $q_strand, $t_id, $t_start, $t_end, $t_strand, $score, $match, $matching_length) = split;
$q_start++;
$t_start++;
my $strand;
if ($q_strand eq $t_strand) {
$strand = 1;
}
else {
$strand = -1;
}
$h->{'q_id'} = $q_id;
$h->{'q_start'} = $q_start;
$h->{'q_end'} = $q_end;
$h->{'q_strand'} = $strand;
$h->{'t_id'} = $t_id;
$h->{'t_start'} = $t_start;
$h->{'t_end'} = $t_end;
$h->{'t_strand'} = $strand;
$h->{'score'} = $score;
$h->{'probe_length'} = $length{$h->{'q_id'}};
$h->{'matching_length'} = $matching_length;
if ($h->{'matching_length'} == $h->{'probe_length'}-1) {
$h->{'match_status'} = "Mismatch";
push @pro_features, $h;
}
elsif ($h->{'matching_length'} == $h->{'probe_length'}) {
if ($h->{'score'} == 125) {
$h->{'match_status'} = "Fullmatch";
}
else {
$h->{'match_status'} = "Mismatch";
}
push @pro_features, $h;
}
}
}
close(EXO) or $self->throw("couldn't close pipe ");
$self->_make_affy_features(@pro_features);
} |
sub write_seq_file
{ my ($self, $seq, $filename) = @_;
if(!$seq){
$seq = $self->query;
}
if(!$filename){
$filename = $self->queryfile;
}
my $seqout = Bio::SeqIO->new(
-file => ">>".$filename, -format => 'Fasta',
);
eval{
$seqout->write_seq($seq) if $seq;
};
if($@){
throw("seq is $seq\nFAILED to write $seq to $filename Runnable:write_seq_file : $@");
}
return $filename; } |
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _