# Raw content of Bio::EnsEMBL::Analysis::Runnable::ExonerateArray
#
# Written by Eduardo Eyras
#
# Copyright GRL/EBI 2002
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::ExonerateArray
=head1 SYNOPSIS
$database = a full path location for the directory containing the target (genomic usually) sequence,
@sequences = a list of Bio::Seq objects,
$exonerate = a location for the binary,
$options = a string with options ,
my $runnable = Bio::EnsEMBL::Analysis::Runnable::ExonerateArray->new(
-db =>$db,
-query_seqs => \@sequences,
-program => $exonerate,
-options => $options,
);
$runnable->run; #create and fill Bio::Seq object
my $results = $runnable->output;
where $results is an arrayref of MiscFeatures.
=head1 DESCRIPTION
ExonerateArray takes a list of Bio::Seq (or Bio::PrimarySeq) query objects and
runs Exonerate against a set of target sequences. The resulting output file is
parsed to produce a set of features.
=head1 CONTACT
ensembl-dev@ebi.ac.uk
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Analysis::Runnable::ExonerateArray;
use vars qw(@ISA);
use strict;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Utils::Exception qw(info verbose throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::MiscFeature;
use Bio::EnsEMBL::Attribute;
use Bio::EnsEMBL::MiscSet;
use Bio::EnsEMBL::Analysis;
use Bio::EnsEMBL::Analysis::Tools::FeatureFactory;
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable);
# new
#
#   Arg [-DB]         : (optional) database object, stored through $self->db.
#                       It is needed later to create the slice on which
#                       features are built.
#   Arg [-QUERY_SEQS] : arrayref of query sequence objects; each one must
#                       answer display_id() and length()
#                       (presumably Bio::Seq objects, as in the SYNOPSIS).
#   Function          : builds the runnable through the parent constructor
#                       (which receives every argument), appends each query
#                       sequence to the query file, and records the length of
#                       every query keyed by display id.
#   Returntype        : the newly constructed runnable
#   Exceptions        : throws if -QUERY_SEQS is missing or false
#   Side effects      : sets $self->length (hashref display_id => length) and
#                       $self->max_length (largest recorded length, undef when
#                       the list is empty).
sub new {
  my ($class, @args) = @_;
  my $self = $class->SUPER::new(@args);

  my ($db, $query_seqs) = rearrange([qw(DB QUERY_SEQS)], @args);

  # $db is needed to create the slice which is needed to create
  # DnaDnaAlignFeatures.
  $self->db($db) if $db;

  # A set of query sequences is mandatory. The offending value is not
  # interpolated into the message: it is false (usually undef) at this point.
  unless ($query_seqs) {
    throw("Exonerate needs a query_seqs: none was supplied");
  }

  # This table must be lexical. It used to be declared with "our", which made
  # it a package global shared by every instance: lengths from earlier
  # runnables leaked into later ones and inflated max_length.
  my %length;

  # Resolve the query file name once and hand it to every write below.
  my $queryfile = $self->queryfile();

  foreach my $query_seq (@{$query_seqs}) {
    $length{ $query_seq->display_id } = $query_seq->length;
    $self->write_seq_file($query_seq, $queryfile);
  }

  # Largest recorded length: first element of a descending numeric sort.
  my ($max_length) = sort { $b <=> $a } values %length;

  $self->max_length($max_length);
  $self->length(\%length);

  return $self;
}
############################################################
#
# Analysis methods
#
############################################################
# write_seq_file
#
#   Arg [1]    : (optional) sequence object to write; when false,
#                $self->query is used instead
#   Arg [2]    : (optional) string, file name; when false,
#                $self->queryfile is used instead
#   Function   : appends the sequence, in Fasta format, to the file. The
#                stream is opened even when there is no sequence to write; in
#                that case nothing is written to it.
#   Returntype : string, the file name that was used
#   Exceptions : throws if writing the sequence fails
sub write_seq_file {
  my ($self, $seq, $filename) = @_;

  # Fall back to the values held by the runnable itself.
  $seq      = $self->query     unless $seq;
  $filename = $self->queryfile unless $filename;

  # The ">>" prefix opens the file for appending, so successive calls add
  # records to the same file rather than overwriting it.
  my $fasta_out = Bio::SeqIO->new(
    -file   => ">>" . $filename,
    -format => 'Fasta',
  );

  # The eval always runs, so $@ reflects this call only.
  eval {
    if ($seq) {
      $fasta_out->write_seq($seq);
    }
  };
  throw("seq is $seq\nFAILED to write $seq to $filename Runnable:write_seq_file : $@")
    if $@;

  return $filename;
}
=head2 run
Arg [1] : Bio::EnsEMBL::Analysis::Runnable
Arg [2] : string, directory
Function : a generic run method. This checks the directory specified
to run it, writes the query sequence to file, marks the query sequence
file and results file for deletion, runs the analysis, parses the
results and deletes any files
Returntype: 1
Exceptions: none thrown directly by this method; the constructor throws if no query_seqs are given
Example :
=cut
# Drives one complete analysis: validates the working directory, writes the
# query file, registers the query and results files for deletion, runs the
# analysis, parses its results and finally deletes the registered files.
#
#   Arg [1]    : (optional) string, directory to run in; when false,
#                $self->workdir is used
#   Returntype : 1
#   Exceptions : none raised here directly. No check for a query sequence is
#                made in this method; anything thrown by the helper methods
#                propagates to the caller.
sub run {
  my ($self, $dir) = @_;

  $dir = $self->workdir unless $dir;
  $self->checkdir($dir);

  # Called without arguments, so write_seq_file falls back to the runnable's
  # own query and query file.
  my $query_path = $self->write_seq_file();

  # Register both files before running, so that delete_files can remove them.
  $self->files_to_delete($query_path);
  $self->files_to_delete($self->resultsfile);

  $self->run_analysis();
  $self->parse_results();
  $self->delete_files();

  return 1;
}
=head2 parse_results
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::ExonerateArray
Arg [2] : string, filename
Function : open and parse the results file into misc_features
Returntype: none
Exceptions: throws on failure to open or close output file
Example :
=cut
sub parse_results{
my ($self, $results) = @_;
if(!$results){
$results = $self->resultsfile;
}
my %length = %{$self->length()};
open( EXO, $results ) || throw("FAILED to open ".$results." ExonerateArray::parse_results");
############################################################
# store each alignment as a features
my (@pro_features);
############################################################
# parse results - avoid writing to disk the output
while (){
#info ($_) ;
############################################################
# the output is of the format:
#
#
# vulgar contains 9 fields
# ( ),
#
# The vulgar (Verbose Useful Labelled Gapped Alignment Report) blocks are a series
# of