# Raw content of Bio::EnsEMBL::Analysis::RunnableDB
# Ensembl module for Bio::EnsEMBL::Analysis::RunnableDB
#
# Copyright (c) 2004 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Analysis::RunnableDB
=head1 SYNOPSIS
my $repeat_masker = Bio::EnsEMBL::Analysis::RunnableDB::RepeatMasker->
new(
-input_id => 'contig::AL805961.22.1.166258:1:166258:1',
-db => $db,
-analysis => $analysis,
);
$repeat_masker->fetch_input;
$repeat_masker->run;
$repeat_masker->write_output;
=head1 DESCRIPTION
This module acts as a base class for our RunnableDBs, which act as an
interface between the core database and our Runnables, both fetching
input data and writing data back to the databases.
The module provides some base functionality some of which must be
used
The constructor fits the model expected by the pipeline which runs most
of our analyses. If a child runnabledb expects more arguments
to the constructor than this one, it won't be directly runnable by the
pipeline.
Most of the other methods provided are containers of some description
but there are some methods with specific functionality
parameters_hash is there to parse a string from the parameters variable
in the analysis object. This string should have the format
key => value, key => value where the key would be the Runnables
constructor argument and the value the variable. This is to allow
some flexibility in the arguments expected by and the way we run
Runnables.
fetch_sequence fetches a sequence using the fetch_by_name method of
the slice adaptor from the given database. The name, database and an
array of logic_names to determine masking can be given. If no name
or database is provided the method defaults to input_id and db
validate, this is a method which does some basic validation of the
feature before storage. This checks if slice and analysis object are
defined and if start, end and strand are defined then that the start
is smaller than the end and both the start and end are > 0
All runnableDBs need to implement 3 methods to run within the pipeline
fetch_input, this always must be implemented by individual child
RunnableDBs as the input required for different analyses can vary so
widely that it is impossible to write a generic method
run, there is a run method implemented. To use this child runnabledbs
need to have added the runnables they want run to the runnable method
which holds an array of runnables which are each called and the output
stored in this method
write_output, there is also a generic implementation of this. To use this
method the child runnabledb must implement a get_adaptor method which
returns the appropriate adaptor to be used in storage.
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::RunnableDB;
use strict;
use warnings;
use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning info );
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Analysis::Tools::FeatureFactory;
use Bio::EnsEMBL::Analysis::Tools::Utilities qw(parse_config);
use Bio::EnsEMBL::Analysis::Tools::Logger qw(logger_info logger_verbosity);
use Bio::EnsEMBL::Analysis::Config::General qw(CORE_VERBOSITY
LOGGER_VERBOSITY);
use vars qw (@ISA);
@ISA = qw();
=head2 new
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor
Arg [3] : Bio::EnsEMBL::Analysis
Function : create a Bio::EnsEMBL::Analysis::RunnableDB object
Returntype: Bio::EnsEMBL::Analysis::RunnableDB
Exceptions: throws if not passed either a dbadaptor, input id or
an analysis object
Example : $rdb = $perl_path->new( -analysis => $self->analysis,
-input_id => $self->input_id,
-db => $self->adaptor->db );
=cut
sub new{
  # Constructor: takes -db, -input_id and -analysis named arguments,
  # all three of which are mandatory, and returns the blessed object.
  my ($class,@args) = @_;
  my $self = bless {},$class;
  my ($db, $input_id, $analysis) = rearrange
    (['DB', 'INPUT_ID', 'ANALYSIS'], @args);
  if(!$db || !$analysis || !$input_id){
    # Render missing arguments as empty strings so that building the
    # message does not itself emit "uninitialized value" warnings
    my ($db_str, $analysis_str, $input_id_str) =
      map { defined($_) ? $_ : '' } ($db, $analysis, $input_id);
    throw("Can't create a RunnableDB without a dbadaptor ".
          $db_str." an analysis object ".$analysis_str.
          " or an input_id ".$input_id_str);
  }
  #Clone analysis to prevent analysis reference problem when
  #using separate pipeline and output DBs
  #Do not use adaptor here as caching returns same reference
  #Only hash-based references can be shallow-copied; anything else is
  #handed on untouched so that the analysis setter below can reject it
  #with its own descriptive error rather than dying on the dereference
  if(ref($analysis) && UNIVERSAL::isa($analysis, 'HASH')){
    my %analysis_copy = %{$analysis};
    $analysis = bless \%analysis_copy, ref($analysis);
  }
  $self->db($db);
  $self->analysis($analysis);
  $self->input_id($input_id);
  # Apply the verbosity levels imported from the General config
  verbose($CORE_VERBOSITY);
  logger_verbosity($LOGGER_VERBOSITY);
  return $self;
}
=head2 db
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : Bio::EnsEMBL::Pipeline::DBAdaptor
Function : container for dbadaptor
Returntype: Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor
Exceptions: throws if passed an object which is not a
Bio::EnsEMBL::DBSQL::DBAdaptor
Example :
=cut
sub db{
  # Getter/setter for the database adaptor held under the 'db' key.
  my ($self, $dbadaptor) = @_;
  # No (true) argument: behave as a plain getter
  return $self->{'db'} if(!$dbadaptor);
  if(!$dbadaptor->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')){
    throw("Must pass RunnableDB:db a Bio::EnsEMBL::DBSQL::DBAdaptor ".
          "not a ".$dbadaptor);
  }
  $self->{'db'} = $dbadaptor;
  return $self->{'db'};
}
=head2 analysis
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : Bio::EnsEMBL::Analysis
Function : container for analysis object
Returntype: Bio::EnsEMBL::Analysis
Exceptions: throws if passed the incorrect object type
Example :
=cut
sub analysis{
  # Getter/setter for the analysis object held under the 'analysis' key.
  # A true argument must be a Bio::EnsEMBL::Analysis or the method throws.
  my ($self, $analysis) = @_;
  if($analysis){
    # Note the trailing space after the class name: without it the
    # message read "...Bio::EnsEMBL::Analysisnot a ..."
    throw("Must pass RunnableDB:analysis a Bio::EnsEMBL::Analysis ".
          "not a ".$analysis)
      unless($analysis->isa('Bio::EnsEMBL::Analysis'));
    $self->{'analysis'} = $analysis;
  }
  return $self->{'analysis'};
}
=head2 query
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : Bio::EnsEMBL::Slice
Function : container for slice object
Returntype: Bio::EnsEMBL::Slice
Exceptions: throws if passed the incorrect object type
Example :
=cut
sub query{
  # Getter/setter for the query slice held under the 'slice' key.
  # A true argument must be a Bio::EnsEMBL::Slice or the method throws.
  my ($self, $slice) = @_;
  if($slice){
    # Note the trailing space after the class name: without it the
    # message read "...Bio::EnsEMBL::Slicenot a ..."
    throw("Must pass RunnableDB:query a Bio::EnsEMBL::Slice ".
          "not a ".$slice)
      unless($slice->isa('Bio::EnsEMBL::Slice'));
    $self->{'slice'} = $slice;
  }
  return $self->{'slice'};
}
=head2 runnable
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : Bio::EnsEMBL::Analysis::Runnable
Function : container for an array of runnables
Returntype: arrayref
Exceptions: throws if passed the wrong object type
Example :
=cut
sub runnable{
  # Accumulator for runnables: a true argument is type-checked and
  # appended; the arrayref of everything added so far is returned.
  my ($self, $runnable) = @_;
  $self->{'runnable'} ||= [];
  if($runnable){
    if(!$runnable->isa('Bio::EnsEMBL::Analysis::Runnable')){
      throw("Must pass RunnableDB:runnable a ".
            "Bio::EnsEMBL::Analysis::Runnable not a ".$runnable);
    }
    push(@{$self->{'runnable'}}, $runnable);
  }
  return $self->{'runnable'};
}
=head2 containers
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : string/int
Function : container for specified variable. This pod refers to the
three methods below, input_id, input_is_void, failing_job_status. These
are simple containers which don't do more than hold and return a given
value
Returntype: string/int
Exceptions: none
Example :
=cut
sub input_id{
  # Plain container: stores the first argument when one is supplied
  # (even a false one) and returns the stored value.
  my ($self, @value) = @_;
  if(@value){
    $self->{'input_id'} = $value[0];
  }
  return $self->{'input_id'};
}
sub input_is_void{
  # Plain container: stores the first argument when one is supplied
  # (even a false one) and returns the stored value.
  my ($self, @value) = @_;
  if(@value){
    $self->{'input_is_void'} = $value[0];
  }
  return $self->{'input_is_void'};
}
sub failing_job_status{
  # Plain container: stores the first argument when one is supplied
  # (even a false one) and returns the stored value. Note the hash key
  # is 'failing_status', not the method name.
  my ($self, @value) = @_;
  if(@value){
    $self->{'failing_status'} = $value[0];
  }
  return $self->{'failing_status'};
}
=head2 output
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : arrayref of output
Function : pushes the contents of the array passed in onto the output array
Returntype: arrayref
Exceptions: throws if not passed correct type
Example : $self->output(\@output);
=cut
sub output{
  # Accumulator for results: the elements of a supplied arrayref are
  # appended to the stored list, whose arrayref is returned.
  my ($self, $output) = @_;
  $self->{'output'} ||= [];
  # Nothing (true) supplied: act as a getter
  return $self->{'output'} unless($output);
  throw('Must pass RunnableDB:output an array ref not a '.$output)
    if(ref($output) ne 'ARRAY');
  push(@{$self->{'output'}}, @{$output});
  return $self->{'output'};
}
=head2 feature_factory
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : Bio::EnsEMBL::Analysis::Tools::FeatureFactory
Function : container for a feature factory object. If none is defined
when one is requested a new one is created.
Returntype: Bio::EnsEMBL::Analysis::Tools::FeatureFactory
Exceptions: none
Example :
=cut
sub feature_factory{
  # Getter/setter for the feature factory; when none is stored a new
  # Bio::EnsEMBL::Analysis::Tools::FeatureFactory is created on demand.
  my ($self, $feature_factory) = @_;
  $self->{'feature_factory'} = $feature_factory if($feature_factory);
  # Lazily build the default factory the first time one is needed
  $self->{'feature_factory'} ||=
    Bio::EnsEMBL::Analysis::Tools::FeatureFactory->new();
  return $self->{'feature_factory'};
}
#utility methods
=head2 fetch_sequence
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : string, name
Arg [3] : Bio::EnsEMBL::DBAdaptor
Arg [4] : arrayref of logic_name if sequence is to be masked
Arg [5] : Boolean for softmasking if sequence is to be softmasked
Function : gets sequence from specified database
Returntype: Bio::EnsEMBL::Slice
Exceptions: throws if no slice can be fetched for the given name
Example :
=cut
sub fetch_sequence{
  # Fetches a slice by name through the database's slice adaptor and,
  # when logic names are supplied, returns its repeat-masked form instead.
  # $name defaults to input_id and $db to the stored db.
  my ($self, $name, $db, $repeat_masking, $soft_masking) = @_;
  $db   = $self->db       unless($db);
  $name = $self->input_id unless($name);
  my $slice_adaptor = $db->get_SliceAdaptor;
  my $slice = $slice_adaptor->fetch_by_name($name);
  $repeat_masking ||= [];
  throw("Failed to fetch slice ".$name) if(!$slice);
  # No logic names given: hand back the unmasked slice
  return $slice unless(@{$repeat_masking});
  my $masked = $slice->get_repeatmasked_seq($repeat_masking, $soft_masking);
  return $masked;
}
=head2 parameters_hash
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : string, parameters string
Function : parses the parameters string into a hash
for the Runnables constructor. If neither of the delimiters
are found in the string the string is given the key of options
Returntype: hashref
Exceptions:
Example :
=cut
sub parameters_hash{
  # Parses a "key => value, key => value" string into a hashref.
  #   - $string defaults to the parameters of the stored analysis
  #   - a string with neither ',' nor '=>' is returned whole under the
  #     '-options' key
  #   - a key given without '=> value' is treated as a flag and set to 1
  #   - keys and values have surrounding whitespace removed
  #   - pairs whose key is empty after trimming are ignored
  # An empty hashref is returned when there is nothing to parse.
  my ($self, $string) = @_;
  if(!$string){
    $string = $self->analysis->parameters;
  }
  my %parameters_hash;
  return \%parameters_hash if(!$string);
  if($string !~ /,/ && $string !~ /=>/){
    $parameters_hash{'-options'} = $string;
    return \%parameters_hash;
  }
  foreach my $pair(split(/,/, $string)){
    my ($key, $value) = split(/=>/, $pair);
    # An empty pair (e.g. from ",,") yields no key at all
    next if(!defined($key));
    $key =~ s/^\s+//;
    $key =~ s/\s+$//;
    next if($key eq '');
    # Test definedness rather than truth/numeric equality: the old
    # "$value || $value == 0" check was also true for undef, so flag
    # keys never reached the "= 1" branch and a value of 0 is still kept
    if(defined($value)){
      $value =~ s/^\s+//;
      $value =~ s/\s+$//;
      $parameters_hash{$key} = $value;
    }else{
      $parameters_hash{$key} = 1;
    }
  }
  return \%parameters_hash;
}
=head2 run
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Function : cycles through all the runnables, calls run and pushes
their output into the RunnableDBs output array
Returntype: array ref
Exceptions: none
Example :
=cut
sub run{
  # Runs each stored runnable in turn, feeding what its output method
  # returns into this object's output method, then returns the raw
  # 'output' slot of this object.
  my $self = shift;
  my $runnables = $self->runnable;
  for my $current (@{$runnables}){
    $current->run;
    $self->output($current->output);
  }
  return $self->{'output'};
}
=head2 write_output
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
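Function : set analysis and slice on each feature, validate it and
store it using the adaptor returned by get_adaptor
Returntype: 1
Exceptions: throws if the adaptor fails to store a feature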
Example :
=cut
sub write_output{
  # Stores every feature held in output using the adaptor returned by
  # get_adaptor (which child classes are expected to provide).
  # Each feature is given this object's analysis, given the query slice
  # if it has no slice of its own, and validated by the feature factory
  # before being stored. Throws if a store fails; returns 1 otherwise.
  my ($self) = @_;
  my $adaptor = $self->get_adaptor;
  foreach my $feature(@{$self->output}){
    $feature->analysis($self->analysis);
    $feature->slice($self->query) if(!$feature->slice);
    $self->feature_factory->validate($feature);
    eval{
      $adaptor->store($feature);
    };
    # Capture $@ straight away: the method calls used to build the
    # message below could reset it before it is reported
    my $store_error = $@;
    if($store_error){
      throw("RunnableDB:store failed, failed to write ".$feature." to ".
            "the database ".$adaptor->dbc->dbname." ".$store_error);
    }
  }
  return 1;
}
=head2 fetch_input
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Function : throws, as reaching it means the child hasn't implemented an essential method
Returntype: none
Exceptions: see function
Example :
=cut
sub fetch_input{
  # Placeholder which always throws: child classes have to supply
  # their own fetch_input.
  my $self = shift;
  my $message = "Must implement fetch input in ".$self." RunnableDB will ".
    "not provide this";
  throw($message);
}
=head2 read_and_check_config
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : hashref, should be the hashref from which ever config file
you are reading
Function : on the basis of the entries of the hash in your specific
config file, set up instance variables, first for the default values then for
any values specific to your logic name
Returntype: none
Exceptions: none
Example :
=cut
sub read_and_check_config{
  # Hands this object, the supplied config hash and the logic name of
  # the stored analysis to parse_config.
  my $self = shift;
  my $var_hash = shift;
  return parse_config($self, $var_hash, $self->analysis->logic_name);
}
=head2 require_module
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB
Arg [2] : string, module path
Function : uses perl's require to load the passed-in module
Returntype: returns module name with / replaced by ::
Exceptions: throws if require fails
Example : my $parser =
$self->require_module('Bio/EnsEMBL/Analysis/Tools/BPliteWrapper');
=cut
sub require_module{
  # Loads a module at run time and returns its package name.
  # $module may be given either as a path ('Foo/Bar') or as a package
  # name ('Foo::Bar'); the return value is always the '::' form so it
  # can be used directly as a class name. Throws if the require fails.
  my ($self, $module) = @_;
  # Path form, as needed by require with a string argument
  (my $path = $module) =~ s/::/\//g;
  # Package form handed back to the caller; previously the argument was
  # returned unchanged, so a path argument came back as a path
  (my $class = $module) =~ s/\//::/g;
  eval{
    require "$path.pm";
  };
  # NOTE(review): the "Blast:" prefix in this message presumably dates
  # from the method's origin in the Blast RunnableDB; left unchanged
  throw("Couldn't require ".$path." Blast:require_module $@") if($@);
  return $class;
}
1;