Raw content of Bio::EnsEMBL::Analysis::RunnableDB::ExonerateCloneEnds
=pod
=head1 NAME
Bio::EnsEMBL::Analysis::RunnableDB::Exonerate2Genes;
=head1 SYNOPSIS
Should not be used directly. Might be called from MapCloneEnds.pm
my $clone =
Bio::EnsEMBL::Analysis::RunnableDB::ExonerateCloneEnds->new(
-db => $refdb,
-analysis => $analysis_obj,
-database => $EST_GENOMIC,
);
$clone->fetch_input();
$clone->run();
$clone->write_output(); #writes to DB
=head1 DESCRIPTION
This object maps clone sequences to a genome,and
write the resulting alignments as DNA align Features.
=head1 CONTACT
Post general queries to B
=head1 APPENDIX
=cut
package Bio::EnsEMBL::Analysis::RunnableDB::ExonerateCloneEnds;
use strict;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Analysis::RunnableDB;
use Bio::EnsEMBL::Analysis::Runnable::ExonerateCloneEnds;
use Bio::EnsEMBL::Analysis::Config::ExonerateCloneEnds;
use Bio::EnsEMBL::Pipeline::SeqFetcher::OBDAIndexSeqFetcher;
use Bio::SeqIO;
use Bio::DB::Flat::OBDAIndex;
use Bio::Seq;
use vars qw(@ISA);
@ISA = qw (Bio::EnsEMBL::Analysis::RunnableDB);
############################################################
sub new {
my ( $class, @args ) = @_;
my $self = $class->SUPER::new(@args);
$self->read_and_check_config($CLONE_CONFIG);
return $self;
}
sub fetch_input {
my ($self, $chunkLine) = @_;
my $logic = $self->analysis->logic_name;
my $seqFetchDB = $self->SEQFETCHDB;
my $seqfetcher = Bio::EnsEMBL::Pipeline::SeqFetcher::OBDAIndexSeqFetcher->new(
-db => [($seqFetchDB)],
-format => 'fasta', );
if ($self->input_id =~ /\w+:[\w\_\.]+:([\w\.]+):([-\w]+):([-\w]+):[-\w]+:(\w+)/){
my $target_id= $1;
my $target_start = $2;
my $target_end = $3;
my $clone_id = $4;
my $db = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{ $self->DNADB });
my $slice_adaptor = $db->get_SliceAdaptor();
my $target_seq = $slice_adaptor->fetch_by_region('toplevel',$target_id, $target_start, $target_end);
my $query_seq = $seqfetcher->get_Seq_by_acc($clone_id);
##########################################
# set up the target (genome)
##########################################
my @target = ();
push (@target,$target_seq);
##########################################
# set up the query (dna clone seq)
##########################################
my @query = ();
push (@query,$query_seq);
##########################################
# setup the runnable
##########################################
my %parameters = %{ $self->parameters_hash };
if (
not exists( $parameters{-options} )
and defined $self->OPTIONS
){
$parameters{-options} = '';
}
$parameters{-options} = $self->OPTIONS;
print STDERR "PROGRAM FILE: ".$self->analysis->program_file."\n";
my $runnable = Bio::EnsEMBL::Analysis::Runnable::ExonerateCloneEnds->new(
-program => $self->analysis->program_file,
-analysis => $self->analysis,
-target_seqs => \@target,
-query_type => $self->QUERYTYPE,
-query_seqs => \@query,
%parameters,
);
$self->runnable($runnable);
}else{
##########################################
# set up the target (genome)
##########################################
my $target = $self->GENOMICSEQS;
if ( -e $target ){
if(-d $target ) {
warn ("Target $target is a directory of files\n");
}elsif (-s $target){
warn ("Target $target is a whole-genome file\n");
}else{
throw("'$target' isn't a file or a directory?");
}
} else {
throw("'$target' could not be found");
}
##########################################
# set up the query (dna clone seq)
##########################################
my @queryseqs = ();
my $iid_regexp = $self->IIDREGEXP;
#print $iid_regexp,"\n";
if (not defined $iid_regexp){
throw("You must define IIDREGEXP in config to enable inference of chunk number and total from your chunklist file" )
}
my ( $chunk_number, $chunk_total ) = $self->input_id =~ /$iid_regexp/;
# Read the line corresponding to chunk_number and parse the input ids from there
my $seq_ids = @{$chunkLine}[$chunk_number];
my @ids_list = split (/:/,$seq_ids);
foreach my $id(@ids_list){
# Get the sequence object for each of the query sequences
my $query_seq = $seqfetcher->get_Seq_by_acc($id);
# Add each query sequence object to the array of sequences that will be passed to exonerate
push (@queryseqs, $query_seq);
}
##########################################
# setup the runnable
##########################################
my %parameters = %{ $self->parameters_hash };
if (
not exists( $parameters{-options} )
and defined $self->OPTIONS
){
$parameters{-options} = '';
}
$parameters{-options} = $self->OPTIONS;
print STDERR "PROGRAM FILE: ".$self->analysis->program_file."\n";
my $runnable = Bio::EnsEMBL::Analysis::Runnable::ExonerateCloneEnds->new(
-program => $self->analysis->program_file,
-analysis => $self->analysis,
-target_file => $target,
-query_type => $self->QUERYTYPE,
-query_seqs => \@queryseqs,
%parameters,
);
$self->runnable($runnable);
}
}
############################################################
sub run {
my ($self) = @_;
my @clone_features;
throw("Can't run - no runnable objects") unless ( $self->runnable );
my $runnable = @{ $self->runnable }[0];
$runnable->run;
@clone_features = @{$runnable->output};
#
#Replace the 'dummy' clone array and probe objects in the
#CloneFeature objects with the 'real' instances found in
#the populate... method
$self->output(\@clone_features);
$self->clone_features(\@clone_features);
$self->clean_clone_features(@{$self->clone_features});
}
############################################################
sub write_output {
my ( $self, @output ) = @_;
my $outdb = $self->create_output_db;
my $clone_feature_adaptor = $outdb->get_DnaAlignFeatureAdaptor;
#Add analysis, slices to DnaAlign_features, and make
#sure they're pointing at the persistent array instances
#instead of the fake arrays they were created with
$self->clean_clone_features(@{$self->clone_features});
foreach my $clone_feature (@{$self->clone_features}){
eval{ $clone_feature_adaptor->store($clone_feature) };
if ($@) {
$self->throw("Unable to store clone feature!\n $@");
}
}
}
############################################################
sub clean_clone_features {
my ( $self, @clone_features ) = @_;
my $db = $self->create_output_db;
my $slice_adaptor = $db->get_SliceAdaptor;
my %genome_slices;
foreach my $clone_feature (@clone_features) {
$clone_feature->analysis( $self->analysis );
# get the slice based on the seqname stamped on in the runnable
my $slice_id = $clone_feature->seqname;
if ( not exists $genome_slices{$slice_id} ) {
# assumes genome seqs were named in the Ensembl API Slice naming
# convention, i.e. coord_syst:version:seq_reg_id:start:end:strand
$genome_slices{$slice_id} = $slice_adaptor->fetch_by_name($slice_id);
}
my $slice = $genome_slices{$slice_id};
$clone_feature->slice($slice);
}
return @clone_features;
}
sub query_file {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_query_file'} = $value;
}
if ( exists( $self->{'_query_file'} ) ) {
return $self->{'_query_file'};
} else {
return undef;
}
}
sub create_output_db {
my ($self) = @_;
my $outdb;
my $dnadb;
if ( $self->OUTDB && $self->DNADB) {
$dnadb = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{ $self->OUTDB });
$outdb = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{ $self->OUTDB },
-dnadb => $dnadb
);
} elsif( $self->OUTDB) {
$outdb = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{ $self->OUTDB });
} else {
$outdb = $self->db;
}
return $outdb;
}
#############################################################
sub clone_features {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_clone_features'} = $value;
}
if ( exists( $self->{'_clone_features'} ) ) {
return $self->{'_clone_features'};
} else {
return undef;
}
}
#############################################################
# Declare and set up config variables
#############################################################
sub read_and_check_config {
my $self = shift;
$self->SUPER::read_and_check_config($CLONE_CONFIG);
##########
# CHECKS
##########
my $logic = $self->analysis->logic_name;
# check that compulsory options have values
foreach my $config_var (
qw(
QUERYSEQS
QUERYTYPE
GENOMICSEQS
CHUNKSLIST
SEQFETCHDB
)
){
if ( not defined $self->$config_var ){
throw("You must define $config_var in config for logic '$logic'");
}
}
# output db does not have to be defined, but if it is, it should be a hash
if ($self->OUTDB && ref( $self->OUTDB ) ne "HASH") {
throw("OUTDB in config for '$logic' must be a hash ref of db connection pars.");
}
if ( $self->DNADB and ref( $self->DNADB ) ne "HASH" ) {
throw("DNADB in config for '$logic' must be a hash ref of db connection pars.");
}
}
sub QUERYSEQS {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_CONFIG_QUERYSEQS'} = $value;
}
if ( exists( $self->{'_CONFIG_QUERYSEQS'} ) ) {
return $self->{'_CONFIG_QUERYSEQS'};
} else {
return undef;
}
}
sub QUERYTYPE {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_CONFIG_QUERYTYPE'} = $value;
}
if ( exists( $self->{'_CONFIG_QUERYTYPE'} ) ) {
return $self->{'_CONFIG_QUERYTYPE'};
} else {
return undef;
}
}
sub GENOMICSEQS {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_CONFIG_GENOMICSEQS'} = $value;
}
if ( exists( $self->{'_CONFIG_GENOMICSEQS'} ) ) {
return $self->{'_CONFIG_GENOMICSEQS'};
} else {
return undef;
}
}
sub IIDREGEXP {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_CONFIG_IIDREGEXP'} = $value;
}
if ( exists( $self->{'_CONFIG_IIDREGEXP'} ) ) {
return $self->{'_CONFIG_IIDREGEXP'};
} else {
return undef;
}
}
sub OUTDB {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_CONFIG_OUTDB'} = $value;
}
if ( exists( $self->{'_CONFIG_OUTDB'} ) ) {
return $self->{'_CONFIG_OUTDB'};
} else {
return undef;
}
}
sub DNADB {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_CONFIG_DNADB'} = $value;
}
if ( exists( $self->{'_CONFIG_DNADB'} ) ) {
return $self->{'_CONFIG_DNADB'};
} else {
return undef;
}
}
sub OPTIONS {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_CONFIG_OPTIONS'} = $value;
}
if ( exists( $self->{'_CONFIG_OPTIONS'} ) ) {
return $self->{'_CONFIG_OPTIONS'};
} else {
return undef;
}
}
sub CHUNKSLIST {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_CONFIG_CHUNKSLIST'} = $value;
}
if ( exists( $self->{'_CONFIG_CHUNKSLIST'} ) ) {
return $self->{'_CONFIG_CHUNKSLIST'};
} else {
return undef;
}
}
sub SEQFETCHDB {
my ( $self, $value ) = @_;
if ( defined $value ) {
$self->{'_CONFIG_SEQFETCHDB'} = $value;
}
if ( exists( $self->{'_CONFIG_SEQFETCHDB'} ) ) {
return $self->{'_CONFIG_SEQFETCHDB'};
} else {
return undef;
}
}
###############################################
### end of config
###############################################
1;