Raw content of Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::CreateSimpleNetsJobs
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::CreateSimpleNetsJobs
=cut
=head1 SYNOPSIS
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $runnableDB = Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::CreateSimpleNetsJobs->new (
-input_id => $input_id,
-analysis => $analysis );
$runnableDB->fetch_input(); #reads from DB
$runnableDB->run();
$runnableDB->output();
$runnableDB->write_output(); #writes to DB
=cut
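=head1 DESCRIPTION
For every dnafrag of the query genome (query_genome_db_id) that has a
genomic_align entry in the input method_link_species_set, this module creates
one job for the analysis named by the logic_name parameter. Each job's
input_id carries the query dnafrag id (qy_dnafrag_id) and the target
genome_db id (tg_genomedb_id).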
=cut
=head1 CONTACT
Abel Ureta-Vidal
=cut
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::CreateSimpleNetsJobs;
use strict;
use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor;
use Bio::EnsEMBL::Hive::Process;
use Bio::EnsEMBL::Utils::Exception;
our @ISA = qw(Bio::EnsEMBL::Hive::Process);
# fetch_input
#
# Reads the job configuration and resolves the database objects needed by
# the later stages.
#
# The analysis parameters are parsed first and the input_id second, both via
# get_params, so a value present in the input_id replaces the one taken from
# the parameters.
#
# On success the following are stored on $self:
#   compara_db                     - a Production DBAdaptor built on
#                                    $self->db->dbc
#   input_method_link_species_set  - the MLSS found for INPUT_METHOD_LINK and
#                                    the query/target genome_db ids
#   output_analysis                - the analysis fetched by LOGIC_NAME
#
# Returns 1.
# Throws if query_genome_db_id or target_genome_db_id is not defined, if no
# MLSS is found, or if no analysis is found for the logic name.
sub fetch_input {
    my $self = shift;

    $self->get_params($self->parameters);
    $self->get_params($self->input_id);

    # both ends of the net must be known before anything is looked up
    throw("must specify 'query_genome_db_id' to identify query of net")
        if not defined $self->QUERY_GENOME_DB_ID;
    throw("must specify 'target_genome_db_id' to target of net")
        if not defined $self->TARGET_GENOME_DB_ID;

    $self->compara_db(Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor
                      ->new(-DBCONN => $self->db->dbc));

    my $mlss = $self->compara_db->get_MethodLinkSpeciesSetAdaptor
        ->fetch_by_method_link_type_genome_db_ids($self->INPUT_METHOD_LINK,
                                                  [$self->QUERY_GENOME_DB_ID,
                                                   $self->TARGET_GENOME_DB_ID]);

    # The last component used to call the non-existent method
    # TARGET_GENOME_DB_DB, which made this error path die with
    # "Can't locate object method" instead of reporting the missing MLSS.
    throw("Could not identity a source MLSS from " .
          $self->INPUT_METHOD_LINK . " " .
          $self->QUERY_GENOME_DB_ID . " " .
          $self->TARGET_GENOME_DB_ID)
        if not defined $mlss;
    $self->input_method_link_species_set($mlss);

    my $out_analysis = $self->db->get_AnalysisAdaptor
        ->fetch_by_logic_name($self->LOGIC_NAME);
    throw("Could not get output analysis object from " . $self->LOGIC_NAME)
        if not defined $out_analysis;
    $self->output_analysis($out_analysis);

    return 1;
}
# get_params
#
# Evaluates a parameter string as Perl and copies the options this module
# cares about onto $self through the matching setter methods.
#
#   $param_string - Perl source expected to evaluate to a hash reference,
#                   e.g. "{'query_genome_db_id'=>1,'logic_name'=>'x'}".
#                   An empty or undefined string is ignored.
#
# Recognised keys and the setter each one feeds:
#   query_genome_db_id   -> QUERY_GENOME_DB_ID
#   target_genome_db_id  -> TARGET_GENOME_DB_ID
#   input_method_link    -> INPUT_METHOD_LINK
#   logic_name           -> LOGIC_NAME
# Any other key is ignored here.
#
# Returns nothing.
# Warns (and sets nothing) when the string cannot be evaluated.
# Dies when the string evaluates to a true value that is not a hash
# reference.
sub get_params {
    my ($self, $param_string) = @_;

    return unless($param_string);
    print("parsing parameter string : ",$param_string,"\n");

    # NOTE(review): string eval runs whatever Perl the string contains, so the
    # parameter string has to come from a trusted source - confirm that the
    # analysis parameters / input_id cannot be supplied by untrusted users.
    my $params = eval($param_string);
    if ($@) {
        # A malformed string used to be dropped without a trace, leaving the
        # caller to fail later on an apparently missing parameter.
        warn("get_params: could not evaluate parameter string '$param_string': $@");
        return;
    }
    return unless($params);

    # UNIVERSAL::isa accepts plain and blessed hashes alike and is false for
    # non-references, so anything unusable is reported clearly here instead
    # of failing on the first hash lookup below.
    die("get_params: parameter string '$param_string' did not evaluate to a hash reference\n")
        unless UNIVERSAL::isa($params, 'HASH');

    # most parameters can be passed straight through to the job.
    # we are concerned with the ones specific to each job here
    my @setter_for = (
        [ query_genome_db_id  => 'QUERY_GENOME_DB_ID'  ],
        [ target_genome_db_id => 'TARGET_GENOME_DB_ID' ],
        [ input_method_link   => 'INPUT_METHOD_LINK'   ],
        [ logic_name          => 'LOGIC_NAME'          ],
    );

    foreach my $pair (@setter_for) {
        my ($key, $setter) = @$pair;
        next unless exists $params->{$key};
        $self->$setter($params->{$key});
    }

    return;
}
# run
#
# Delegates all of the work to createAlignmentNetsJobs and returns 1.
sub run {
    my ($self) = @_;

    $self->createAlignmentNetsJobs;

    return 1;
}
# write_output
#
# Builds an output id holding the query and target genome_db ids, encodes it
# with $self->encode_hash and hands it to $self->dataflow_output_id with 2 as
# the second argument (presumably the Hive branch code - confirm).
#
# Returns 1.
sub write_output {
    my $self = shift;

    my $target_genome_db_id = $self->TARGET_GENOME_DB_ID;

    my $output_id_hash = {
        query_genome_db_id  => $self->QUERY_GENOME_DB_ID,
        # Spelled the way get_params in this module reads it.
        target_genome_db_id => $target_genome_db_id,
        # NOTE(review): the original code only emitted this misspelled key.
        # It is kept so that any consumer relying on it keeps working; drop
        # it once the downstream readers have been checked.
        target_genom_db_id  => $target_genome_db_id,
    };

    my $output_id = $self->encode_hash($output_id_hash);
    $self->dataflow_output_id($output_id, 2);

    return 1;
}
# createAlignmentNetsJobs
#
# Collects the distinct dnafrag ids that belong to the query genome_db and
# have a genomic_align row in the input method_link_species_set, then creates
# one job for $self->output_analysis per dnafrag id. Each job's input_id is
# the encode_hash form of { qy_dnafrag_id, tg_genomedb_id }.
sub createAlignmentNetsJobs {
    my ($self) = @_;

    # assumption: we only need to consider qy_dnafrags that have
    # alignments from the specified mlss, and dont need to consider
    # the target genome_db_id
    my $query = join(' ',
        'select distinct d.dnafrag_id',
        'from genomic_align ga, dnafrag d',
        'where ga.dnafrag_id = d.dnafrag_id',
        'and method_link_species_set_id = ?',
        'and genome_db_id = ?',
    );

    my $handle = $self->compara_db->dbc->prepare($query);
    $handle->execute($self->input_method_link_species_set->dbID,
                     $self->QUERY_GENOME_DB_ID);

    # read every id and release the statement before any job is created
    my @dnafrag_ids;
    while (my ($dnafrag_id) = $handle->fetchrow_array) {
        push @dnafrag_ids, $dnafrag_id;
    }
    $handle->finish;

    for my $dnafrag_id (@dnafrag_ids) {
        my $job_input_id = $self->encode_hash({
            qy_dnafrag_id  => $dnafrag_id,
            tg_genomedb_id => $self->TARGET_GENOME_DB_ID,
        });

        Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor->CreateNewJob(
            -input_id     => $job_input_id,
            -analysis     => $self->output_analysis,
            -input_job_id => 0,
        );
    }
}
######################
# compara_db
#
# Getter/setter for the value kept in $self->{_compara_db} (fetch_input
# stores the Production DBAdaptor here). A defined argument replaces the
# stored value; the stored value is returned either way.
sub compara_db {
    my $self = shift;
    my ($val) = @_;

    $self->{_compara_db} = $val if defined $val;

    return $self->{_compara_db};
}
# input_method_link_species_set
#
# Getter/setter for the value kept in $self->{_input_mlss} (fetch_input
# stores the source method_link_species_set here). A defined argument
# replaces the stored value; the stored value is returned either way.
sub input_method_link_species_set {
    my $self = shift;
    my ($val) = @_;

    $self->{_input_mlss} = $val if defined $val;

    return $self->{_input_mlss};
}
# output_analysis
#
# Getter/setter for the value kept in $self->{_output_analysis} (fetch_input
# stores the analysis fetched by logic name here). A defined argument
# replaces the stored value; the stored value is returned either way.
sub output_analysis {
    my $self = shift;
    my ($val) = @_;

    $self->{_output_analysis} = $val if defined $val;

    return $self->{_output_analysis};
}
#########
# Config variables
#########
# QUERY_GENOME_DB_ID
#
# Getter/setter for the query genome_db id kept in
# $self->{_query_genome_db_id}. A defined argument (0 included) replaces the
# stored value; the stored value is returned either way.
sub QUERY_GENOME_DB_ID {
    my $self = shift;
    my ($val) = @_;

    $self->{_query_genome_db_id} = $val if defined $val;

    return $self->{_query_genome_db_id};
}
# TARGET_GENOME_DB_ID
#
# Getter/setter for the target genome_db id kept in
# $self->{_target_genome_db_id}. A defined argument (0 included) replaces the
# stored value; the stored value is returned either way.
sub TARGET_GENOME_DB_ID {
    my $self = shift;
    my ($val) = @_;

    $self->{_target_genome_db_id} = $val if defined $val;

    return $self->{_target_genome_db_id};
}
# INPUT_METHOD_LINK
#
# Getter/setter for the input method link kept in
# $self->{_input_method_link}. A defined argument replaces the stored value;
# the stored value is returned either way.
sub INPUT_METHOD_LINK {
    my $self = shift;
    my ($val) = @_;

    $self->{_input_method_link} = $val if defined $val;

    return $self->{_input_method_link};
}
# LOGIC_NAME
#
# Getter/setter for the logic name of the output analysis, kept in
# $self->{_output_logic_name}. A defined argument replaces the stored value;
# the stored value is returned either way.
sub LOGIC_NAME {
    my $self = shift;
    my ($val) = @_;

    $self->{_output_logic_name} = $val if defined $val;

    return $self->{_output_logic_name};
}
1;