Raw content of Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::CreateAlignmentChainsJobs
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Compara::RunnableDB::CreateAlignmentChainsJobs
=cut
=head1 SYNOPSIS
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $runnableDB = Bio::EnsEMBL::Compara::RunnableDB::CreateAlignmentChainsJobs->new (
-input_id => $input_id
-analysis => $analysis );
$runnableDB->fetch_input(); #reads from DB
$runnableDB->run();
$runnableDB->output();
$runnableDB->write_output(); #writes to DB
=cut
=head1 DESCRIPTION
=cut
=head1 CONTACT
Abel Ureta-Vidal
=cut
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::CreateAlignmentChainsJobs;
use strict;
use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor;
use Bio::EnsEMBL::Hive::Process;
use Bio::EnsEMBL::Utils::Exception;
our @ISA = qw(Bio::EnsEMBL::Hive::Process);
#my $DEFAULT_DUMP_MIN_SIZE = 11500000;
my $DEFAULT_OUTPUT_METHOD_LINK = "BLASTZ_CHAIN";
sub fetch_input {
my $self = shift;
#
# parameters which can be set either via
# $self->parameters OR
# $self->input_id
#
$self->{'query_collection'} = undef;
$self->{'target_collection'} = undef;
$self->{'query_genome_db'} = undef;
$self->{'target_genome_db'} = undef;
$self->{'method_link_species_set'} = undef;
$self->{'output_method_link'} = undef;
$self->get_params($self->parameters);
$self->get_params($self->input_id);
# create a Compara::DBAdaptor which shares my DBConnection
$self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);
# get DnaCollection of query
throw("must specify 'query_collection_name' to identify DnaCollection of query")
unless(defined($self->{'query_collection_name'}));
$self->{'query_collection'} = $self->{'comparaDBA'}->get_DnaCollectionAdaptor->
fetch_by_set_description($self->{'query_collection_name'});
throw("unable to find DnaCollection with name : ". $self->{'query_collection_name'})
unless(defined($self->{'query_collection'}));
# get DnaCollection of target
throw("must specify 'target_collection_name' to identify DnaCollection of query")
unless(defined($self->{'target_collection_name'}));
$self->{'target_collection'} = $self->{'comparaDBA'}->get_DnaCollectionAdaptor->
fetch_by_set_description($self->{'target_collection_name'});
throw("unable to find DnaCollection with name : ". $self->{'target_collection_name'})
unless(defined($self->{'target_collection'}));
# get genome_db of query
throw("must specify 'query_genome_db_id' to identify DnaCollection of query")
unless(defined($self->{'query_genome_db_id'}));
$self->{'query_genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->
fetch_by_dbID($self->{'query_genome_db_id'});
throw("unable to find genome_db with dbID : ". $self->{'query_genome_db_id'})
unless(defined($self->{'query_genome_db'}));
# get genome_db of target
throw("must specify 'target_genome_db_id' to identify DnaCollection of target")
unless(defined($self->{'target_genome_db_id'}));
$self->{'target_genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->
fetch_by_dbID($self->{'target_genome_db_id'});
throw("unable to find genome_db with dbID : ". $self->{'target_genome_db_id'})
unless(defined($self->{'target_genome_db'}));
# get the MethodLinkSpeciesSet
throw("must specify a method_link to identify a MethodLinkSpeciesSet")
unless(defined($self->{'method_link'}));
if ($self->{'query_genome_db'}->dbID == $self->{'target_genome_db'}->dbID) {
$self->{'method_link_species_set'} = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->fetch_by_method_link_type_genome_db_ids($self->{'method_link'}, [$self->{'query_genome_db'}->dbID] );
} else {
$self->{'method_link_species_set'} = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->fetch_by_method_link_type_genome_db_ids($self->{'method_link'}, [$self->{'query_genome_db'}->dbID, $self->{'target_genome_db'}->dbID] );
}
throw("unable to find method_link_species_set for method_link=",$self->{'method_link'}," and the following genome_db_ids ",$self->{'query_genome_db_id'},", ",$self->{'target_genome_db_id'},"\n")
unless(defined($self->{'method_link_species_set'}));
$self->print_params;
return 1;
}
sub run
{
my $self = shift;
$self->createAlignmentChainsJobs();
return 1;
}
sub write_output
{
my $self = shift;
my $output_id = "{\'query_genome_db_id\' => \'" . $self->{'query_genome_db_id'} . "\',\'target_genome_db_id\' => \'" . $self->{'target_genome_db_id'} . "\'}";
$self->dataflow_output_id($output_id, 2);
return 1;
}
##################################
#
# subroutines
#
##################################
sub get_params {
my $self = shift;
my $param_string = shift;
return unless($param_string);
print("parsing parameter string : ",$param_string,"\n");
my $params = eval($param_string);
return unless($params);
foreach my $key (keys %$params) {
print(" $key : ", $params->{$key}, "\n");
}
# from input_id
$self->{'query_genome_db_id'} = $params->{'query_genome_db_id'} if(defined($params->{'query_genome_db_id'}));
$self->{'target_genome_db_id'} = $params->{'target_genome_db_id'} if(defined($params->{'target_genome_db_id'}));
$self->{'query_collection_name'} = $params->{'query_collection_name'} if(defined($params->{'query_collection_name'}));
$self->{'target_collection_name'} = $params->{'target_collection_name'} if(defined($params->{'target_collection_name'}));
$self->{'logic_name'} = $params->{'logic_name'} if(defined($params->{'logic_name'}));
# from parameters
## method_link alias for input_method_link, for backwards compatibility
$self->{'method_link'} = $params->{'method_link'} if(defined($params->{'method_link'}));
$self->{'method_link'} = $params->{'input_method_link'} if(defined($params->{'input_method_link'}));
$self->{'output_method_link'} = $params->{'output_method_link'} if(defined($params->{'output_method_link'}));
return;
}
sub print_params {
my $self = shift;
printf(" params:\n");
printf(" method_link_species_set_id : %d\n", $self->{'method_link_species_set'}->dbID);
printf(" query_collection : (%d) %s\n",
$self->{'query_collection'}->dbID, $self->{'query_collection'}->description);
printf(" target_collection : (%d) %s\n",
$self->{'target_collection'}->dbID, $self->{'target_collection'}->description);
printf(" query_genome_db : (%d) %s\n",
$self->{'query_genome_db'}->dbID, $self->{'query_genome_db'}->name);
printf(" target_genome_db : (%d) %s\n",
$self->{'target_genome_db'}->dbID, $self->{'target_genome_db'}->name);
}
sub createAlignmentChainsJobs
{
my $self = shift;
#my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name('AlignmentChains');
my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($self->{'logic_name'});
my (%qy_dna_hash, %tg_dna_hash);
foreach my $obj (@{$self->{'query_collection'}->get_all_dna_objects}) {
my @dna_chunks;
if ($obj->isa("Bio::EnsEMBL::Compara::Production::DnaFragChunkSet")) {
push @dna_chunks, @{$obj->get_all_DnaFragChunks};
} else {
push @dna_chunks, $obj;
}
foreach my $chunk (@dna_chunks) {
my $dnafrag = $chunk->dnafrag;
if (not exists $qy_dna_hash{$dnafrag->dbID}) {
$qy_dna_hash{$dnafrag->dbID} = $dnafrag;
}
}
}
my %target_dna_hash;
foreach my $dna_object (@{$self->{'target_collection'}->get_all_dna_objects}) {
my @dna_chunks;
if ($dna_object->isa('Bio::EnsEMBL::Compara::Production::DnaFragChunkSet')) {
push @dna_chunks, @{$dna_object->get_all_DnaFragChunks};
} else {
push @dna_chunks, $dna_object;
}
foreach my $chunk (@dna_chunks) {
my $dnafrag = $chunk->dnafrag;
if (not exists $tg_dna_hash{$dnafrag->dbID}) {
$tg_dna_hash{$chunk->dnafrag->dbID} = $dnafrag;
}
}
}
my $count=0;
my $sql = "select g2.dnafrag_id from genomic_align g1, genomic_align g2 where g1.method_link_species_set_id = ? and g1.genomic_align_block_id=g2.genomic_align_block_id and g1.dnafrag_id = ? and g1.genomic_align_id != g2.genomic_align_id group by g2.dnafrag_id";
my $sth = $self->{'comparaDBA'}->dbc->prepare($sql);
my $reverse_pairs; # used to avoid getting twice the same results for self-comparisons
foreach my $qy_dnafrag_id (keys %qy_dna_hash) {
$sth->execute($self->{'method_link_species_set'}->dbID, $qy_dnafrag_id);
my $tg_dnafrag_id;
$sth->bind_columns(\$tg_dnafrag_id);
while ($sth->fetch()) {
next unless exists $tg_dna_hash{$tg_dnafrag_id};
next if (defined($reverse_pairs->{$qy_dnafrag_id}->{$tg_dnafrag_id}));
my $input_hash = {};
$input_hash->{'qyDnaFragID'} = $qy_dnafrag_id;
$input_hash->{'tgDnaFragID'} = $tg_dnafrag_id;
if ($self->{'query_collection'}->dump_loc) {
my $nib_file = $self->{'query_collection'}->dump_loc
. "/"
. $qy_dna_hash{$qy_dnafrag_id}->name
. ".nib";
if (-e $nib_file) {
$input_hash->{'query_nib_dir'} = $self->{'query_collection'}->dump_loc;
}
}
if ($self->{'target_collection'}->dump_loc) {
my $nib_file = $self->{'target_collection'}->dump_loc
. "/"
. $tg_dna_hash{$tg_dnafrag_id}->name
. ".nib";
if (-e $nib_file) {
$input_hash->{'target_nib_dir'} = $self->{'target_collection'}->dump_loc;
}
}
$reverse_pairs->{$tg_dnafrag_id}->{$qy_dnafrag_id} = 1;
my $input_id = main::encode_hash($input_hash);
#printf("create_job : %s : %s\n", $self->{'pair_aligner'}->logic_name, $input_id);
Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor->CreateNewJob (
-input_id => $input_id,
-analysis => $analysis,
-input_job_id => 0,
);
$count++;
}
}
$sth->finish;
if ($count == 0) {
# No alignments have been found. Remove the control rule to unblock following analyses
my $analysis_ctrl_rule_adaptor = $self->db->get_AnalysisCtrlRuleAdaptor;
$analysis_ctrl_rule_adaptor->remove_by_condition_analysis_url($analysis->logic_name);
print "No jobs created. Deleting analysis ctrl rule for " . $analysis->logic_name . "\n";
} else {
printf("created %d jobs for AlignmentChains\n", $count);
}
## Create new MethodLinkSpeciesSet
# Use the value set in the parameters or the input_id
my $output_method_link = $self->{'output_method_link'};
# Use the output_method_link from the target analysis if the previous is not set
if (!$output_method_link and $self->{'logic_name'}) {
my $params = eval($analysis->parameters);
if ($params->{'output_method_link'}) {
$output_method_link = $params->{'output_method_link'}
}
}
# Use the default value otherwise
if (!$output_method_link) {
$output_method_link = $DEFAULT_OUTPUT_METHOD_LINK;
}
my $new_mlss = new Bio::EnsEMBL::Compara::MethodLinkSpeciesSet(
-species_set => $self->{'method_link_species_set'}->species_set,
-method_link_type => $output_method_link
);
$self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->store($new_mlss);
}
1;