Raw content of Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentNets
# Cared for by Ensembl
#
# Copyright GRL & EBI
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Compara::Production::GenomicAlign::AlignmentNets
=head1 SYNOPSIS
my $db = Bio::EnsEMBL::DBAdaptor->new($locator);
my $genscan = Bio::EnsEMBL::Compara::Production::GenomicAlign::AlignmentNets->new (
-db => $db,
-input_id => $input_id
-analysis => $analysis );
$genscan->fetch_input();
$genscan->run();
$genscan->write_output(); #writes to DB
=head1 DESCRIPTION
Given an compara MethodLinkSpeciesSet identifer, and a reference genomic
slice identifer, fetches the GenomicAlignBlocks from the given compara
database, infers chains from the group identifiers, and then forms
an alignment net from the chains and writes the result
back to the database.
This module (at least for now) relies heavily on Jim Kent\'s Axt tools.
=cut
package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentNets;
use strict;
use Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentProcessing;
use Bio::EnsEMBL::Analysis::Runnable::AlignmentNets;
use Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;
our @ISA = qw(Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentProcessing);
my $WORKDIR; # global variable holding the path to the working directory where output will be written
my $BIN_DIR = "/usr/local/ensembl/bin";
sub get_params {
my $self = shift;
my $param_string = shift;
return unless($param_string);
#print("parsing parameter string : ",$param_string,"\n");
my $params = eval($param_string);
return unless($params);
$self->SUPER::get_params($param_string);
# from input_id
if(defined($params->{'DnaFragID'})) {
my $dnafrag = $self->compara_dba->get_DnaFragAdaptor->fetch_by_dbID($params->{'DnaFragID'});
$self->query_dnafrag($dnafrag);
}
if (defined $params->{'start'}) {
$self->REGION_START($params->{'start'});
}
if (defined $params->{'end'}) {
$self->REGION_END($params->{'end'});
}
if (defined $params->{'method_link_species_set_id'}) {
$self->METHOD_LINK_SPECIES_SET_ID($params->{'method_link_species_set_id'});
}
if (defined($params->{'bin_dir'})) {
$self->BIN_DIR($params->{'bin_dir'});
}
return 1;
}
=head2 fetch_input
Title : fetch_input
Usage : $self->fetch_input
Function:
Returns : nothing
Args : none
=cut
sub fetch_input {
my( $self) = @_;
$self->SUPER::fetch_input;
$self->compara_dba->dbc->disconnect_when_inactive(0);
my $mlssa = $self->compara_dba->get_MethodLinkSpeciesSetAdaptor;
my $dafa = $self->compara_dba->get_DnaAlignFeatureAdaptor;
my $gaba = $self->compara_dba->get_GenomicAlignBlockAdaptor;
$self->get_params($self->analysis->parameters);
$self->get_params($self->input_id);
################################################################
# get the compara data: MethodLinkSpeciesSet, reference DnaFrag,
# and GenomicAlignBlocks
################################################################
my $mlss = $mlssa->fetch_by_dbID($self->METHOD_LINK_SPECIES_SET_ID);
throw("No MethodLinkSpeciesSet for method_link_species_set_id".$self->METHOD_LINK_SPECIES_SET_ID)
if not $mlss;
my $out_mlss = new Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;
$out_mlss->method_link_type($self->OUTPUT_METHOD_LINK_TYPE);
$out_mlss->species_set($mlss->species_set);
$mlssa->store($out_mlss);
######## needed for output####################
$self->output_MethodLinkSpeciesSet($out_mlss);
if ($self->input_job->retry_count > 0) {
print STDERR "Deleting alignments as it is a rerun\n";
$self->delete_alignments($out_mlss,
$self->query_dnafrag,
$self->REGION_START,
$self->REGION_END);
}
my $gabs = $gaba->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss,
$self->query_dnafrag,
$self->REGION_START,
$self->REGION_END);
###################################################################
# get the target slices and bin the GenomicAlignBlocks by group id
###################################################################
my (%features_by_group, %query_lengths, %target_lengths);
my %group_score;
while (my $gab = shift @{$gabs}) {
my ($qy_ga) = $gab->reference_genomic_align;
my ($tg_ga) = @{$gab->get_all_non_reference_genomic_aligns};
if (not exists($self->query_DnaFrag_hash->{$qy_ga->dnafrag->name})) {
######### needed for output ######################################
$self->query_DnaFrag_hash->{$qy_ga->dnafrag->name} = $qy_ga->dnafrag;
}
if (not exists($self->target_DnaFrag_hash->{$tg_ga->dnafrag->name})) {
######### needed for output #######################################
$self->target_DnaFrag_hash->{$tg_ga->dnafrag->name} = $tg_ga->dnafrag;
}
my $group_id = $gab->group_id();
push @{$features_by_group{$group_id}}, $gab;
if (! defined $group_score{$group_id} || $gab->score > $group_score{$group_id}) {
$group_score{$group_id} = $gab->score;
}
}
foreach my $group_id (keys %features_by_group) {
$features_by_group{$group_id} = [ sort {$a->reference_genomic_align->dnafrag_start <=> $b->reference_genomic_align->dnafrag_start} @{$features_by_group{$group_id}} ];
}
$WORKDIR = "/tmp/worker.$$";
unless(defined($WORKDIR) and (-e $WORKDIR)) {
#create temp directory to hold fasta databases
mkdir($WORKDIR, 0777);
}
foreach my $nm (keys %{$self->query_DnaFrag_hash}) {
$query_lengths{$nm} = $self->query_DnaFrag_hash->{$nm}->length;
}
foreach my $nm (keys %{$self->target_DnaFrag_hash}) {
$target_lengths{$nm} = $self->target_DnaFrag_hash->{$nm}->length;
}
my %parameters = (-analysis => $self->analysis,
-query_lengths => \%query_lengths,
-target_lengths => \%target_lengths,
-chains => [ map {$features_by_group{$_}} sort {$group_score{$b} <=> $group_score{$a}} keys %group_score ],
-chains_sorted => 1,
-chainNet => $self->BIN_DIR . "/" . "chainNet",
-workdir => $WORKDIR,
-min_chain_score => $self->MIN_CHAIN_SCORE);
my $run = Bio::EnsEMBL::Analysis::Runnable::AlignmentNets->new(%parameters);
$self->runnable($run);
}
sub query_dnafrag {
my ($self, $dir) = @_;
if (defined $dir) {
$self->{_query_dnafrag} = $dir;
}
return $self->{_query_dnafrag};
}
sub delete_alignments {
my ($self, $mlss, $qy_dnafrag, $start, $end) = @_;
my $dbc = $self->db->dbc;
my $sql = "select ga1.genomic_align_block_id, ga1.genomic_align_id, ga2.genomic_align_id from genomic_align ga1, genomic_align ga2 where ga1.genomic_align_block_id=ga2.genomic_align_block_id and ga1.dnafrag_id = ? and ga1.dnafrag_id!=ga2.dnafrag_id and ga1.method_link_species_set_id = ?";
my $sth;
if (defined $start and defined $end) {
$sql .= " and ga1.dnafrag_start <= ? and ga1.dnafrag_end >= ?";
$sth = $dbc->prepare($sql);
$sth->execute($qy_dnafrag->dbID, $mlss->dbID, $end, $start);
} elsif (defined $start) {
$sql .= " and ga1.dnafrag_end >= ?";
$sth->execute($qy_dnafrag->dbID, $mlss->dbID, $start);
} elsif (defined $end) {
$sql .= " and ga1.dnafrag_start <= ? ";
$sth->execute($qy_dnafrag->dbID, $mlss->dbID, $end);
} else {
$sth = $dbc->prepare($sql);
$sth->execute($qy_dnafrag->dbID, $mlss->dbID);
}
my $nb_gabs = 0;
my @gabs;
while (my $aref = $sth->fetchrow_arrayref) {
my ($gab_id, $ga_id1, $ga_id2) = @$aref;
push @gabs, [$gab_id, $ga_id1, $ga_id2];
$nb_gabs++;
}
my $sql_gab = "delete from genomic_align_block where genomic_align_block_id in ";
my $sql_ga = "delete from genomic_align where genomic_align_id in ";
for (my $i=0; $i < scalar @gabs; $i=$i+20000) {
my (@gab_ids, @ga1_ids, @ga2_ids);
for (my $j = $i; ($j < scalar @gabs && $j < $i+20000); $j++) {
push @gab_ids, $gabs[$j][0];
push @ga1_ids, $gabs[$j][1];
push @ga2_ids, $gabs[$j][2];
# print $j," ",$gabs[$j][0]," ",$gabs[$j][1]," ",$gabs[$j][2],"\n";
}
my $sql_gab_to_exec = $sql_gab . "(" . join(",", @gab_ids) . ")";
my $sql_ga_to_exec1 = $sql_ga . "(" . join(",", @ga1_ids) . ")";
my $sql_ga_to_exec2 = $sql_ga . "(" . join(",", @ga2_ids) . ")";
foreach my $sql ($sql_gab_to_exec,$sql_ga_to_exec1,$sql_ga_to_exec2) {
my $sth = $dbc->prepare($sql);
$sth->execute;
$sth->finish;
}
}
}
sub run {
my ($self) = @_;
foreach my $runnable(@{$self->runnable}){
$runnable->run;
$self->cleanse_output($runnable->output);
$self->output($runnable->output);
}
}
sub write_output {
my $self = shift;
my $disconnect_when_inactive_default = $self->compara_dba->dbc->disconnect_when_inactive;
$self->compara_dba->dbc->disconnect_when_inactive(0);
$self->SUPER::write_output;
$self->compara_dba->dbc->disconnect_when_inactive($disconnect_when_inactive_default);
}
###########################
## parameter place-holders
sub REGION_START {
my ($self, $val) = @_;
if (defined $val) {
$self->{_region_start} = $val;
}
if (exists $self->{_region_start}) {
return $self->{_region_start};
} else {
return undef;
}
}
sub REGION_END {
my ($self, $val) = @_;
if (defined $val) {
$self->{_region_end} = $val;
}
if (exists $self->{_region_end}) {
return $self->{_region_end};
} else {
return undef;
}
}
sub METHOD_LINK_SPECIES_SET_ID {
my ($self, $val) = @_;
if (defined $val) {
$self->{_method_link_species_set_id} = $val;
}
return $self->{_method_link_species_set_id}
}
sub BIN_DIR {
my ($self, $val) = @_;
$self->{_bin_dir} = $val if defined $val;
return $self->{_bin_dir} || $BIN_DIR;
}
1;