Raw content of Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentChains
# Cared for by Ensembl
#
# Copyright GRL & EBI
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentChains
=head1 SYNOPSIS
my $db = Bio::EnsEMBL::DBAdaptor->new($locator);
my $genscan = Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentChains->new (
-db => $db,
-input_id => $input_id
-analysis => $analysis );
$genscan->fetch_input();
$genscan->run();
$genscan->write_output(); #writes to DB
=head1 DESCRIPTION
Given an compara MethodLinkSpeciesSet identifer, and a reference genomic
slice identifer, fetches the GenomicAlignBlocks from the given compara
database, forms them into sets of alignment chains, and writes the result
back to the database.
This module (at least for now) relies heavily on Jim Kent\'s Axt tools.
=cut
package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentChains;
use strict;
use Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentProcessing;
use Bio::EnsEMBL::Analysis::Runnable::AlignmentChains;
use Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;
use Bio::EnsEMBL::DnaDnaAlignFeature;
our @ISA = qw(Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::AlignmentProcessing);
my $WORKDIR; # global variable holding the path to the working directory where output will be written
my $BIN_DIR = "/usr/local/ensembl/bin";
############################################################
sub get_params {
my $self = shift;
my $param_string = shift;
return unless($param_string);
#print("parsing parameter string : ",$param_string,"\n");
my $params = eval($param_string);
return unless($params);
$self->SUPER::get_params($param_string);
if(defined($params->{'qyDnaFragID'})) {
my $dnafrag = $self->compara_dba->get_DnaFragAdaptor->fetch_by_dbID($params->{'qyDnaFragID'});
$self->query_dnafrag($dnafrag);
}
if(defined($params->{'tgDnaFragID'})) {
my $dnafrag = $self->compara_dba->get_DnaFragAdaptor->fetch_by_dbID($params->{'tgDnaFragID'});
$self->target_dnafrag($dnafrag);
}
if(defined($params->{'query_nib_dir'})) {
$self->QUERY_NIB_DIR($params->{'query_nib_dir'});
}
if(defined($params->{'target_nib_dir'})) {
$self->TARGET_NIB_DIR($params->{'target_nib_dir'});
}
if(defined($params->{'bin_dir'})) {
$self->BIN_DIR($params->{'bin_dir'});
}
return 1;
}
=head2 fetch_input
Title : fetch_input
Usage : $self->fetch_input
Returns : nothing
Args : none
=cut
sub fetch_input {
my( $self) = @_;
$self->SUPER::fetch_input;
$self->compara_dba->dbc->disconnect_when_inactive(0);
my $mlssa = $self->compara_dba->get_MethodLinkSpeciesSetAdaptor;
my $dafa = $self->compara_dba->get_DnaAlignFeatureAdaptor;
my $gaba = $self->compara_dba->get_GenomicAlignBlockAdaptor;
$gaba->lazy_loading(0);
$self->get_params($self->analysis->parameters);
$self->get_params($self->input_id);
my $qy_gdb = $self->query_dnafrag->genome_db;
my $tg_gdb = $self->target_dnafrag->genome_db;
################################################################
# get the compara data: MethodLinkSpeciesSet, reference DnaFrag,
# and all GenomicAlignBlocks
################################################################
print "mlss: ",$self->INPUT_METHOD_LINK_TYPE," ",$qy_gdb->dbID," ",$tg_gdb->dbID,"\n";
my $mlss;
if ($qy_gdb->dbID == $tg_gdb->dbID) {
$mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($self->INPUT_METHOD_LINK_TYPE,
[$qy_gdb]);
} else {
$mlss = $mlssa->fetch_by_method_link_type_GenomeDBs($self->INPUT_METHOD_LINK_TYPE,
[$qy_gdb,
$tg_gdb]);
}
throw("No MethodLinkSpeciesSet for :\n" .
$self->INPUT_METHOD_LINK_TYPE . "\n" .
$qy_gdb->dbID . "\n" .
$tg_gdb->dbID)
if not $mlss;
my $out_mlss = new Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;
$out_mlss->method_link_type($self->OUTPUT_METHOD_LINK_TYPE);
if ($qy_gdb->dbID == $tg_gdb->dbID) {
$out_mlss->species_set([$qy_gdb]);
} else {
$out_mlss->species_set([$qy_gdb, $tg_gdb]);
}
$mlssa->store($out_mlss);
######## needed for output####################
$self->output_MethodLinkSpeciesSet($out_mlss);
my $query_slice = $self->query_dnafrag->slice;
my $target_slice = $self->target_dnafrag->slice;
print STDERR "Fetching all DnaDnaAlignFeatures by query and target...\n";
print STDERR "start fetching at time: ",scalar(localtime),"\n";
if ($self->input_job->retry_count > 0) {
print STDERR "Deleting alignments as it is a rerun\n";
$self->delete_alignments($out_mlss,$self->query_dnafrag,$self->target_dnafrag);
}
my $gabs = $gaba->fetch_all_by_MethodLinkSpeciesSet_DnaFrag_DnaFrag($mlss,$self->query_dnafrag,undef,undef,$self->target_dnafrag);
my $features;
while (my $gab = shift @{$gabs}) {
my ($qy_ga) = $gab->reference_genomic_align;
my ($tg_ga) = @{$gab->get_all_non_reference_genomic_aligns};
unless (defined $self->query_DnaFrag_hash->{$qy_ga->dnafrag->name}) {
######### needed for output ######################################
$self->query_DnaFrag_hash->{$qy_ga->dnafrag->name} = $qy_ga->dnafrag;
}
unless (defined $self->target_DnaFrag_hash->{$tg_ga->dnafrag->name}) {
######### needed for output #######################################
$self->target_DnaFrag_hash->{$tg_ga->dnafrag->name} = $tg_ga->dnafrag;
}
my $daf_cigar = $self->daf_cigar_from_compara_cigars($qy_ga->cigar_line,
$tg_ga->cigar_line);
if (defined $daf_cigar) {
my $daf = Bio::EnsEMBL::DnaDnaAlignFeature->new
(-seqname => $qy_ga->dnafrag->name,
-start => $qy_ga->dnafrag_start,
-end => $qy_ga->dnafrag_end,
-strand => $qy_ga->dnafrag_strand,
-hseqname => $tg_ga->dnafrag->name,
-hstart => $tg_ga->dnafrag_start,
-hend => $tg_ga->dnafrag_end,
-hstrand => $tg_ga->dnafrag_strand,
-cigar_string => $daf_cigar);
push @{$features}, $daf;
}
}
print STDERR scalar @{$features}," features at time: ",scalar(localtime),"\n";
$WORKDIR = "/tmp/worker.$$";
unless(defined($WORKDIR) and (-e $WORKDIR)) {
#create temp directory to hold fasta databases
mkdir($WORKDIR, 0777);
}
my %parameters = (-analysis => $self->analysis,
-query_slice => $query_slice,
-target_slices => {$self->target_dnafrag->name => $target_slice},
#-query_nib_dir => $query_slice->length > $DEFAULT_DUMP_MIN_SIZE ? $self->QUERY_NIB_DIR : undef,
#-target_nib_dir => $target_slice->length > $DEFAULT_DUMP_MIN_SIZE ? $self->TARGET_NIB_DIR : undef,
-query_nib_dir => undef,
-target_nib_dir => undef,
-features => $features,
-workdir => $WORKDIR);
if ($self->QUERY_NIB_DIR and
-d $self->QUERY_NIB_DIR and
-e $self->QUERY_NIB_DIR . "/" . $query_slice->seq_region_name . ".nib") {
$parameters{-query_nib_dir} = $self->QUERY_NIB_DIR;
}
if ($self->TARGET_NIB_DIR and
-d $self->TARGET_NIB_DIR and
-e $self->TARGET_NIB_DIR . "/" . $target_slice->seq_region_name . ".nib") {
$parameters{-target_nib_dir} = $self->TARGET_NIB_DIR;
}
foreach my $program (qw(faToNib lavToAxt axtChain)) {
$parameters{'-' . $program} = $self->BIN_DIR . "/" . $program;
}
my $runnable = Bio::EnsEMBL::Analysis::Runnable::AlignmentChains->new(%parameters);
$self->runnable($runnable);
}
sub write_output {
my $self = shift;
my $disconnect_when_inactive_default = $self->compara_dba->dbc->disconnect_when_inactive;
$self->compara_dba->dbc->disconnect_when_inactive(0);
$self->SUPER::write_output;
$self->compara_dba->dbc->disconnect_when_inactive($disconnect_when_inactive_default);
}
sub query_dnafrag {
my ($self, $dir) = @_;
if (defined $dir) {
$self->{_query_dnafrag} = $dir;
}
return $self->{_query_dnafrag};
}
sub target_dnafrag {
my ($self, $dir) = @_;
if (defined $dir) {
$self->{_target_dnafrag} = $dir;
}
return $self->{_target_dnafrag};
}
sub QUERY_NIB_DIR {
my ($self, $dir) = @_;
if (defined $dir) {
$self->{_query_nib_dir} = $dir;
}
return $self->{_query_nib_dir};
}
sub TARGET_NIB_DIR {
my ($self, $dir) = @_;
if (defined $dir) {
$self->{_target_nib_dir} = $dir;
}
return $self->{_target_nib_dir};
}
sub BIN_DIR {
my ($self, $val) = @_;
$self->{_bin_dir} = $val if defined $val;
return $self->{_bin_dir} || $BIN_DIR;
}
1;