Bio::EnsEMBL::Compara::Production::GenomicAlignBlock Lagan
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::Lagan
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Runnable::Lagan
Bio::EnsEMBL::Compara::DnaFragRegion
Bio::EnsEMBL::Compara::Graph::NewickParser
Bio::EnsEMBL::Compara::NestedSet
Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor ;
Bio::EnsEMBL::Hive::Process
Bio::EnsEMBL::Utils::Exception
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Bio::EnsEMBL::Hive::Process
Synopsis
Description
This module interfaces the Hive and the Analsys systems: it allows to run Lagan jobs in an
ensembl-hive production systems. Jobs are run using the Bio::EnsEMBL::Analysis::Runnable::Lagan
module.
Methods
build_tree_string
No description
Code
dumpFasta
No description
Code
fasta_files
No description
Code
fetch_inputDescriptionCode
get_params
No description
Code
method_link_species_set_id
No description
Code
run
No description
Code
synteny_region_id
No description
Code
tree_file
No description
Code
tree_string
No description
Code
update_node_names
No description
Code
write_output
No description
Code
Methods description
fetch_inputcode    nextTop
    Title   :   fetch_input
Usage : $self->fetch_input
Function: Fetches input data for repeatmasker from the database
Returns : none
Args : none
Methods code
build_tree_stringdescriptionprevnextTop
sub build_tree_string {
  my $self = shift;
  my $tree_file = $self->tree_file;
  open F, $tree_file || throw("Can not open $tree_file");
  my $newick = "";
  while (<F>) {
    chomp;
    if (/^\s*(.*)\s*$/) {
      $newick .= $1;
    }
  }
  close F;

  my $tree = Bio::EnsEMBL::Compara::Graph::NewickParser::parse_newick_into_tree($newick);

  $self->update_node_names($tree);

  my $tree_string = $tree->newick_simple_format;

  $tree_string =~ s/:\d+\.\d+//g;
  $tree_string =~ s/[,;]/ /g;
  $tree_string =~ s/\"//g;

  $tree->release_tree;

  return $tree_string;
}
dumpFastadescriptionprevnextTop
sub dumpFasta {
  my $self = shift;

#  $self->{'comparaDBA'}->dbc->disconnect_when_inactive(1);
my $sra = $self->{'comparaDBA'}->get_SyntenyRegionAdaptor; my $sr = $sra->fetch_by_dbID($self->synteny_region_id); my $idx = 1; foreach my $dfr (@{$sr->children}) { my $file = $self->worker_temp_directory . "/seq" . $idx . ".fa"; my $masked_file = $self->worker_temp_directory . "/seq" . $idx . ".fa.masked"; $idx++; open F, ">$file" || throw("Couldn't open $file"); open MF, ">$masked_file" || throw("Couldn't open $masked_file"); # WARNING this is a hack. It won't work at all on self comparisons!!!
# This will be more generic and fixed when the retain/release call will be
# cleaned from the Node/Link/NestedSet code.
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
$self->{'_dnafrag_regions'}{$dfr->dnafrag_id} = $dfr; $dfr->retain; $dfr->disavow_parent; my $slice = $dfr->slice; print F ">DnaFrag" . $dfr->dnafrag_id . ".\n"; print MF ">DnaFrag" . $dfr->dnafrag_id . ".\n"; my $seq = $slice->seq; $seq =~ s/(.{80})/$1\n/g; chomp $seq; print F $seq,"\n"; $seq = $slice->get_repeatmasked_seq->seq; $seq =~ s/(.{80})/$1\n/g; chomp $seq; print MF $seq,"\n"; close F; close MF; push @{$self->fasta_files}, $file; } # $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);
$sr->release_tree; if ($self->tree_file) { my $tree_string = $self->build_tree_string; $self->tree_string($tree_string); } return 1;
}
fasta_filesdescriptionprevnextTop
sub fasta_files {
  my $self = shift;

  $self->{'_fasta_files'} = [] unless (defined $self->{'_fasta_files'});

  if (@_) {
    my $value = shift;
    push @{$self->{'_fasta_files'}}, $value;
  }

  return $self->{'_fasta_files'};
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my( $self) = @_;

  $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc);
  $self->get_params($self->parameters);
  $self->get_params($self->input_id);

  $self->dumpFasta;

  return 1;
}
get_paramsdescriptionprevnextTop
sub get_params {
  my $self         = shift;
  my $param_string = shift;

  return unless($param_string);
#   print("parsing parameter string : ",$param_string,"\n");
my $params = eval($param_string); return unless($params); if(defined($params->{'synteny_region_id'})) { $self->synteny_region_id($params->{'synteny_region_id'}); } if(defined($params->{'method_link_species_set_id'})) { $self->method_link_species_set_id($params->{'method_link_species_set_id'}); } if(defined($params->{'tree_file'})) { $self->tree_file($params->{'tree_file'}); } return 1;
}
method_link_species_set_iddescriptionprevnextTop
sub method_link_species_set_id {
  my $self = shift;
  $self->{'_method_link_species_set_id'} = shift if(@_);
  return $self->{'_method_link_species_set_id'};
}

##########################################
#
# internal methods
#
##########################################
}
rundescriptionprevnextTop
sub run {
  my $self = shift;

  my $runnable = new Bio::EnsEMBL::Analysis::Runnable::Lagan
    (-workdir => $self->worker_temp_directory,
     -fasta_files => $self->fasta_files,
     -tree_string => $self->tree_string,
     -analysis => $self->analysis);
  $self->{'_runnable'} = $runnable;
  $runnable->run_analysis;
}
synteny_region_iddescriptionprevnextTop
sub synteny_region_id {
  my $self = shift;
  $self->{'_synteny_region_id'} = shift if(@_);
  return $self->{'_synteny_region_id'};
}
tree_filedescriptionprevnextTop
sub tree_file {
  my $self = shift;
  $self->{'_tree_file'} = shift if(@_);
  return $self->{'_tree_file'};
}
tree_stringdescriptionprevnextTop
sub tree_string {
  my $self = shift;
  $self->{'_tree_string'} = shift if(@_);
  return $self->{'_tree_string'};
}
update_node_namesdescriptionprevnextTop
sub update_node_names {
  my $self = shift;
  my $tree = shift;
  my %gdb_id2dfr;
  foreach my $dfr (values %{$self->{'_dnafrag_regions'}}) {
    $gdb_id2dfr{$dfr->dnafrag->genome_db->dbID} = "DnaFrag".$dfr->dnafrag_id .".";
  }

  foreach my $leaf (@{$tree->get_all_leaves}) {
    if (defined $gdb_id2dfr{$leaf->name}) {
      $leaf->name($gdb_id2dfr{$leaf->name});
    } else {
      $leaf->disavow_parent;
      $tree = $tree->minimize_tree;
    }
  }
  if ($tree->get_child_count == 1) {
    my $child = $tree->children->[0];
    $child->parent->merge_children($child);
    $child->disavow_parent;
  }
  return $tree;
}

1;
}
write_outputdescriptionprevnextTop
sub write_output {
  my ($self) = @_;

  my $mlssa = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor;
  my $mlss = $mlssa->fetch_by_dbID($self->method_link_species_set_id);
  my $gaba = $self->{'comparaDBA'}->get_GenomicAlignBlockAdaptor;
  foreach my $gab (@{$self->{'_runnable'}->output}) {
    foreach my $ga (@{$gab->genomic_align_array}) {
      $ga->method_link_species_set($mlss);
      my $dfr = $self->{'_dnafrag_regions'}{$ga->dnafrag_id};
      $ga->dnafrag_id($dfr->dnafrag_id);
      $ga->dnafrag($dfr->dnafrag);
      $ga->dnafrag_start($dfr->dnafrag_start);
      $ga->dnafrag_end($dfr->dnafrag_end);
      $ga->dnafrag_strand($dfr->dnafrag_strand);
      $ga->level_id(1);
      $dfr->release;
      unless (defined $gab->length) {
        $gab->length(length($ga->aligned_sequence));
      }
    }
    $gab->method_link_species_set($mlss);
    $gaba->store($gab);
  }
  return 1;
}

##########################################
#
# getter/setter methods
#
##########################################
#sub input_dir {
# my $self = shift;
# $self->{'_input_dir'} = shift if(@_);
# return $self->{'_input_dir'};
#
}
General documentation
AUTHORSTop
Javier Herrero
Abel Ureta-Vidal
COPYRIGHTTop
Copyright (c) 2006. EnsEMBL Team
You may distribute this module under the same terms as perl itself
CONTACTTop
This modules is part of the EnsEMBL project ()
Questions can be posted to the ensembl-dev mailing list: ensembl-dev@ebi.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _