Raw content of Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::Lagan # # POD documentation - main docs before the code # =pod =head1 NAME Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::Lagan =cut =head1 SYNOPSIS =cut =head1 DESCRIPTION This module interfaces the Hive and the Analsys systems: it allows to run Lagan jobs in an ensembl-hive production systems. Jobs are run using the Bio::EnsEMBL::Analysis::Runnable::Lagan module. =cut =head1 AUTHORS Javier Herrero Abel Ureta-Vidal =head1 COPYRIGHT Copyright (c) 2006. EnsEMBL Team You may distribute this module under the same terms as perl itself =head1 CONTACT This modules is part of the EnsEMBL project () Questions can be posted to the ensembl-dev mailing list: ensembl-dev@ebi.ac.uk =cut =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::Lagan; use strict; use Bio::EnsEMBL::Analysis::Runnable::Lagan; use Bio::EnsEMBL::Compara::DnaFragRegion; use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;; use Bio::EnsEMBL::Utils::Exception; use Bio::EnsEMBL::Compara::NestedSet; use Bio::EnsEMBL::Compara::Graph::NewickParser; use Time::HiRes qw(time gettimeofday tv_interval); use Bio::EnsEMBL::Hive::Process; our @ISA = qw(Bio::EnsEMBL::Hive::Process); $| = 1; =head2 fetch_input Title : fetch_input Usage : $self->fetch_input Function: Fetches input data for repeatmasker from the database Returns : none Args : none =cut sub fetch_input { my( $self) = @_; $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc); $self->get_params($self->parameters); $self->get_params($self->input_id); $self->dumpFasta; return 1; } sub run { my $self = shift; my $runnable = new Bio::EnsEMBL::Analysis::Runnable::Lagan (-workdir => $self->worker_temp_directory, -fasta_files => $self->fasta_files, -tree_string => $self->tree_string, -analysis => $self->analysis); $self->{'_runnable'} = $runnable; $runnable->run_analysis; } sub write_output { my ($self) = @_; my $mlssa = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor; my $mlss = $mlssa->fetch_by_dbID($self->method_link_species_set_id); my $gaba = $self->{'comparaDBA'}->get_GenomicAlignBlockAdaptor; foreach my $gab (@{$self->{'_runnable'}->output}) { foreach my $ga (@{$gab->genomic_align_array}) { $ga->method_link_species_set($mlss); my $dfr = $self->{'_dnafrag_regions'}{$ga->dnafrag_id}; $ga->dnafrag_id($dfr->dnafrag_id); $ga->dnafrag($dfr->dnafrag); $ga->dnafrag_start($dfr->dnafrag_start); $ga->dnafrag_end($dfr->dnafrag_end); $ga->dnafrag_strand($dfr->dnafrag_strand); $ga->level_id(1); $dfr->release; unless (defined $gab->length) { $gab->length(length($ga->aligned_sequence)); } } $gab->method_link_species_set($mlss); $gaba->store($gab); } return 1; } ########################################## # # getter/setter methods # ########################################## #sub input_dir { # my $self = shift; # $self->{'_input_dir'} = shift if(@_); # return $self->{'_input_dir'}; #} sub synteny_region_id { my $self = shift; $self->{'_synteny_region_id'} = shift if(@_); return $self->{'_synteny_region_id'}; } sub fasta_files { my $self = shift; $self->{'_fasta_files'} = [] unless (defined $self->{'_fasta_files'}); if (@_) { my $value = shift; push @{$self->{'_fasta_files'}}, $value; } return $self->{'_fasta_files'}; } sub tree_file { my $self = shift; $self->{'_tree_file'} = shift if(@_); return $self->{'_tree_file'}; } sub tree_string { my $self = shift; $self->{'_tree_string'} = shift if(@_); return $self->{'_tree_string'}; } sub method_link_species_set_id { my $self = shift; $self->{'_method_link_species_set_id'} = shift if(@_); return $self->{'_method_link_species_set_id'}; } ########################################## # # internal methods # ########################################## sub get_params { my $self = shift; my $param_string = shift; return unless($param_string); # print("parsing parameter string : ",$param_string,"\n"); my $params = eval($param_string); return unless($params); if(defined($params->{'synteny_region_id'})) { $self->synteny_region_id($params->{'synteny_region_id'}); } if(defined($params->{'method_link_species_set_id'})) { $self->method_link_species_set_id($params->{'method_link_species_set_id'}); } if(defined($params->{'tree_file'})) { $self->tree_file($params->{'tree_file'}); } return 1; } sub dumpFasta { my $self = shift; # $self->{'comparaDBA'}->dbc->disconnect_when_inactive(1); my $sra = $self->{'comparaDBA'}->get_SyntenyRegionAdaptor; my $sr = $sra->fetch_by_dbID($self->synteny_region_id); my $idx = 1; foreach my $dfr (@{$sr->children}) { my $file = $self->worker_temp_directory . "/seq" . $idx . ".fa"; my $masked_file = $self->worker_temp_directory . "/seq" . $idx . ".fa.masked"; $idx++; open F, ">$file" || throw("Couldn't open $file"); open MF, ">$masked_file" || throw("Couldn't open $masked_file"); # WARNING this is a hack. It won't work at all on self comparisons!!! # This will be more generic and fixed when the retain/release call will be # cleaned from the Node/Link/NestedSet code. # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! $self->{'_dnafrag_regions'}{$dfr->dnafrag_id} = $dfr; $dfr->retain; $dfr->disavow_parent; my $slice = $dfr->slice; print F ">DnaFrag" . $dfr->dnafrag_id . ".\n"; print MF ">DnaFrag" . $dfr->dnafrag_id . ".\n"; my $seq = $slice->seq; $seq =~ s/(.{80})/$1\n/g; chomp $seq; print F $seq,"\n"; $seq = $slice->get_repeatmasked_seq->seq; $seq =~ s/(.{80})/$1\n/g; chomp $seq; print MF $seq,"\n"; close F; close MF; push @{$self->fasta_files}, $file; } # $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0); $sr->release_tree; if ($self->tree_file) { my $tree_string = $self->build_tree_string; $self->tree_string($tree_string); } return 1; } sub build_tree_string { my $self = shift; my $tree_file = $self->tree_file; open F, $tree_file || throw("Can not open $tree_file"); my $newick = ""; while (<F>) { chomp; if (/^\s*(.*)\s*$/) { $newick .= $1; } } close F; my $tree = Bio::EnsEMBL::Compara::Graph::NewickParser::parse_newick_into_tree($newick); $self->update_node_names($tree); my $tree_string = $tree->newick_simple_format; $tree_string =~ s/:\d+\.\d+//g; $tree_string =~ s/[,;]/ /g; $tree_string =~ s/\"//g; $tree->release_tree; return $tree_string; } sub update_node_names { my $self = shift; my $tree = shift; my %gdb_id2dfr; foreach my $dfr (values %{$self->{'_dnafrag_regions'}}) { $gdb_id2dfr{$dfr->dnafrag->genome_db->dbID} = "DnaFrag".$dfr->dnafrag_id ."."; } foreach my $leaf (@{$tree->get_all_leaves}) { if (defined $gdb_id2dfr{$leaf->name}) { $leaf->name($gdb_id2dfr{$leaf->name}); } else { $leaf->disavow_parent; $tree = $tree->minimize_tree; } } if ($tree->get_child_count == 1) { my $child = $tree->children->[0]; $child->parent->merge_children($child); $child->disavow_parent; } return $tree; } 1;