Raw content of Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::ImportAlignment # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =pod =head1 NAME Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::ImportAlignment =head1 SYNOPSIS =head1 DESCRIPTION This module imports a specified alignment. This is used in the low coverage genome alignment pipeline for importing the high coverage alignment which is used to build the low coverage genomes on. =head1 PARAMETERS =head1 CONTACT Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::ImportAlignment; use strict; use Bio::EnsEMBL::Registry; use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor; use Bio::EnsEMBL::Utils::Exception qw(throw); use Bio::EnsEMBL::Compara::Graph::NewickParser; use Bio::EnsEMBL::Hive::Process; our @ISA = qw(Bio::EnsEMBL::Hive::Process); =head2 fetch_input Title : fetch_input Usage : $self->fetch_input Function: Fetches input data for gerp from the database Returns : none Args : none =cut sub fetch_input { my( $self) = @_; #create a Compara::DBAdaptor which shares the same DBI handle #with $self->db (Hive DBAdaptor) $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc); $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0); $self->{'hiveDBA'} = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(-DBCONN => $self->{'comparaDBA'}->dbc); #read from analysis table $self->get_params($self->parameters); #read from analysis_job table $self->get_params($self->input_id); my $reg = "Bio::EnsEMBL::Registry"; $reg->load_registry_from_url($self->from_db_url); } =head2 run Title : run Usage : $self->run Function: Run gerp Returns : none Args : none =cut sub run { my $self = shift; $self->importAlignment(); } =head2 write_output Title : write_output Usage : $self->write_output Function: Write results to the database Returns : 1 Args : none =cut sub write_output { my ($self) = @_; return 1; } #Uses copy_data method from copy_data.pl script sub importAlignment { my $self = shift; #if the database name is defined in the url, then open that if ($self->from_db_url =~ /mysql:\/\/.*@.*\/.+/) { $self->{'from_comparaDBA'} = new Bio::EnsEMBL::Compara::DBSQL::DBAdaptor(-url=>$self->from_db_url); } else { #open the most recent compara database $self->{'from_comparaDBA'} = Bio::EnsEMBL::Registry->get_DBAdaptor("Multi", "compara"); } my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("ImportAlignment"); my $dbname = $self->{'from_comparaDBA'}->dbc->dbname; my $analysis_id = $analysis->dbID; my $mlss_id = $self->method_link_species_set_id; #copy genomic_align_block table copy_data($self->{'from_comparaDBA'}, $self->{'comparaDBA'}, "genomic_align_block", "SELECT * FROM genomic_align_block WHERE method_link_species_set_id = $mlss_id"); my $do_all = 1; if ($do_all) { #copy genomic_align table copy_data($self->{'from_comparaDBA'}, $self->{'comparaDBA'}, "genomic_align", "SELECT ga.*". " FROM genomic_align_block gab LEFT JOIN genomic_align ga USING (genomic_align_block_id)". " WHERE gab.method_link_species_set_id = $mlss_id"); #copy genomic_align_group table copy_data($self->{'from_comparaDBA'}, $self->{'comparaDBA'}, "genomic_align_group", "SELECT gag.*". " FROM genomic_align_block gab LEFT JOIN genomic_align ga USING (genomic_align_block_id)". " LEFT JOIN genomic_align_group gag USING (genomic_align_id)". " WHERE gag.group_id IS NOT NULL AND gab.method_link_species_set_id = $mlss_id"); #copy genomic_align_tree table copy_data($self->{'from_comparaDBA'}, $self->{'comparaDBA'}, "genomic_align_tree", "SELECT gat.*". " FROM genomic_align_block gab LEFT JOIN genomic_align ga USING (genomic_align_block_id)". " LEFT JOIN genomic_align_group gag USING (genomic_align_id)". " LEFT JOIN genomic_align_tree gat ON (node_id=gag.group_id) WHERE gag.group_id IS NOT NULL AND gab.method_link_species_set_id = $mlss_id"); } } =head2 copy_data Arg[1] : Bio::EnsEMBL::Compara::DBSQL::DBAdaptor $from_dba Arg[2] : Bio::EnsEMBL::Compara::DBSQL::DBAdaptor $to_dba Arg[3] : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $this_mlss Arg[4] : string $table Arg[5] : string $sql_query Description : copy data in this table using this SQL query. Returns : Exceptions : throw if argument test fails =cut sub copy_data { my ($from_dba, $to_dba, $table_name, $query) = @_; print "Copying data in table $table_name\n"; my $sth = $from_dba->dbc->db_handle->column_info($from_dba->dbc->dbname, undef, $table_name, '%'); $sth->execute; my $all_rows = $sth->fetchall_arrayref; my $binary_mode = 0; foreach my $this_col (@$all_rows) { if (($this_col->[5] eq "BINARY") or ($this_col->[5] eq "VARBINARY") or ($this_col->[5] eq "BLOB") or ($this_col->[5] eq "BIT")) { $binary_mode = 1; last; } } if ($binary_mode) { #copy_data_in_binary_mode($from_dba, $to_dba, $table_name, $query); } else { copy_data_in_text_mode($from_dba, $to_dba, $table_name, $query); } } =head2 copy_data_in_text_mode Arg[1] : Bio::EnsEMBL::Compara::DBSQL::DBAdaptor $from_dba Arg[2] : Bio::EnsEMBL::Compara::DBSQL::DBAdaptor $to_dba Arg[3] : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $this_mlss Arg[4] : string $table Arg[5] : string $sql_query Description : copy data in this table using this SQL query. Returns : Exceptions : throw if argument test fails =cut sub copy_data_in_text_mode { my ($from_dba, $to_dba, $table_name, $query) = @_; my $user = $to_dba->dbc->username; my $pass = $to_dba->dbc->password; my $host = $to_dba->dbc->host; my $port = $to_dba->dbc->port; my $dbname = $to_dba->dbc->dbname; my $start = 0; #my $step = 1000000; my $step = 10000; while (1) { my $sth = $from_dba->dbc->prepare($query." LIMIT $start, $step"); $start += $step; $sth->execute(); my $all_rows = $sth->fetchall_arrayref; ## EXIT CONDITION return if (!@$all_rows); my $filename = "/tmp/$table_name.copy_data.$$.txt"; open(TEMP, ">$filename") or die; foreach my $this_row (@$all_rows) { print TEMP join("\t", map {defined($_)?$_:'\N'} @$this_row), "\n"; } close(TEMP); if ($pass) { system("mysqlimport", "-u$user", "-p$pass", "-h$host", "-P$port", "-L", "-l", "-i", $dbname, $filename); } else { system("mysqlimport", "-u$user", "-h$host", "-P$port", "-L", "-l", "-i", $dbname, $filename); } unlink("$filename"); } } #this assumes the from and to databases are on the same server. sub importAlignment_old { my $self = shift; #if the database name is defined in the url, then open that if ($self->from_db_url =~ /mysql:\/\/.*@.*\/.+/) { $self->{'from_comparaDBA'} = new Bio::EnsEMBL::Compara::DBSQL::DBAdaptor(-url=>$self->from_db_url); } else { #open the most recent compara database $self->{'from_comparaDBA'} = Bio::EnsEMBL::Registry->get_DBAdaptor("Multi", "compara"); } my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name("ImportAlignment"); my $dbname = $self->{'from_comparaDBA'}->dbc->dbname; my $analysis_id = $analysis->dbID; my $mlss_id = $self->method_link_species_set_id; my $sql = "INSERT INTO genomic_align_block SELECT * FROM ?.genomic_align_block WHERE method_link_species_set_id = ?\n"; my $sth = $self->{'comparaDBA'}->dbc->prepare($sql); $sth->execute($dbname, $mlss_id); $sth->finish(); $sql = "INSERT INTO genomic_align SELECT genomic_align.* FROM ?.genomic_align LEFT JOIN WHERE method_link_species_set_id = ?\n"; my $sth = $self->{'comparaDBA'}->dbc->prepare($sql); $sth->execute($dbname, $mlss_id); $sth->finish(); $sql = "INSERT INTO genomic_align_group SELECT genomic_align_group.* FROM ?.genomic_align_group LEFT JOIN ?.genomic_align USING (genomic_align_id) LEFT JOIN ?.genomic_align_block USING (genomic_align_block_id) WHERE genomic_align_block.method_link_species_set_id = ?\n"; my $sth = $self->{'comparaDBA'}->dbc->prepare($sql); $sth->execute($dbname, $dbname, $mlss_id); $sth->finish(); $sql = "INSERT INTO genomic_align_tree SELECT genomic_align_tree.* FROM ?.genomic_align_tree LEFT JOIN ?.genomic_align_group ON (node_id=group_id) LEFT JOIN ?.genomic_align USING (genomic_align_id) LEFT JOIN ?.genomic_align_block WHERE genomic_align_block.method_link_species_set_id = ?\n"; my $sth = $self->{'comparaDBA'}->dbc->prepare($sql); $sth->execute($dbname, $dbname, $dbname, $dbname, $mlss_id); $sth->finish(); } ########################################## # # getter/setter methods # ########################################## sub method_link_species_set_id { my $self = shift; $self->{'_method_link_species_set_id'} = shift if(@_); return $self->{'_method_link_species_set_id'}; } sub from_db_url { my $self = shift; $self->{'_from_db_url'} = shift if(@_); return $self->{'_from_db_url'}; } ########################################## # # internal methods # ########################################## sub get_params { my $self = shift; my $param_string = shift; return unless($param_string); print("parsing parameter string : ",$param_string,"\n"); my $params = eval($param_string); return unless($params); if(defined($params->{'method_link_species_set_id'})) { $self->method_link_species_set_id($params->{'method_link_species_set_id'}); } if (defined($params->{'from_db_url'})) { $self->from_db_url($params->{'from_db_url'}); } return 1; }