# Raw content of Bio::EnsEMBL::Compara::RunnableDB::CreateHomology_dNdSJobs

# Raw content of Bio::EnsEMBL::Compara::RunnableDB::CreateHomology_dNdSJobs
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code

=pod

=head1 NAME

Bio::EnsEMBL::Compara::RunnableDB::CreateHomology_dNdSJobs

=cut

=head1 SYNOPSIS

  my $aa       = $sdba->get_AnalysisAdaptor;
  my $analysis = $aa->fetch_by_logic_name('CreateHomology_dNdSJobs');
  my $rdb      = Bio::EnsEMBL::Compara::RunnableDB::CreateHomology_dNdSJobs->new(
      -input_id => "{'species_sets'=>[[1,2,3,14],[4,13],[11,16]]}",
      -analysis => $analysis,
  );
  $rdb->fetch_input;
  $rdb->run;

=cut

=head1 DESCRIPTION

This is a homology compara specific runnableDB that, based on an input of
arrayrefs of genome_db_ids, creates Homology_dNdS jobs in the hive
analysis_job table.

The input_id is a string holding a Perl hashref.  Recognised keys:

  species_sets       arrayref of arrayrefs of genome_db_ids; every pair of
                     genome_db_ids inside one set is processed
  method_link_types  arrayref of method link types
                     (default: ['ENSEMBL_ORTHOLOGUES'])
  method_link_type   deprecated single-value form of method_link_types

=cut

=head1 CONTACT

abel@ebi.ac.uk, jessica@ebi.ac.uk

=cut

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

package Bio::EnsEMBL::Compara::RunnableDB::CreateHomology_dNdSJobs;

use strict;
use warnings;

use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Hive;
use Bio::EnsEMBL::Hive::Process;

our @ISA = qw(Bio::EnsEMBL::Hive::Process);

# Method link type used when the input_id names none.
my $DEFAULT_METHOD_LINK_TYPE = 'ENSEMBL_ORTHOLOGUES';

# One extra homology_id is packed into each job per this many homologies.
my $HOMOLOGIES_PER_BATCH_STEP = 25000;

# Upper bound on the number of homology_ids packed into one job.
my $MAX_IDS_PER_JOB = 25;

# Maximum length of analysis_job.input_id, as stated by the original author
# ("limit of 255 chars in input_id").
my $MAX_INPUT_ID_LENGTH = 255;


=head2 fetch_input

  Description: Resets the stored species sets, checks that an input_id is
               present, creates the Compara DBAdaptor and parses the
               input_id with get_params().
  Returntype : 1
  Exceptions : throws if input_id is not defined

=cut

sub fetch_input {
    my ($self) = @_;

    $self->{'species_sets_aref'} = undef;
    $self->throw("No input_id") unless defined($self->input_id);

    # create a Compara::DBAdaptor which shares the same DBI handle
    # with the pipeline DBAdaptor that is passed into this runnable
    $self->{'comparaDBA'} =
        Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);

    $self->get_params($self->input_id);
    return 1;
}


=head2 get_params

  Arg [1]    : string $param_string - Perl source that evaluates to a hashref
  Description: Evaluates the parameter string and stores 'species_sets' and
               the method link types on $self.  Method link types are taken
               from 'method_link_types', else from the deprecated
               'method_link_type' (with a warning), else default to
               ['ENSEMBL_ORTHOLOGUES'].
  Returntype : none
  Exceptions : warns (and stores nothing) if the string cannot be evaluated
               or does not evaluate to a hashref

=cut

sub get_params {
    my $self         = shift;
    my $param_string = shift;

    return unless ($param_string);
    print("parsing parameter string : ", $param_string, "\n");

    # NOTE(review): string eval executes whatever the input_id contains.
    # It is kept because the input_id format is Perl source, but it must
    # only ever be fed trusted job input.
    my $params = eval($param_string);
    warn("Could not evaluate parameter string: $@") if ($@);
    return unless ($params);

    unless (ref($params) eq 'HASH') {
        warn("Parameter string did not evaluate to a hashref; ignoring it\n");
        return;
    }

    # Resolve the method link types BEFORE applying the default, otherwise
    # the deprecated 'method_link_type' key could never be honoured.
    if (!defined($params->{'method_link_types'})) {
        if (defined $params->{'method_link_type'}) {
            warn(
                'The method_link_type paramerter is deprecated. '.
                'Please use method_link_types with an arrayref value instead'
            );
            $params->{'method_link_types'} = [$params->{'method_link_type'}];
        }
        else {
            # Default will be orthologues
            $params->{'method_link_types'} = [$DEFAULT_METHOD_LINK_TYPE];
        }
    }

    foreach my $key (keys %$params) {
        my $value = defined($params->{$key}) ? $params->{$key} : '';
        print(" $key : ", $value, "\n");
    }

    if (defined $params->{'species_sets'}) {
        $self->{'species_sets_aref'} = [@{$params->{'species_sets'}}];
    }
    $self->{'method_link_types'} = [@{$params->{'method_link_types'}}];

    return;
}


=head2 run

  Description: Creates the Homology_dNdS jobs for the stored species sets.
               Does nothing when no species sets were supplied.
  Returntype : 1

=cut

sub run {
    my $self = shift;

    return 1 unless ($self->{'species_sets_aref'});
    $self->create_analysis_jobs($self->{'species_sets_aref'});
    return 1;
}


=head2 write_output

  Description: Nothing to write; the jobs are created in run().
  Returntype : 1

=cut

sub write_output {
    my $self = shift;
    return 1;
}


##########################################
#
# internal methods
#
##########################################

=head2 create_analysis_jobs

  Arg [1]    : arrayref of arrayrefs of genome_db_ids
  Description: For every pair of genome_db_ids within each species set and
               every stored method link type, collects the homology_ids of
               the matching method_link_species_set, then inserts them in
               batches as 'READY' jobs for the 'Homology_dNdS' analysis.
               The species sets passed in are not modified.
  Returntype : none
  Exceptions : throws if there is no analysis named 'Homology_dNdS'

=cut

sub create_analysis_jobs {
    my $self              = shift;
    my $species_sets_aref = shift;

    my $aa                     = $self->db->get_AnalysisAdaptor;
    my $Homology_dNdS_analysis = $aa->fetch_by_logic_name('Homology_dNdS');

    # Fail early and clearly instead of dying on an undef method call after
    # all the homologies have been fetched.
    $self->throw("No analysis with logic_name 'Homology_dNdS' found")
        unless defined($Homology_dNdS_analysis);
    my $analysis_id = $Homology_dNdS_analysis->dbID;

#  my $sql = "insert ignore into analysis_job (analysis_id,input_id,status) select " .
#            $Homology_dNdS_analysis->dbID .
#            ",homology_id,'READY' from homology where method_link_species_set_id = ?";

    my $sql = "select homology_id from homology where method_link_species_set_id = ?";
    my $sth = $self->db->dbc->prepare($sql);

    my $mlssa = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor;

    my @homologies;
    foreach my $species_set (@{$species_sets_aref}) {
        # Work on a copy: shifting the caller's array would empty the
        # species sets stored on $self.
        my @genome_db_ids = @{$species_set};

        while (my $genome_db_id1 = shift @genome_db_ids) {
            foreach my $genome_db_id2 (@genome_db_ids) {
                foreach my $mlt (@{$self->{'method_link_types'} || []}) {
                    my $mlss = $mlssa->fetch_by_method_link_type_genome_db_ids
                        ($mlt, [$genome_db_id1, $genome_db_id2]);
                    next unless ($mlss);

                    $sth->execute($mlss->dbID);
                    while (my $ref = $sth->fetchrow_arrayref()) {
                        my ($homology_id) = @$ref;
                        push @homologies, $homology_id;
                    }
                }
            }
        }
    }
    $sth->finish;

    # Batch size grows with the number of homologies, clamped to
    # [1, $MAX_IDS_PER_JOB].  The original tested "> 20" but assigned 25,
    # which bumped sizes 21..24 UP to 25; one constant keeps the same
    # maximum without that jump.
    my $job_size = int(scalar(@homologies) / $HOMOLOGIES_PER_BATCH_STEP);
    $job_size = 1                if ($job_size < 1);
    $job_size = $MAX_IDS_PER_JOB if ($job_size > $MAX_IDS_PER_JOB);

    # Prepare once and bind the values instead of interpolating them into
    # a freshly prepared statement for every batch.
    my $insert_sql =
        "insert ignore into analysis_job (analysis_id,input_id,status) VALUES (?,?,'READY')";
    my $insert_sth = $self->db->dbc->prepare($insert_sql);

    while (@homologies) {
        my @job_array    = splice(@homologies, 0, $job_size);
        my $input_string = "{'ids'=>[" . join(',', @job_array) . "]}";

        # Respect the input_id length limit: hand ids back to the queue
        # until the string fits (a single id is always sent as is).
        while (length($input_string) > $MAX_INPUT_ID_LENGTH && @job_array > 1) {
            unshift @homologies, pop @job_array;
            $input_string = "{'ids'=>[" . join(',', @job_array) . "]}";
        }

        $insert_sth->execute($analysis_id, $input_string);
    }
    $insert_sth->finish;

    return;
}

1;