Raw content of Bio::EnsEMBL::Compara::RunnableDB::CreateBlastRules # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =pod =head1 NAME Bio::EnsEMBL::Compara::RunnableDB::CreateBlastRules =cut =head1 SYNOPSIS my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator); my $repmask = Bio::EnsEMBL::Compara::RunnableDB::CreateBlastRules->new ( -db => $db, -input_id => $input_id -analysis => $analysis ); $repmask->fetch_input(); #reads from DB $repmask->run(); $repmask->output(); $repmask->write_output(); #writes to DB =cut =head1 DESCRIPTION This object wraps Bio::EnsEMBL::Analysis::Runnable::Blast to add functionality to read and write to databases. The appropriate Bio::EnsEMBL::Analysis object must be passed for extraction of appropriate parameters. =cut =head1 CONTACT Describe contact details here =cut =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut package Bio::EnsEMBL::Compara::RunnableDB::CreateBlastRules; use strict; use Bio::EnsEMBL::DBSQL::DBAdaptor; use Bio::EnsEMBL::Compara::DBSQL::DBAdaptor; use Bio::EnsEMBL::Hive::DBSQL::DataflowRuleAdaptor; use Bio::EnsEMBL::Utils::Exception; use Bio::EnsEMBL::Hive::Process; our @ISA = qw(Bio::EnsEMBL::Hive::Process); sub fetch_input { my $self = shift; throw("No input_id") unless defined($self->input_id); print("input_id = ".$self->input_id."\n"); throw("Improper formated input_id") unless ($self->input_id =~ /\s*\{/); $self->{'selfBlast'} = 1; $self->{'phylumBlast'} = 0; if($self->analysis->parameters =~ /\s*\{/) { my $paramHash = eval($self->analysis->parameters); if($paramHash) { $self->{'phylumBlast'}=1 if($paramHash->{'phylumBlast'}==1); $self->{'selfBlast'}=0 if($paramHash->{'selfBlast'}==0); $self->{'cr_analysis_logic_name'} = $paramHash->{'cr_analysis_logic_name'} if(defined $paramHash->{'cr_analysis_logic_name'}); } } #create a new Compara::DBAdaptor which points to the same database #as the pipeline DBAdaptor passed in ($self->db) #the -DBCONN options uses the dbname,user,pass,port,host,driver from the #variable DBConnection to create the new connection (in essence a copy) $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc); if (defined $self->{'cr_analysis_logic_name'}) { $self->{'cr_analysis'} = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($self->{'cr_analysis_logic_name'}); throw($self->{'cr_analysis_logic_name'} . " analysis is missing, can't proceed\n") unless(defined($self->{'cr_analysis'})); } return 1; } sub run { #need to subclass otherwise it defaults to a version that fails #just return 1 so success return 1; } sub write_output { my $self = shift; my $input_hash = eval($self->input_id); if($input_hash and $input_hash->{'peps'} and $input_hash->{'blast'}) { my $conditionLogicName = $input_hash->{'peps'}; my $goalLogicName = $input_hash->{'blast'}; print("create rule $conditionLogicName => $goalLogicName\n"); my $conditionAnalysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($conditionLogicName); my $goalAnalysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($goalLogicName); $self->linkSubmitBlastPair($conditionAnalysis, $goalAnalysis); } else { $self->createAllBlastRules(); } return 1; } ################################## # # subroutines # ################################## sub createAllBlastRules { my $self = shift; my $analysisList = $self->db->get_AnalysisAdaptor->fetch_all(); my @submitList; my @blastList; foreach my $submitAnalysis (@{$analysisList}) { next unless($submitAnalysis->logic_name =~ /SubmitPep_(.*)/); my $blast_name = "blast_".$1; printf("found submit %s\n", $submitAnalysis->logic_name); push @submitList, $submitAnalysis; $self->db->get_AnalysisCtrlRuleAdaptor->create_rule($submitAnalysis, $self->{'cr_analysis'}); my $blastAnalysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($blast_name); if($blastAnalysis) { push @blastList, $blastAnalysis; $self->db->get_AnalysisCtrlRuleAdaptor->create_rule($blastAnalysis, $self->{'cr_analysis'}); } } foreach my $submitAnalysis (@submitList) { foreach my $blastAnalysis (@blastList) { if (!$self->{'selfBlast'}) { my ($submit_id) = $submitAnalysis->logic_name =~ /SubmitPep_(.*)/; my ($blast_id) = $blastAnalysis->logic_name =~ /blast_(.*)/; next if ($submit_id eq $blast_id); } # If it uses BlastcomparaPepAcross, we only create one Blast job 1 # job only across all the sps in 'species_set' in PAFCluster # instead of a job per sp. This is to avoid creating an # exponentially large ((n*n-1)/2) number of jobs that collapses # the analysis_job table. if ($blastAnalysis->module eq 'Bio::EnsEMBL::Compara::RunnableDB::BlastComparaPepAcross') { my ($submit_id) = $submitAnalysis->logic_name =~ /SubmitPep_(.*)/; my ($blast_id) = $blastAnalysis->logic_name =~ /blast_(.*)/; next unless ($submit_id eq $blast_id); } $self->linkSubmitBlastPair($submitAnalysis, $blastAnalysis); } } } sub linkSubmitBlastPair { my $self = shift; my $conditionAnalysis = shift; my $goalAnalysis = shift; printf("link %s => %s\n", $conditionAnalysis->logic_name, $goalAnalysis->logic_name); if($self->db->get_DataflowRuleAdaptor->create_rule($conditionAnalysis, $goalAnalysis)) { printf("reset_all_jobs_for_analysis %s\n", $conditionAnalysis->logic_name); $self->db->get_AnalysisJobAdaptor->reset_all_jobs_for_analysis_id($conditionAnalysis->dbID); } } 1;