Bio::EnsEMBL::Compara::RunnableDB
GenomeSubmitPep
Toolbar
Summary
Bio::EnsEMBL::Compara::RunnableDB::GenomeSubmitPep
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $repmask = Bio::EnsEMBL::Compara::RunnableDB::GenomeSubmitPep->new (
-db => $db,
-input_id => $input_id,
-analysis => $analysis );
$repmask->fetch_input(); #reads from DB
$repmask->run();
$repmask->output();
$repmask->write_output(); #writes to DB
Description
Process module which takes the member peptides defined in a subset and genome_db
passed in the input_id, creates a new analysis, and fills it with these peptides
as jobs to be flowed into the Blast analyses.
Methods
createSubmitPepAnalysis | No description | Code |
create_peptide_align_feature_table | No description | Code |
fetch_input | Description | Code |
getSubsetIdForGenomeDBId | No description | Code |
run | No description | Code |
write_output | No description | Code |
Methods description
Title : fetch_input Usage : $self->fetch_input Function: Parses the input_id and fetches the genome_db and peptide subset it names from the database Returns : 1 Args : none |
Methods code
createSubmitPepAnalysis | description | prev | next | Top |
# Make sure a "SubmitPep_<reference_name>" analysis exists, block it behind
# the 'CreateBlastRules' analysis, and create one job per member of the
# given peptide subset (the member_id is used as the job's input_id).
#
# Args   : $subset - a Bio::EnsEMBL::Compara::Subset
# Throws : if $subset is not a Bio::EnsEMBL::Compara::Subset
#
# Reads $self->{'reference_name'} and $self->{'analysisStatsDBA'}, both of
# which are set up by fetch_input.
sub createSubmitPepAnalysis {
    my $self   = shift;
    my $subset = shift;

    if (!UNIVERSAL::isa($subset, "Bio::EnsEMBL::Compara::Subset")) {
        throw("Calling createSubmitPepAnalysis without a proper subset [$subset]");
    }

    print("\ncreateSubmitPepAnalysis\n");

    my $logic_name = "SubmitPep_" . $self->{'reference_name'};
    print(" see if analysis '$logic_name' is in database\n");

    my $analysis_adaptor = $self->db->get_AnalysisAdaptor;
    my $analysis         = $analysis_adaptor->fetch_by_logic_name($logic_name);

    if ($analysis) {
        print(" YES in database with analysis_id=" . $analysis->dbID());
    }
    else {
        print(" NOPE: go ahead and insert\n");
        $analysis = Bio::EnsEMBL::Analysis->new(
            -db         => '',
            -db_file    => $subset->dump_loc(),
            -db_version => '1',
            -parameters => "{subset_id=>" . $subset->dbID() . "}",
            -logic_name => $logic_name,
            -module     => 'Bio::EnsEMBL::Hive::RunnableDB::Dummy',
        );
        $analysis_adaptor->store($analysis);

        # A freshly stored analysis starts out BLOCKED; the control rule
        # created below names 'CreateBlastRules' as the condition analysis.
        my $stats = $self->{'analysisStatsDBA'}->fetch_by_analysis_id($analysis->dbID);
        $stats->batch_size(500);
        $stats->hive_capacity(3);
        $stats->status('BLOCKED');
        $stats->update();
    }

    my $createRules = $analysis_adaptor->fetch_by_logic_name('CreateBlastRules');
    $self->db->get_AnalysisCtrlRuleAdaptor->create_rule($createRules, $analysis);

    print("store member_id into analysis_job table\n");

    my @member_id_list = @{ $subset->member_id_list() };
    print(scalar(@member_id_list) . " members in subset\n");

    foreach my $member_id (@member_id_list) {
        Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor->CreateNewJob(
            -input_id     => $member_id,
            -analysis     => $analysis,
            -input_job_id => 0,
        );
    }

    print("CREATED all analysis_jobs\n");
}
create_peptide_align_feature_table | description | prev | next | Top |
# Create (if it does not exist yet) a per-genome copy of the
# peptide_align_feature table and disable its keys.
#
# Args : $genome_db - object providing dbID() and name()
#
# The table is named peptide_align_feature_<species>_<genome_db_id>, where
# <species> is the lower-cased genome name with spaces turned into
# underscores.
sub create_peptide_align_feature_table {
    my $self      = shift;
    my $genome_db = shift;

    my $genome_db_id = $genome_db->dbID;
    (my $species_name = lc($genome_db->name)) =~ s/\ /\_/g;

    my $table_name = "peptide_align_feature_${species_name}_${genome_db_id}";
    my $dbc        = $self->{'comparaDBA'}->dbc;

    # Step 1: clone the structure of the template table.
    my $create_sth = $dbc->prepare(
        "CREATE TABLE IF NOT EXISTS $table_name like peptide_align_feature"
    );
    $create_sth->execute();

    # Step 2: switch the keys off on the new table.
    my $alter_sth = $dbc->prepare("ALTER TABLE $table_name DISABLE KEYS");
    $alter_sth->execute();

    return $alter_sth->finish();
}
1; } |
# Title   : fetch_input
# Usage   : $self->fetch_input
# Function: Parses the input_id (a Perl hash literal with optional keys
#           'gdb' = genome_db_id and 'ss' = subset_id), sets up the compara
#           and analysis-stats adaptors, and fetches the genome_db and the
#           peptide subset. When only 'gdb' is given, the subset is looked
#           up via getSubsetIdForGenomeDBId.
#           Stores its results in $self->{'comparaDBA'},
#           $self->{'analysisStatsDBA'}, $self->{'genome_db'},
#           $self->{'pepSubset'} and $self->{'reference_name'}.
# Returns : 1
# Throws  : if the input_id is missing or malformed, if the named genome_db
#           or subset cannot be fetched, or if no subset can be determined
sub fetch_input {
    my $self = shift;

    $self->throw("No input_id") unless defined($self->input_id);
    print("input_id = ".$self->input_id."\n");
    $self->throw("Improper formated input_id") unless ($self->input_id =~ /\{/);

    # NOTE(review): string eval runs whatever code the input_id contains;
    # this is only acceptable while input_ids come from a trusted source.
    my $input_hash = eval($self->input_id);
    my $eval_error = $@;
    unless (ref($input_hash) eq 'HASH') {
        $eval_error ||= 'it did not evaluate to a hash reference';
        $self->throw("Could not parse input_id: $eval_error");
    }

    $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);
    $self->{'analysisStatsDBA'} = $self->db->get_AnalysisStatsAdaptor;

    # Keep both connections open for the lifetime of the job.
    $self->db->dbc->disconnect_when_inactive(0);
    $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);

    my $genome_db_id = $input_hash->{'gdb'};
    my $subset_id    = $input_hash->{'ss'};

    $self->{'reference_name'} = undef;

    if (defined($genome_db_id)) {
        print("gdb = $genome_db_id\n");

        my $genome_db = $self->{'comparaDBA'}->get_GenomeDBAdaptor->fetch_by_dbID($genome_db_id);
        $self->throw("No genome_db found for gdb = $genome_db_id") unless $genome_db;

        $self->{'genome_db'}      = $genome_db;
        $self->{'reference_name'} = $genome_db->dbID() . "_" . $genome_db->assembly();

        unless ($subset_id) {
            $subset_id = $self->getSubsetIdForGenomeDBId($genome_db_id);
        }
    }

    $self->throw("no subset defined, can't figure out which peptides to use\n")
        unless defined($subset_id);

    $self->{'pepSubset'} = $self->{'comparaDBA'}->get_SubsetAdaptor()->fetch_by_dbID($subset_id);
    $self->throw("No subset found for subset_id = $subset_id") unless $self->{'pepSubset'};

    # Without a genome_db, name the analysis after the subset description.
    unless ($self->{'reference_name'}) {
        $self->{'reference_name'} = $self->{'pepSubset'}->description;
        $self->{'reference_name'} =~ s/\s+/_/g;
    }

    return 1;
}
# Look up the subset of "longest" peptides for a genome.
#
# Args    : $genome_db_id - value matched against member.genome_db_id
# Returns : the first matching subset_id, or undef when none is found
#
# Warns (without failing) when the genome has no matching subset or more
# than one; in the latter case the first row returned wins.
sub getSubsetIdForGenomeDBId {
    my $self         = shift;
    my $genome_db_id = shift;

    # The genome_db_id is bound as a placeholder rather than interpolated
    # into the statement text.
    my $sql = "SELECT distinct subset.subset_id " .
              "FROM member, subset, subset_member " .
              "WHERE subset.subset_id=subset_member.subset_id " .
              "AND subset.description like '%longest%' " .
              "AND member.member_id=subset_member.member_id " .
              "AND member.genome_db_id=?";

    my $sth = $self->{'comparaDBA'}->dbc->prepare($sql);
    $sth->execute($genome_db_id);

    my $subset_id;
    $sth->bind_columns(\$subset_id);

    my @subsetIds;
    while ($sth->fetch()) {
        print("found subset_id = $subset_id for genome_db_id = $genome_db_id\n");
        push @subsetIds, $subset_id;
    }
    $sth->finish();

    if (@subsetIds > 1) {
        warn("Compara DB: more than 1 subset of longest peptides defined for genome_db_id = $genome_db_id\n");
    }
    if (!@subsetIds) {
        warn("Compara DB: no subset of longest peptides defined for genome_db_id = $genome_db_id\n");
    }

    return $subsetIds[0];
}
} |
# Create the per-genome peptide_align_feature table for the genome_db
# loaded by fetch_input.
#
# Returns : 1
#
# fetch_input accepts an input_id that names only a subset ('ss') and no
# genome ('gdb'); in that case $self->{'genome_db'} is unset and there is
# no per-genome table to create, so the step is skipped.
sub run {
    my $self = shift;

    if (defined $self->{'genome_db'}) {
        $self->create_peptide_align_feature_table($self->{'genome_db'});
    }

    return 1;
}
# Create the SubmitPep analysis and its jobs for the peptide subset held in
# $self->{'pepSubset'}.
#
# Returns : 1
sub write_output {
    my ($self) = @_;

    my $peptide_subset = $self->{'pepSubset'};
    $self->createSubmitPepAnalysis($peptide_subset);

    return 1;
}
} |
General documentation
Describe contact details here
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
eval {
$tryCount++;
$sicDBA->store_input_id_analysis($member_id, #input_id
$analysis,
'gaia', #execution_host
0 #save runtime NO (ie do insert)
);
};
if($@) {
$errorCount++;
if($errorCount>42 && ($errorCount/$tryCount > 0.95)) {
die("too many repeated failed insert attempts, assume will continue for durration. ACK!!\n");
}
} # should handle the error, but ignore for now
}
};