Bio::EnsEMBL::Compara::RunnableDB GenomeSubmitPep
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::RunnableDB::GenomeSubmitPep
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::DBSQL::DBAdaptor
Bio::EnsEMBL::DBSQL::DBAdaptor
Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor
Bio::EnsEMBL::Hive::DBSQL::AnalysisStatsAdaptor
Bio::EnsEMBL::Utils::Exception
Inherit
Bio::EnsEMBL::Hive::Process
Synopsis
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $repmask = Bio::EnsEMBL::Compara::RunnableDB::GenomeSubmitPep->new (
-db => $db,
-input_id => $input_id,
-analysis => $analysis );
$repmask->fetch_input(); #reads from DB
$repmask->run();
$repmask->output();
$repmask->write_output(); #writes to DB
Description
Process module which takes the member peptides defined in a subset and genome_db
passed in the input_id and creates a new analysis and fills it with these peptides
as jobs to be flowed into the Blast analyses.
Methods
createSubmitPepAnalysis
No description
Code
create_peptide_align_feature_table
No description
Code
fetch_inputDescriptionCode
getSubsetIdForGenomeDBId
No description
Code
run
No description
Code
write_output
No description
Code
Methods description
fetch_inputcode    nextTop
    Title   :   fetch_input
Usage : $self->fetch_input
Function: Parses the input_id and fetches the genome_db and peptide subset it names from the database
Returns : none
Args : none
Methods code
createSubmitPepAnalysisdescriptionprevnextTop
# createSubmitPepAnalysis
#
#   Arg [1]  : Bio::EnsEMBL::Compara::Subset $subset
#   Function : Looks up the analysis named 'SubmitPep_<reference_name>' and, if
#              it is not in the database yet, creates and stores it and sets
#              its analysis stats (batch size, hive capacity, BLOCKED status).
#              Then adds a control rule from the 'CreateBlastRules' analysis to
#              this analysis and creates one analysis job per member_id of the
#              subset.
#   Returns  : 1
#   Throws   : if $subset is not a Bio::EnsEMBL::Compara::Subset
sub createSubmitPepAnalysis {
  my ($self, $subset) = @_;

  if (!UNIVERSAL::isa($subset, "Bio::EnsEMBL::Compara::Subset")) {
    throw("Calling createSubmitPepAnalysis without a proper subset [$subset]");
  }

  print("\ncreateSubmitPepAnalysis\n");

  my $logic_name = "SubmitPep_" . $self->{'reference_name'};

  print("  see if analysis '$logic_name' is in database\n");
  my $analysis = $self->db->get_AnalysisAdaptor->fetch_by_logic_name($logic_name);

  if ($analysis) {
    # Terminate the line so it does not run into the next log message.
    print("  YES in database with analysis_id=" . $analysis->dbID() . "\n");
  } else {
    print("  NOPE: go ahead and insert\n");
    $analysis = Bio::EnsEMBL::Analysis->new(
        -db              => '',
        -db_file         => $subset->dump_loc(),
        -db_version      => '1',
        -parameters      => "{subset_id=>" . $subset->dbID() . "}",
        -logic_name      => $logic_name,
        -module          => 'Bio::EnsEMBL::Hive::RunnableDB::Dummy',
      );
    $self->db->get_AnalysisAdaptor()->store($analysis);

    # A freshly stored analysis starts out BLOCKED; the control rule created
    # below is what ties it to CreateBlastRules.
    my $stats = $self->{'analysisStatsDBA'}->fetch_by_analysis_id($analysis->dbID);
    $stats->batch_size(500);
    $stats->hive_capacity(3);
    $stats->status('BLOCKED');
    $stats->update();
  }

  # create unblocking rules from CreateBlastRules to this new analysis
  my $createRules = $self->db->get_AnalysisAdaptor->fetch_by_logic_name('CreateBlastRules');
  $self->db->get_AnalysisCtrlRuleAdaptor->create_rule($createRules, $analysis);

  #my $host = hostname();
  print("store member_id into analysis_job table\n");

  my @member_id_list = @{$subset->member_id_list()};
  print(scalar(@member_id_list) . " members in subset\n");

  # One job per peptide member; the member_id itself is the job's input_id.
  foreach my $member_id (@member_id_list) {
    Bio::EnsEMBL::Hive::DBSQL::AnalysisJobAdaptor->CreateNewJob (
        -input_id       => $member_id,
        -analysis       => $analysis,
        -input_job_id   => 0,
      );
  }

  print("CREATED all analysis_jobs\n");
  return 1;
}
create_peptide_align_feature_tabledescriptionprevnextTop
# create_peptide_align_feature_table
#
#   Arg [1]  : genome_db object providing dbID() and name()
#   Function : Creates (if it does not exist yet) a per-genome copy of the
#              peptide_align_feature table, named
#              'peptide_align_feature_<species_name>_<genome_db_id>', and
#              disables its keys.
#   Returns  : not meaningful
#   Throws   : if no genome_db is passed
sub create_peptide_align_feature_table {
  my ($self, $genome_db) = @_;

  # Fail with a clear message instead of "Can't call method on undef" when
  # the caller has no genome_db to offer.
  unless (defined($genome_db)) {
    throw("Calling create_peptide_align_feature_table without a genome_db");
  }

  my $genome_db_id = $genome_db->dbID;
  my $species_name = lc($genome_db->name);
  $species_name =~ s/\ /\_/g;   # table names cannot contain spaces
  my $table_name = "peptide_align_feature_${species_name}_${genome_db_id}";

  my $dbc = $self->{'comparaDBA'}->dbc;

  my $sql = "CREATE TABLE IF NOT EXISTS $table_name like peptide_align_feature";
  my $sth = $dbc->prepare($sql);
  $sth->execute();
  $sth->finish();

  # Disable keys makes inserts faster
  $sql = "ALTER TABLE $table_name DISABLE KEYS";
  $sth = $dbc->prepare($sql);
  $sth->execute();
  $sth->finish();
}

1;
fetch_inputdescriptionprevnextTop
# fetch_input
#
#   Function : Parses the input_id (a string holding a Perl hash with the
#              optional keys 'gdb' = genome_db_id and 'ss' = subset_id),
#              sets up the Compara and AnalysisStats adaptors, and loads the
#              genome_db and peptide subset into $self. When only 'gdb' is
#              given, the subset is looked up via getSubsetIdForGenomeDBId.
#              Also sets $self->{'reference_name'}, either
#              '<genome_db_id>_<assembly>' or the subset description with
#              whitespace replaced by underscores.
#   Returns  : 1
#   Throws   : if the input_id is missing or cannot be parsed into a hash, if
#              the genome_db or subset cannot be fetched, or if no subset can
#              be determined
sub fetch_input {
  my $self = shift;

  $self->throw("No input_id") unless defined($self->input_id);
  print("input_id = ".$self->input_id."\n");
  $self->throw("Improper formated input_id") unless ($self->input_id =~ /\{/);

  # NOTE(review): this is a string eval of the input_id, so the input_id must
  # come from a trusted source.
  my $input_hash = eval($self->input_id);
  if ($@ or ref($input_hash) ne 'HASH') {
    throw("Could not parse input_id [" . $self->input_id . "] into a hash: $@");
  }

  #create a Compara::DBAdaptor which shares the same DBConnection as $self->db
  $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);
  $self->{'analysisStatsDBA'} = $self->db->get_AnalysisStatsAdaptor;
  $self->db->dbc->disconnect_when_inactive(0);
  $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);

  my $genome_db_id = $input_hash->{'gdb'};
  my $subset_id    = $input_hash->{'ss'};

  $self->{'reference_name'} = undef;

  if (defined($genome_db_id)) {
    print("gdb = $genome_db_id\n");

    #get the Compara::GenomeDB object for the genome_db_id
    $self->{'genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->fetch_by_dbID($genome_db_id);
    throw("no genome_db found for genome_db_id = $genome_db_id\n")
      unless (defined($self->{'genome_db'}));

    $self->{'reference_name'} = $self->{'genome_db'}->dbID()."_".$self->{'genome_db'}->assembly();

    unless ($subset_id) {
      # get the subset of 'longest transcripts' for this genome_db_id
      $subset_id = $self->getSubsetIdForGenomeDBId($genome_db_id);
    }
  }

  throw("no subset defined, can't figure out which peptides to use\n")
    unless (defined($subset_id));

  $self->{'pepSubset'} = $self->{'comparaDBA'}->get_SubsetAdaptor()->fetch_by_dbID($subset_id);
  throw("no subset found for subset_id = $subset_id\n")
    unless (defined($self->{'pepSubset'}));

  # Without a genome_db, name the reference after the subset description.
  unless ($self->{'reference_name'}) {
    $self->{'reference_name'} = $self->{'pepSubset'}->description;
    $self->{'reference_name'} =~ s/\s+/_/g;
  }

  return 1;
}
getSubsetIdForGenomeDBIddescriptionprevnextTop
# getSubsetIdForGenomeDBId
#
#   Arg [1]  : int $genome_db_id
#   Function : Finds the subset(s) whose description contains 'longest' and
#              which contain members of the given genome_db. Warns when none
#              or more than one such subset is found.
#   Returns  : the first subset_id found, or undef when there is none
sub getSubsetIdForGenomeDBId {
  my $self         = shift;
  my $genome_db_id = shift;

  my @subsetIds = ();
  my $subset_id;

  # The genome_db_id is bound as a placeholder rather than interpolated.
  my $sql = "SELECT distinct subset.subset_id " .
            "FROM member, subset, subset_member " .
            "WHERE subset.subset_id=subset_member.subset_id ".
            "AND subset.description like '%longest%' ".
            "AND member.member_id=subset_member.member_id ".
            "AND member.genome_db_id=?";
  my $sth = $self->{'comparaDBA'}->dbc->prepare( $sql );
  $sth->execute($genome_db_id);

  $sth->bind_columns( \$subset_id );
  while( $sth->fetch() ) {
    print("found subset_id = $subset_id for genome_db_id = $genome_db_id\n");
    push @subsetIds, $subset_id;
  }
  $sth->finish();

  if(scalar(@subsetIds) > 1) {
    warn ("Compara DB: more than 1 subset of longest peptides defined for genome_db_id = $genome_db_id\n");
  }
  if(scalar(@subsetIds) == 0) {
    warn ("Compara DB: no subset of longest peptides defined for genome_db_id = $genome_db_id\n");
  }

  return $subsetIds[0];
}


# working from the longest peptide subset, create an analysis of
# with logic_name 'SubmitPep_<taxon_id>_<assembly>'
# with type MemberPep and fill the input_id_analysis table where
# input_id is the member_id of a peptide and the analysis_id
# is the above mentioned analysis
#
# This creates the starting point for the blasts (members against database)
rundescriptionprevnextTop
# run
#
#   Function : Creates the per-genome peptide_align_feature table for the
#              genome_db stored in $self->{'genome_db'}.
#   Returns  : 1
sub run {
  my ($self) = @_;

  my $genome_db = $self->{'genome_db'};
  $self->create_peptide_align_feature_table($genome_db);

  return 1;
}
write_outputdescriptionprevnextTop
# write_output
#
#   Function : Hands the peptide subset stored in $self->{'pepSubset'} to
#              createSubmitPepAnalysis, which sets up the
#              'SubmitPep_<reference_name>' analysis and creates one analysis
#              job per member_id of the subset.
#              This creates the starting point for the blasts
#              (members against database).
#   Returns  : 1
sub write_output {
  my $self = shift;

  $self->createSubmitPepAnalysis($self->{'pepSubset'});

  return 1;
}

##################################
#
# subroutines
#
##################################
General documentation
CONTACTTop
Describe contact details here
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
Top
      eval {
$tryCount++;
$sicDBA->store_input_id_analysis($member_id, #input_id
$analysis,
'gaia', #execution_host
0 #save runtime NO (ie do insert)
);
};
if($@) {
$errorCount++;
if($errorCount>42 && ($errorCount/$tryCount > 0.95)) {
die("too many repeated failed insert attempts, assume will continue for durration. ACK!!\n");
}
} # should handle the error, but ignore for now
}
};