Bio::EnsEMBL::Compara::RunnableDB GenomeLoadMembers
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::RunnableDB::GenomeLoadMembers
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::DBSQL::DBAdaptor
Bio::EnsEMBL::Compara::Homology
Bio::EnsEMBL::Compara::Member(1)
Bio::EnsEMBL::Compara::Member(2)
Bio::EnsEMBL::Compara::Subset
Bio::EnsEMBL::DBLoader
Bio::EnsEMBL::DBSQL::DBAdaptor
Bio::EnsEMBL::Hive::Process
Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor
Bio::EnsEMBL::Utils::Exception
Inherit
Bio::EnsEMBL::Hive::Process
Synopsis
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $g_load_members = Bio::EnsEMBL::Compara::RunnableDB::GenomeLoadMembers->new (
-db => $db,
-input_id => $input_id
-analysis => $analysis );
$g_load_members->fetch_input(); #reads from DB
$g_load_members->run();
$g_load_members->output();
$g_load_members->write_output(); #writes to DB
Description
This object wraps Bio::EnsEMBL::Pipeline::Runnable::Blast to add
functionality to read and write to databases.
The appropriate Bio::EnsEMBL::Analysis object must be passed for
extraction of appropriate parameters. A Bio::EnsEMBL::Pipeline::DBSQL::Obj is
required for databse access.
Methods
fasta_description
No description
Code
fetch_inputDescriptionCode
loadMembersFromCoreSlices
No description
Code
run
No description
Code
store_gene_and_all_transcripts
No description
Code
write_output
No description
Code
Methods description
fetch_inputcode    nextTop
    Title   :   fetch_input
Usage : $self->fetch_input
Function: prepares global variables and DB connections
Returns : none
Args : none
Methods code
fasta_descriptiondescriptionprevnextTop
sub fasta_description {
  my ($self, $gene, $transcript) = @_;

  my $description = "Transcript:" . $transcript->stable_id .
                    " Gene:" .      $gene->stable_id .
                    " Chr:" .       $gene->seq_region_name .
                    " Start:" .     $gene->seq_region_start .
                    " End:" .       $gene->seq_region_end;
  return $description;
}


1;
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my( $self) = @_;

  $self->throw("No input_id") unless defined($self->input_id);
  print("input_id = ".$self->input_id."\n");
  $self->throw("Improper formated input_id") unless ($self->input_id =~ /{/);

  my $input_hash = eval($self->input_id);
  my $genome_db_id = $input_hash->{'gdb'};
  print("gdb = $genome_db_id\n");
  $self->throw("No genome_db_id in input_id") unless defined($genome_db_id);
  if($input_hash->{'pseudo_stableID_prefix'}) {
    $self->{'pseudo_stableID_prefix'} = $input_hash->{'pseudo_stableID_prefix'};
  }
  
  #create a Compara::DBAdaptor which shares the same DBI handle
#with the Pipeline::DBAdaptor that is based into this runnable
$self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc); #get the Compara::GenomeDB object for the genome_db_id
$self->{'genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->fetch_by_dbID($genome_db_id); #using genome_db_id, connect to external core database
$self->{'coreDBA'} = $self->{'genome_db'}->db_adaptor(); $self->throw("Can't connect to genome database for id=$genome_db_id") unless($self->{'coreDBA'}); #global boolean control value (whether the genes are also stored as members)
$self->{'store_genes'} = 1; $self->{'verbose'} = 0; #variables for tracking success of process
$self->{'sliceCount'} = 0; $self->{'geneCount'} = 0; $self->{'realGeneCount'} = 0; $self->{'transcriptCount'} = 0; $self->{'longestCount'} = 0; return 1;
}
loadMembersFromCoreSlicesdescriptionprevnextTop
sub loadMembersFromCoreSlices {
  my $self = shift;

  #create subsets for the gene members, and the longest peptide members
$self->{'pepSubset'} = Bio::EnsEMBL::Compara::Subset->new( -name=>"gdb:".$self->{'genome_db'}->dbID ." ". $self->{'genome_db'}->name . ' longest translations'); $self->{'geneSubset'} = Bio::EnsEMBL::Compara::Subset->new( -name=>"gdb:".$self->{'genome_db'}->dbID ." ". $self->{'genome_db'}->name . ' genes'); $self->{'comparaDBA'}->get_SubsetAdaptor->store($self->{'pepSubset'}); $self->{'comparaDBA'}->get_SubsetAdaptor->store($self->{'geneSubset'}); #from core database, get all slices, and then all genes in slice
#and then all transcripts in gene to store as members in compara
my @slices = @{$self->{'coreDBA'}->get_SliceAdaptor->fetch_all('toplevel')}; print("fetched ",scalar(@slices), " slices to load from\n"); throw("problem: no toplevel slices") unless(scalar(@slices)); SLICE: foreach my $slice (@slices) { $self->{'sliceCount'}++; #print("slice " . $slice->name . "\n");
foreach my $gene (@{$slice->get_all_Genes}) { $self->{'geneCount'}++; # LV and C are for the Ig/TcR family, which rearranges
# somatically so is considered as a different biotype in EnsEMBL
# D and J are very short or have no translation at all
if (lc($gene->biotype) eq 'protein_coding' || lc($gene->biotype) eq 'IG_V_gene' || lc($gene->biotype) eq 'IG_C_gene' || lc($gene->biotype) eq 'C_segment' || lc($gene->biotype) eq 'V_segment') { $self->{'realGeneCount'}++; $self->store_gene_and_all_transcripts($gene); } # if($self->{'transcriptCount'} >= 100) { last SLICE; }
# if($self->{'geneCount'} >= 1000) { last SLICE; }
} # last SLICE;
} print("loaded ".$self->{'sliceCount'}." slices\n"); print(" ".$self->{'geneCount'}." genes\n"); print(" ".$self->{'realGeneCount'}." real genes\n"); print(" ".$self->{'transcriptCount'}." transscripts\n"); print(" ".$self->{'longestCount'}." longest transcripts\n"); print(" ".$self->{'pepSubset'}->count()." in Subset\n");
}
rundescriptionprevnextTop
sub run {
  my $self = shift;

  $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);
  $self->{'coreDBA'}->dbc->disconnect_when_inactive(0);

  # main routine which takes a genome_db_id (from input_id) and
# access the ensembl_core database, useing the SliceAdaptor
# it will load all slices, all genes, and all transscripts
# and convert them into members to be stored into compara
$self->loadMembersFromCoreSlices(); $self->{'comparaDBA'}->dbc->disconnect_when_inactive(1); $self->{'coreDBA'}->dbc->disconnect_when_inactive(1); return 1;
}
store_gene_and_all_transcriptsdescriptionprevnextTop
sub store_gene_and_all_transcripts {
  my $self = shift;
  my $gene = shift;
  
  my @longestPeptideMember;
  my $maxLength=0;
  my $gene_member;
  my $gene_member_not_stored = 1;

  my $MemberAdaptor = $self->{'comparaDBA'}->get_MemberAdaptor();

  if(defined($self->{'pseudo_stableID_prefix'})) {
    $gene->stable_id($self->{'pseudo_stableID_prefix'} ."G_". $gene->dbID);
  }

  foreach my $transcript (@{$gene->get_all_Transcripts}) {
    unless (defined $transcript->translation) {
      warn("COREDB error: No translation for transcript ", $transcript->stable_id, "(dbID=",$transcript->dbID.")\n");
      next;
    }
#    This test might be useful to put here, thus avoiding to go further in trying to get a peptide
# my $next = 0;
# try {
# $transcript->translate;
# } catch {
# warn("COREDB error: transcript does not translate", $transcript->stable_id, "(dbID=",$transcript->dbID.")\n");
# $next = 1;
# };
# next if ($next);
my $translation = $transcript->translation; if(defined($self->{'pseudo_stableID_prefix'})) { $transcript->stable_id($self->{'pseudo_stableID_prefix'} ."T_". $transcript->dbID); $translation->stable_id($self->{'pseudo_stableID_prefix'} ."P_". $translation->dbID); } $self->{'transcriptCount'}++; #print("gene " . $gene->stable_id . "\n");
print(" transcript " . $transcript->stable_id ) if($self->{'verbose'}); unless (defined $translation->stable_id) { throw("COREDB error: does not contain translation stable id for translation_id ". $translation->dbID."\n"); next; } my $description = $self->fasta_description($gene, $transcript); my $pep_member = Bio::EnsEMBL::Compara::Member->new_from_transcript( -transcript=>$transcript, -genome_db=>$self->{'genome_db'}, -translate=>'yes', -description=>$description); print(" => member " . $pep_member->stable_id) if($self->{'verbose'}); unless($pep_member->sequence) { print(" => NO SEQUENCE!\n") if($self->{'verbose'}); next; } print(" len=",$pep_member->seq_length ) if($self->{'verbose'}); # store gene_member here only if at least one peptide is to be loaded for
# the gene.
if($self->{'store_genes'} && $gene_member_not_stored) { print(" gene " . $gene->stable_id ) if($self->{'verbose'}); $gene_member = Bio::EnsEMBL::Compara::Member->new_from_gene( -gene=>$gene, -genome_db=>$self->{'genome_db'}); print(" => member " . $gene_member->stable_id) if($self->{'verbose'}); eval { $MemberAdaptor->store($gene_member); print(" : stored") if($self->{'verbose'}); }; $self->{'geneSubset'}->add_member($gene_member); print("\n") if($self->{'verbose'}); $gene_member_not_stored = 0; } $MemberAdaptor->store($pep_member); $MemberAdaptor->store_gene_peptide_link($gene_member->dbID, $pep_member->dbID); print(" : stored\n") if($self->{'verbose'}); if($pep_member->seq_length > $maxLength) { $maxLength = $pep_member->seq_length; @longestPeptideMember = ($transcript, $pep_member); } } if(@longestPeptideMember) { my ($transcript, $member) = @longestPeptideMember; $self->{'pepSubset'}->add_member($member); $self->{'longestCount'}++; # print(" LONGEST " . $transcript->stable_id . "\n");
}
}
write_outputdescriptionprevnextTop
sub write_output {
  my $self = shift;

  my $output_id = "{'gdb'=>" . $self->{'genome_db'}->dbID .
                  ",'ss'=>" . $self->{'pepSubset'}->dbID . "}";
  $self->input_job->input_id($output_id);
  return 1;
}


######################################
#
# subroutines
#
#####################################
}
General documentation
CONTACTTop
Describe contact details here
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _