Bio::EnsEMBL::Compara::RunnableDB
GenomeLoadMembers
Toolbar
Summary
Bio::EnsEMBL::Compara::RunnableDB::GenomeLoadMembers
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::Member(1)
Bio::EnsEMBL::Compara::Member(2)
Inherit
Synopsis
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $g_load_members = Bio::EnsEMBL::Compara::RunnableDB::GenomeLoadMembers->new (
-db => $db,
-input_id => $input_id,
-analysis => $analysis );
$g_load_members->fetch_input(); #reads from DB
$g_load_members->run();
$g_load_members->output();
$g_load_members->write_output(); #writes to DB
Description
This object wraps Bio::EnsEMBL::Pipeline::Runnable::Blast to add
functionality to read and write to databases.
The appropriate Bio::EnsEMBL::Analysis object must be passed for
extraction of appropriate parameters. A Bio::EnsEMBL::Pipeline::DBSQL::Obj is
required for database access.
Methods
fasta_description | No description | Code |
fetch_input | Description | Code |
loadMembersFromCoreSlices | No description | Code |
run | No description | Code |
store_gene_and_all_transcripts | No description | Code |
write_output | No description | Code |
Methods description
Title : fetch_input Usage : $self->fetch_input Function: prepares global variables and DB connections Returns : none Args : none |
Methods code
fasta_description | description | prev | next | Top |
# Title   : fasta_description
# Usage   : my $text = $self->fasta_description($gene, $transcript);
# Function: Builds a one-line, space-separated description made of the
#           transcript stable id followed by the gene stable id, seq region
#           name, seq region start and seq region end.
# Returns : string of the form
#           "Transcript:<id> Gene:<id> Chr:<name> Start:<n> End:<n>"
# Args    : $gene       - object answering stable_id, seq_region_name,
#                         seq_region_start and seq_region_end
#           $transcript - object answering stable_id
sub fasta_description {
    my ($self, $gene, $transcript) = @_;

    # Each field is "Label:value"; the fields are evaluated in output order.
    my @fields = (
        "Transcript:" . $transcript->stable_id,
        "Gene:"       . $gene->stable_id,
        "Chr:"        . $gene->seq_region_name,
        "Start:"      . $gene->seq_region_start,
        "End:"        . $gene->seq_region_end,
    );

    return join(" ", @fields);
}
1; } |
# Title   : fetch_input
# Usage   : $self->fetch_input
# Function: Parses the job's input_id (a stringified Perl hash holding at
#           least a 'gdb' key and optionally 'pseudo_stableID_prefix'),
#           opens the compara adaptor on this object's own connection,
#           looks up the GenomeDB by the given id, takes its db_adaptor as
#           the core adaptor, and resets the counters used while loading.
# Returns : 1
# Args    : none
# Throws  : if input_id is missing, does not contain '{', cannot be
#           evaluated to a hash reference, has no 'gdb' entry, or if the
#           GenomeDB / its db_adaptor cannot be obtained.
sub fetch_input {
    my ($self) = @_;

    my $input_id = $self->input_id;
    $self->throw("No input_id") unless defined($input_id);
    print("input_id = " . $input_id . "\n");
    $self->throw("Improper formated input_id") unless ($input_id =~ /\{/);

    # NOTE(review): string eval executes whatever Perl the input_id contains.
    # This is only acceptable while input_ids are produced by trusted
    # pipeline code; consider a real parser if that ever stops being true.
    my $input_hash = eval($input_id);
    $self->throw("Could not parse input_id '$input_id': $@") if $@;
    $self->throw("input_id '$input_id' does not evaluate to a hash reference")
        unless (ref($input_hash) eq 'HASH');

    # Validate before printing so a missing id is reported as such.
    my $genome_db_id = $input_hash->{'gdb'};
    $self->throw("No genome_db_id in input_id") unless defined($genome_db_id);
    print("gdb = $genome_db_id\n");

    if ($input_hash->{'pseudo_stableID_prefix'}) {
        $self->{'pseudo_stableID_prefix'} = $input_hash->{'pseudo_stableID_prefix'};
    }

    # Compara adaptor shares the connection of the db this job came from.
    $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);

    $self->{'genome_db'} = $self->{'comparaDBA'}->get_GenomeDBAdaptor->fetch_by_dbID($genome_db_id);
    $self->throw("No genome_db found for id=$genome_db_id") unless ($self->{'genome_db'});

    $self->{'coreDBA'} = $self->{'genome_db'}->db_adaptor();
    $self->throw("Can't connect to genome database for id=$genome_db_id") unless ($self->{'coreDBA'});

    # Behaviour switches and progress counters read by the loading subs.
    $self->{'store_genes'}     = 1;
    $self->{'verbose'}         = 0;
    $self->{'sliceCount'}      = 0;
    $self->{'geneCount'}       = 0;
    $self->{'realGeneCount'}   = 0;
    $self->{'transcriptCount'} = 0;
    $self->{'longestCount'}    = 0;

    return 1;
}
# Title   : loadMembersFromCoreSlices
# Usage   : $self->loadMembersFromCoreSlices
# Function: Creates and stores two subsets named after the genome_db
#           ("... longest translations" and "... genes"), fetches every
#           toplevel slice from the core adaptor, and hands each gene whose
#           biotype is in the accepted list to
#           store_gene_and_all_transcripts. Prints a summary of the
#           counters when done.
# Returns : the return value of the final print (not meaningful)
# Args    : none
# Throws  : if the core database returns no toplevel slices
sub loadMembersFromCoreSlices {
    my $self = shift;

    my $genome_db   = $self->{'genome_db'};
    my $name_prefix = "gdb:" . $genome_db->dbID . " " . $genome_db->name;

    $self->{'pepSubset'} = Bio::EnsEMBL::Compara::Subset->new(
        -name => $name_prefix . ' longest translations');
    $self->{'geneSubset'} = Bio::EnsEMBL::Compara::Subset->new(
        -name => $name_prefix . ' genes');

    $self->{'comparaDBA'}->get_SubsetAdaptor->store($self->{'pepSubset'});
    $self->{'comparaDBA'}->get_SubsetAdaptor->store($self->{'geneSubset'});

    my @slices = @{$self->{'coreDBA'}->get_SliceAdaptor->fetch_all('toplevel')};
    print("fetched ",scalar(@slices), " slices to load from\n");
    $self->throw("problem: no toplevel slices") unless (scalar(@slices));

    # Biotypes are compared after lc(), so the keys here must be lowercase.
    # (Comparing lc() output against 'IG_V_gene' etc. can never match.)
    my %accepted_biotype = map { $_ => 1 }
        qw(protein_coding ig_v_gene ig_c_gene c_segment v_segment);

    foreach my $slice (@slices) {
        $self->{'sliceCount'}++;
        foreach my $gene (@{$slice->get_all_Genes}) {
            $self->{'geneCount'}++;
            next unless $accepted_biotype{ lc($gene->biotype) };
            $self->{'realGeneCount'}++;
            $self->store_gene_and_all_transcripts($gene);
        }
    }

    print("loaded ".$self->{'sliceCount'}." slices\n");
    print(" ".$self->{'geneCount'}." genes\n");
    print(" ".$self->{'realGeneCount'}." real genes\n");
    print(" ".$self->{'transcriptCount'}." transscripts\n");
    print(" ".$self->{'longestCount'}." longest transcripts\n");
    print(" ".$self->{'pepSubset'}->count()." in Subset\n");
}
# Title   : run
# Usage   : $self->run
# Function: Turns off disconnect_when_inactive on the compara and core
#           connections, loads the members via loadMembersFromCoreSlices,
#           then turns disconnect_when_inactive back on for both.
# Returns : 1
# Args    : none
sub run {
    my $self = shift;

    my @adaptor_keys = ('comparaDBA', 'coreDBA');

    # Keep both connections open for the duration of the load.
    foreach my $key (@adaptor_keys) {
        $self->{$key}->dbc->disconnect_when_inactive(0);
    }

    $self->loadMembersFromCoreSlices();

    # Let the connections drop again once idle.
    foreach my $key (@adaptor_keys) {
        $self->{$key}->dbc->disconnect_when_inactive(1);
    }

    return 1;
}
# Title   : store_gene_and_all_transcripts
# Usage   : $self->store_gene_and_all_transcripts($gene);
# Function: For every transcript of $gene that has a translation, builds a
#           peptide member (Member->new_from_transcript) and stores it.
#           When 'store_genes' is set, a gene member is built and stored
#           once, on the first transcript that yields a peptide sequence,
#           added to the gene subset and linked to each stored peptide.
#           The peptide member with the greatest seq_length is added to
#           the peptide subset. If 'pseudo_stableID_prefix' is set, stable
#           ids of the gene, transcripts and translations are overwritten
#           with "<prefix>G_/T_/P_<dbID>".
# Returns : not meaningful
# Args    : $gene - gene object from the core database
# Throws  : if a translation has no stable id
sub store_gene_and_all_transcripts {
    my $self = shift;
    my $gene = shift;

    my $longest_pep_member;
    my $max_length             = 0;
    my $gene_member;
    my $gene_member_not_stored = 1;
    my $member_adaptor         = $self->{'comparaDBA'}->get_MemberAdaptor();
    my $prefix                 = $self->{'pseudo_stableID_prefix'};

    if (defined($prefix)) {
        $gene->stable_id($prefix . "G_" . $gene->dbID);
    }

    foreach my $transcript (@{$gene->get_all_Transcripts}) {
        my $translation = $transcript->translation;
        unless (defined $translation) {
            warn("COREDB error: No translation for transcript ", $transcript->stable_id, "(dbID=",$transcript->dbID.")\n");
            next;
        }

        if (defined($prefix)) {
            $transcript->stable_id($prefix . "T_" . $transcript->dbID);
            $translation->stable_id($prefix . "P_" . $translation->dbID);
        }

        $self->{'transcriptCount'}++;
        print(" transcript " . $transcript->stable_id ) if ($self->{'verbose'});

        # throw dies, so nothing after it in this branch would ever run.
        unless (defined $translation->stable_id) {
            $self->throw("COREDB error: does not contain translation stable id for translation_id ". $translation->dbID."\n");
        }

        my $description = $self->fasta_description($gene, $transcript);
        my $pep_member  = Bio::EnsEMBL::Compara::Member->new_from_transcript(
            -transcript  => $transcript,
            -genome_db   => $self->{'genome_db'},
            -translate   => 'yes',
            -description => $description);
        print(" => member " . $pep_member->stable_id) if ($self->{'verbose'});

        unless ($pep_member->sequence) {
            print(" => NO SEQUENCE!\n") if ($self->{'verbose'});
            next;
        }
        print(" len=",$pep_member->seq_length ) if ($self->{'verbose'});

        # The gene member is created lazily so that genes without any usable
        # peptide are never stored.
        if ($self->{'store_genes'} && $gene_member_not_stored) {
            print(" gene " . $gene->stable_id ) if ($self->{'verbose'});
            $gene_member = Bio::EnsEMBL::Compara::Member->new_from_gene(
                -gene      => $gene,
                -genome_db => $self->{'genome_db'});
            print(" => member " . $gene_member->stable_id) if ($self->{'verbose'});

            # Best effort: a failed gene store must not abort the load, but
            # it should not go unnoticed either.
            eval {
                $member_adaptor->store($gene_member);
                print(" : stored") if ($self->{'verbose'});
            };
            warn("Could not store gene member: $@") if $@;

            $self->{'geneSubset'}->add_member($gene_member);
            print("\n") if ($self->{'verbose'});
            $gene_member_not_stored = 0;
        }

        $member_adaptor->store($pep_member);
        # No gene member exists when 'store_genes' is off; skip the link
        # rather than calling dbID on undef.
        if (defined $gene_member) {
            $member_adaptor->store_gene_peptide_link($gene_member->dbID, $pep_member->dbID);
        }
        print(" : stored\n") if ($self->{'verbose'});

        if ($pep_member->seq_length > $max_length) {
            $max_length         = $pep_member->seq_length;
            $longest_pep_member = $pep_member;
        }
    }

    if (defined $longest_pep_member) {
        $self->{'pepSubset'}->add_member($longest_pep_member);
        $self->{'longestCount'}++;
    }
}
# Title   : write_output
# Usage   : $self->write_output
# Function: Replaces the input job's input_id with a stringified hash that
#           carries the genome_db dbID ('gdb') and the dbID of the peptide
#           subset ('ss').
# Returns : 1
# Args    : none
sub write_output {
    my $self = shift;

    my $genome_db_id  = $self->{'genome_db'}->dbID;
    my $pep_subset_id = $self->{'pepSubset'}->dbID;

    # Same "{'gdb'=>N,'ss'=>M}" text, assembled from its pieces.
    my $output_id = join('', "{'gdb'=>", $genome_db_id, ",'ss'=>", $pep_subset_id, "}");

    $self->input_job->input_id($output_id);
    return 1;
}
} |
General documentation
Describe contact details here
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _