Bio::EnsEMBL::IdMapping
Archiver
Toolbar
Summary
Bio::EnsEMBL::IdMapping::Archiver - create gene_archive and peptide_archive
Package variables
Privates (from "my" definitions)
$pa_id;
Included modules
Digest::MD5 qw ( md5_hex )
Inherit
Synopsis
my $archiver = Bio::EnsEMBL::IdMapping::Archiver->new(
-LOGGER => $logger,
-CONF => $conf,
-CACHE => $cache
);
# create gene and peptide archive
$archiver->create_archive($mapping_session_id);
# dump existing archive tables to file
my $num_entries =
$archiver->dump_table_to_file( 'source', 'gene_archive',
'gene_archive_existing.txt', 1 );
Description
This module creates the gene_archive and peptide_archive
tables. Data is written to a file as tab-delimited text for
loading into a MySQL database (this can be done manually, or using
StableIdmapper->upload_file_into_table()).
An archive entry for a given source gene is created if no target
gene exists, or if any of its transcripts or their translations
changed. Non-coding transcripts only have an entry in gene_archive (i.e.
without a corresponding peptide_archive entry).
Methods
Methods description
Arg[1] : Int $mapping_session_id - the mapping_session_id for this run Example : $archiver->create_archive($stable_id_mapper->mapping_session_id); Description : Creates the gene_archive and peptide_archive tables and writes the data to a tab-delimited file. The decision as to what to archive is deferred to dump_gene(), see documentation there for details. Return type : none Exceptions : None thrown on a missing argument; a warning is logged instead. Caller : id_mapping.pl Status : At Risk : under development |
Arg[1] : Bio::EnsEMBL::IdMapping::TinyGene $s_gene - source gene Arg[2] : Bio::EnsEMBL::IdMapping::TinyGene $t_gene - target gene Arg[3] : Filehandle $ga_fh - filehandle for writing gene_archive data Arg[4] : Filehandle $pa_fh - filehandle for writing peptide_archive data Example : my $target_gene = $gene_mappings{$source_gene->stable_id}; $archiver->dump_gene($source_gene, $target_gene, $ga_fh, $pa_fh); Description : Given a source gene, it will write a gene_archive and peptide_archive entry for it if no target gene exists, or if any of its transcripts or their translations changed. Return type : none Exceptions : none Caller : create_archive() Status : At Risk : under development |
Arg[1] : Bio::EnsEMBL::IdMapping::TinyGene $gene - gene to archive Arg[2] : Bio::EnsEMBL::IdMapping::TinyTranscript $tr - its transcript Arg[3] : Filehandle $ga_fh - filehandle for writing gene_archive data Example : $archive->dump_nc_row($s_gene, $s_tr, $ga_fh); Description : Writes an entry line for gene_archive for non-coding transcripts. Return type : none Exceptions : none Caller : dump_gene() Status : At Risk : under development |
Arg[1] : Bio::EnsEMBL::IdMapping::TinyGene $gene - gene to archive Arg[2] : Bio::EnsEMBL::IdMapping::TinyTranscript $tr - its transcript Arg[3] : Bio::EnsEMBL::IdMapping::TinyTranslation $tl - its translation Arg[4] : Filehandle $ga_fh - filehandle for writing gene_archive data Arg[5] : Filehandle $pa_fh - filehandle for writing peptide_archive data Example : $archive->dump_tuple($s_gene, $s_tr, $s_tl, $ga_fh, $pa_fh); Description : Writes entry lines for gene_archive and peptide_archive. Return type : none Exceptions : none Caller : dump_gene() Status : At Risk : under development |
Arg[1] : (optional) Int - mapping_session_id to set Example : my $msi = $archiver->mapping_session_id; Description : Getter/setter for mapping_session_id. Return type : Int Exceptions : none Caller : create_archive() Status : At Risk : under development |
Methods code
# create_archive
#
# Arg[1]      : Int $mapping_session_id - the mapping_session_id for this run
# Description : Writes gene_archive and peptide_archive rows as
#               tab-delimited text. The next free peptide_archive_id is
#               derived from MAX(peptide_archive_id) in the source database
#               and stored in the file-scoped $pa_id, which dump_tuple()
#               consumes and advances. Every source gene is handed to
#               dump_gene() together with the target gene sharing its
#               stable ID (undef if there is none); dump_gene() decides
#               what is actually archived.
# Return type : none
# Exceptions  : Dies if either output file cannot be closed cleanly.
#               A missing mapping_session_id only triggers a warning.
sub create_archive {
    my $self               = shift;
    my $mapping_session_id = shift;

    # A missing session id is reported but deliberately not fatal.
    unless ($mapping_session_id) {
        $self->logger->warning("No mapping_session_id set.");
    }

    $self->mapping_session_id($mapping_session_id);

    # Output handles come from get_filehandle(); presumably the second
    # argument selects the output subdirectory -- confirm in the helper.
    my $ga_fh = $self->get_filehandle('gene_archive_new.txt',    'tables');
    my $pa_fh = $self->get_filehandle('peptide_archive_new.txt', 'tables');

    # Find the highest peptide_archive_id already used in the source db.
    my $s_dba = $self->cache->get_DBAdaptor('source');
    my $s_dbh = $s_dba->dbc->db_handle;
    my $sql   = qq(SELECT MAX(peptide_archive_id) FROM peptide_archive);
    $pa_id = $self->fetch_value_from_db($s_dbh, $sql);

    unless ($pa_id) {
        $self->logger->warning("No max(peptide_archive_id) found in db.\n", 1);
        $self->logger->info("That's ok if this is the first stable ID mapping for this species.\n", 1);
    }

    # Start one past the current maximum (an undefined/zero value becomes 1).
    $pa_id++;
    $self->logger->debug("Starting with peptide_archive_id $pa_id.\n");

    # Index target genes by stable ID so each source gene can be paired
    # with its counterpart in a single lookup.
    my %target_genes = map { $_->stable_id => $_ }
        values %{ $self->cache->get_by_name("genes_by_id", 'target') };

    foreach my $source_gene (
        values %{ $self->cache->get_by_name("genes_by_id", 'source') } )
    {
        $self->dump_gene($source_gene,
            $target_genes{ $source_gene->stable_id },
            $ga_fh, $pa_fh);
    }

    # Buffered write errors only surface at close(); an unchecked close
    # could leave silently truncated archive files behind.
    close($ga_fh)
        or die "Unable to close gene_archive_new.txt: $!";
    close($pa_fh)
        or die "Unable to close peptide_archive_new.txt: $!";

    return;
}
# dump_gene
#
# Arg[1]      : source gene
# Arg[2]      : target gene (false/undef if the source gene has no target)
# Arg[3]      : filehandle for gene_archive rows
# Arg[4]      : filehandle for peptide_archive rows
# Description : Walks the transcripts of the source gene and archives each
#               one that has no unchanged counterpart in the target gene.
#               - A transcript with a translation is archived via
#                 dump_tuple() unless some target transcript has the same
#                 transcript stable ID, the same translation stable ID and
#                 the same translation sequence.
#               - A transcript without a translation is only considered if
#                 the gene biotype matches /RNA/; it is archived via
#                 dump_nc_row() unless a target transcript with the same
#                 stable ID exists.
#               - Any other transcript is skipped.
# Return type : none
sub dump_gene {
    my ($self, $s_gene, $t_gene, $ga_fh, $pa_fh) = @_;

    my $is_ncRNA = ($s_gene->biotype =~ /RNA/) ? 1 : 0;

    TRANSCRIPT:
    foreach my $s_tr (@{ $s_gene->get_all_Transcripts }) {
        my $s_tl = $s_tr->translation;

        if ($s_tl) {
            # Coding transcript: count target transcripts that are
            # identical in IDs and peptide sequence.
            my $unchanged_matches = 0;
            if ($t_gene) {
                $unchanged_matches = grep {
                    my $t_tr = $_;
                    my $t_tl = $t_tr->translation;
                    $t_tl
                        and $s_tr->stable_id eq $t_tr->stable_id
                        and $s_tl->stable_id eq $t_tl->stable_id
                        and $s_tl->seq eq $t_tl->seq;
                } @{ $t_gene->get_all_Transcripts };
            }

            $self->dump_tuple($s_gene, $s_tr, $s_tl, $ga_fh, $pa_fh)
                unless $unchanged_matches;

            next TRANSCRIPT;
        }

        # Transcripts without a translation are only archived for genes
        # whose biotype matches /RNA/.
        next TRANSCRIPT unless $is_ncRNA;

        my $surviving_matches = 0;
        if ($t_gene) {
            $surviving_matches =
                grep { $s_tr->stable_id eq $_->stable_id }
                @{ $t_gene->get_all_Transcripts };
        }

        $self->dump_nc_row($s_gene, $s_tr, $ga_fh)
            unless $surviving_matches;
    }
}
# dump_nc_row
#
# Arg[1]      : gene to archive
# Arg[2]      : its (non-coding) transcript
# Arg[3]      : filehandle for gene_archive rows
# Description : Writes one tab-delimited gene_archive line. The three
#               columns that dump_tuple() fills with translation stable ID,
#               translation version and peptide_archive_id are written as
#               the literal \N placeholder here.
# Return type : none
sub dump_nc_row {
    my ($self, $gene, $tr, $ga_fh) = @_;

    my @columns = (
        $gene->stable_id,
        $gene->version,
        $tr->stable_id,
        $tr->version,
        '\N',
        '\N',
        '\N',
        $self->mapping_session_id,
    );

    print {$ga_fh} join("\t", @columns);
    print {$ga_fh} "\n";
}
# dump_tuple
#
# Arg[1]      : gene to archive
# Arg[2]      : its transcript
# Arg[3]      : the transcript's translation
# Arg[4]      : filehandle for gene_archive rows
# Arg[5]      : filehandle for peptide_archive rows
# Description : Writes one tab-delimited gene_archive line and the matching
#               peptide_archive line (id, MD5 hex digest of the peptide
#               sequence, the sequence itself). Both lines share the current
#               value of the file-scoped $pa_id, which is incremented
#               afterwards so the next tuple gets a fresh id.
# Return type : none
sub dump_tuple {
    my ($self, $gene, $tr, $tl, $ga_fh, $pa_fh) = @_;

    my @gene_archive_row = (
        $gene->stable_id,
        $gene->version,
        $tr->stable_id,
        $tr->version,
        $tl->stable_id,
        $tl->version,
        $pa_id,
        $self->mapping_session_id,
    );
    print {$ga_fh} join("\t", @gene_archive_row);
    print {$ga_fh} "\n";

    my $peptide             = $tl->seq;
    my @peptide_archive_row = ($pa_id, md5_hex($peptide), $peptide);
    print {$pa_fh} join("\t", @peptide_archive_row);
    print {$pa_fh} "\n";

    # Advance the shared counter for the next archived translation.
    $pa_id++;
}
# mapping_session_id
#
# Arg[1]      : (optional) value to store as the mapping_session_id
# Description : Getter/setter. When called with an argument (even an
#               undefined one) the stored value is replaced first.
# Return type : the currently stored mapping_session_id
sub mapping_session_id {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'_mapping_session_id'} = $new_value[0];
    }

    return $self->{'_mapping_session_id'};
}
1; } |
General documentation
Copyright (c) 1999-2009 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
/info/about/code_licence.html