Bio::EnsEMBL::IdMapping
StableIdMapper
Toolbar
Summary
Package variables
Privates (from "my" definitions)
%debug_mappings;
Included modules
Inherit
Synopsis
Description
Methods
add_stable_id_event | No description | Code |
dump_debug_mappings | No description | Code |
filter_same_gene_transcript_similarities | No description | Code |
generate_mapping_session | No description | Code |
generate_mapping_stats | No description | Code |
generate_similarity_events | No description | Code |
generate_translation_similarity_events | No description | Code |
get_all_stable_id_events | No description | Code |
map_stable_ids | No description | Code |
mapping_session_date | No description | Code |
mapping_session_date_fmt | No description | Code |
mapping_session_id | No description | Code |
new | No description | Code |
stable_id_generator | No description | Code |
write_stable_id_events | No description | Code |
write_stable_ids_to_file | No description | Code |
Methods description
None available.
Methods code
add_stable_id_event | description | prev | next | Top |
# Register a stable ID event string under the given event type.
#
# Arg 1: $type  - event type; the error message names 'new' and 'similarity'
# Arg 2: $event - the event row, used as a hash key so that identical
#                 events are stored only once
# Throws if no (true) event type is given.
# Returns 1 (the value stored for the event).
sub add_stable_id_event {
    my $self  = shift;
    my $type  = shift;
    my $event = shift;

    unless ($type) {
        throw("Need an event type (new|similarity).");
    }

    $self->{'stable_id_events'}->{$type}->{$event} = 1;
}
# Write the collected debug mappings to one tab-separated file per object
# type ("<type>_mappings.txt", requested from get_filehandle() with the
# 'debug' argument). The rows come from the file-level %debug_mappings hash.
sub dump_debug_mappings {
    my ($self) = @_;

    for my $type (keys %debug_mappings) {
        $self->logger->debug("Writing $type mappings to debug/${type}_mappings.txt...\n");

        my $fh   = $self->get_filehandle("${type}_mappings.txt", 'debug');
        my $rows = $debug_mappings{$type};

        # one line per mapping, columns separated by tabs
        for my $row (@{$rows}) {
            print {$fh} join("\t", @{$row}) . "\n";
        }

        close($fh);
        $self->logger->debug("Done.\n");
    }
}
filter_same_gene_transcript_similarities | description | prev | next | Top |
# Build a copy of a transcript score matrix without "same gene" entries.
#
# An entry is dropped when the source and target transcripts' genes share a
# stable ID and the source transcript's stable ID is also the stable ID of
# some cached target transcript. All other entries are copied into a new
# ScoredMappingMatrix.
#
# Arg 1: $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
# Throws if the argument is missing or of the wrong class.
# Returns the filtered Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.
sub filter_same_gene_transcript_similarities {
    my ($self, $transcript_scores) = @_;

    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix of transcripts.')
        unless ($transcript_scores and
                $transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix'));

    my $filtered_scores = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
        -DUMP_PATH  => path_append($self->conf->param('basedir'), 'matrix'),
        -CACHE_FILE => 'filtered_transcript_scores.ser',
    );

    # lookup of the stable IDs carried by the cached target transcripts
    my %all_targets = map { $_->stable_id => 1 }
        values %{ $self->cache->get_by_name("transcripts_by_id", 'target') };

    my $skipped = 0;

    for my $entry (@{ $transcript_scores->get_all_Entries }) {
        my $source_transcript = $self->cache->get_by_key('transcripts_by_id',
            'source', $entry->source);
        my $source_gene = $self->cache->get_by_key('genes_by_transcript_id',
            'source', $entry->source);
        my $target_gene = $self->cache->get_by_key('genes_by_transcript_id',
            'target', $entry->target);

        my $same_gene = (($source_gene->stable_id eq $target_gene->stable_id) and
                         $all_targets{$source_transcript->stable_id});

        if ($same_gene) {
            $skipped++;
        }
        else {
            $filtered_scores->add_Entry($entry);
        }
    }

    $self->logger->debug("Skipped $skipped same gene transcript mappings.\n");

    return $filtered_scores;
}
# Create the mapping_session for this run (at most once per object) and
# write the mapping_session table dump.
#
# Steps:
#   1. Do nothing if a session date has already been set.
#   2. Record the current time as session date (raw and formatted).
#   3. Pick the session ID: the configured 'mapping_session_id' if set,
#      otherwise MAX(mapping_session_id) from the source db plus one.
#   4. Copy all existing mapping_session rows from the source db to
#      'mapping_session.txt' and append the row for the new session.
sub generate_mapping_session {
    my ($self) = @_;

    # a session date is only set by this method, so it marks "already done"
    if ($self->mapping_session_date) {
        return;
    }

    $self->logger->info("Generating new mapping_session...\n");

    $self->mapping_session_date(time);
    my @session_time = localtime($self->mapping_session_date);
    $self->mapping_session_date_fmt(strftime("%Y-%m-%d %T", @session_time));

    my $source_dbh = $self->cache->get_DBAdaptor('source')->dbc->db_handle;
    my $target_dbh = $self->cache->get_DBAdaptor('target')->dbc->db_handle;

    # determine the ID of the new session
    my $session_id = $self->conf->param('mapping_session_id');

    if (!$session_id) {
        my $max_id_sql = qq(SELECT MAX(mapping_session_id) FROM mapping_session);
        $session_id = $self->fetch_value_from_db($source_dbh, $max_id_sql);

        $self->logger->debug("No previous mapping_session found.\n", 1)
            unless ($session_id);

        $session_id++;
        $self->logger->debug("Using mapping_session_id $session_id\n", 1);
    }
    else {
        $self->logger->debug("Using manually configured mapping_session_id $session_id\n", 1);
    }

    $self->mapping_session_id($session_id);

    # carry over the existing mapping_session rows from the source db
    my $fh          = $self->get_filehandle('mapping_session.txt', 'tables');
    my $entry_count = 0;

    my $sth = $source_dbh->prepare("SELECT * FROM mapping_session");
    $sth->execute;

    while (my @row = $sth->fetchrow_array) {
        print {$fh} join("\t", @row) . "\n";
        $entry_count++;
    }

    $sth->finish;

    # release and assembly of both databases, read from their meta tables
    my $release_sql  = "\nSELECT meta_value FROM meta WHERE meta_key = 'schema_version'\n";
    my $old_release  = $self->fetch_value_from_db($source_dbh, $release_sql);
    my $new_release  = $self->fetch_value_from_db($target_dbh, $release_sql);

    my $assembly_sql = "\nSELECT meta_value FROM meta WHERE meta_key = 'assembly.default'\n";
    my $old_assembly = $self->fetch_value_from_db($source_dbh, $assembly_sql);
    my $new_assembly = $self->fetch_value_from_db($target_dbh, $assembly_sql);

    if (grep { !$_ } ($old_release, $new_release, $old_assembly, $new_assembly)) {
        $self->logger->warning("Not all data for new mapping_session found:\n", 1);
        $self->logger->info("old_release: $old_release, new_release: $new_release");
        $self->logger->info("old_assembly: $old_assembly, new_assembly $new_assembly\n", 2);
    }

    # append the row describing the new session
    my @new_session_row = (
        $session_id,
        $self->conf->param('sourcedbname'),
        $self->conf->param('targetdbname'),
        $old_release,
        $new_release,
        $old_assembly,
        $new_assembly,
        $self->mapping_session_date_fmt,
    );
    print {$fh} join("\t", @new_session_row) . "\n";
    $entry_count++;

    close($fh);

    $self->logger->info("Done writing ".$entry_count." mapping_session entries.\n\n");
}
# Format the mapping statistics for one object type, log them and write
# them to "<type>_mapping_stats.txt" (get_filehandle() 'stats' target).
#
# Arg 1: $type  - object type; for 'exon' only the total row is printed
# Arg 2: $stats - hashref with the counters mapped_known, mapped_novel,
#                 lost_known and lost_novel; missing counters count as 0
#
# Every percentage is reported as 0 when its denominator is 0. That guard
# already existed for the known/novel rows and is now applied to the total
# row too, so all-zero stats no longer die with "Illegal division by zero".
#
# Returns the result of closing the stats filehandle.
sub generate_mapping_stats {
    my ($self, $type, $stats) = @_;

    # default absent counters to 0 to avoid uninitialized-value warnings
    my %count = map { $_ => ($stats->{$_} || 0) }
        qw(mapped_known mapped_novel lost_known lost_novel);

    # percentage of $part in $whole, 0 for an empty $whole
    my $percentage = sub {
        my ($part, $whole) = @_;
        return $whole ? $part / $whole * 100 : 0;
    };

    my $result = ucfirst($type)." mapping results:\n\n";

    my $fmt1 = "%-10s%-10s%-10s%-10s\n";
    my $fmt2 = "%-10s%6.0f    %6.0f    %4.2f%%\n";

    $result .= sprintf($fmt1, qw(TYPE MAPPED LOST PERCENTAGE));
    $result .= ('-'x40)."\n";

    my $mapped_total = $count{'mapped_known'} + $count{'mapped_novel'};
    my $lost_total   = $count{'lost_known'}   + $count{'lost_novel'};
    my $known_total  = $count{'mapped_known'} + $count{'lost_known'};
    my $novel_total  = $count{'mapped_novel'} + $count{'lost_novel'};

    # no known/novel breakdown for exons
    unless ($type eq 'exon') {
        $result .= sprintf($fmt2,
            'known',
            $count{'mapped_known'},
            $count{'lost_known'},
            $percentage->($count{'mapped_known'}, $known_total));

        $result .= sprintf($fmt2,
            'novel',
            $count{'mapped_novel'},
            $count{'lost_novel'},
            $percentage->($count{'mapped_novel'}, $novel_total));
    }

    $result .= sprintf($fmt2, 'total', $mapped_total, $lost_total,
        $percentage->($mapped_total, $known_total + $novel_total));

    # log the table and also keep a copy on disk
    $self->logger->info($result."\n");

    my $fh = $self->get_filehandle("${type}_mapping_stats.txt", 'stats');
    print {$fh} $result;
    close($fh);
}
# Record one 'similarity' stable ID event for a scored source/target pair.
# The event row holds: source stable ID and version, target stable ID and
# version, mapping session ID, object type and score (tab-separated).
sub _add_similarity_event {
    my ($self, $entry, $type) = @_;

    my $s_obj = $self->cache->get_by_key("${type}s_by_id", 'source',
        $entry->source);
    my $t_obj = $self->cache->get_by_key("${type}s_by_id", 'target',
        $entry->target);

    my $key = join("\t",
        $s_obj->stable_id,
        $s_obj->version,
        $t_obj->stable_id,
        $t_obj->version,
        $self->mapping_session_id,
        $type,
        $entry->score
    );

    return $self->add_stable_id_event('similarity', $key);
}

# Generate 'similarity' stable ID events from a score matrix.
#
# Pass 1 (mapped objects): for every mapping, each other scored entry that
# shares its source or its target and scores above 98% of the mapping's
# score becomes a similarity event.
#
# Pass 2 (unmapped objects): for every source and every target in the score
# matrix that is not part of any mapping, the entries scoring above 95% of
# that object's best score become similarity events, provided the best
# score is at least 0.7.
#
# Pass 2 now always covers both 'source' and 'target'. It used to iterate
# over the keys of the hash filled by pass 1, so an empty mapping list
# silently skipped the whole pass.
#
# Arg 1: $mappings - Bio::EnsEMBL::IdMapping::MappingList
# Arg 2: $scores   - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
# Arg 3: $type     - object type (gene|transcript|translation)
# Throws on missing or wrongly typed arguments.
sub generate_similarity_events {
    my ($self, $mappings, $scores, $type) = @_;

    unless ($mappings and
            $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
        throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
    }

    unless ($scores and
            $scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
        throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
    }

    throw("Need a type (gene|transcript|translation).") unless ($type);

    # internal IDs taking part in a mapping, per database side
    my %mapped = (source => {}, target => {});

    #
    # pass 1: close competitors of the mapped pairs
    #
    foreach my $e (@{ $mappings->get_all_Entries }) {
        $mapped{'source'}->{$e->source} = 1;
        $mapped{'target'}->{$e->target} = 1;

        # every scored entry sharing the target or the source of the mapping
        my @others = (
            @{ $scores->get_Entries_for_target($e->target) },
            @{ $scores->get_Entries_for_source($e->source) },
        );

        foreach my $e2 (@others) {
            # the mapping itself is not a similarity
            next if (($e->source eq $e2->source) and ($e->target eq $e2->target));

            next unless ($e2->score > ($e->score * 0.98));

            $self->_add_similarity_event($e2, $type);
        }
    }

    #
    # pass 2: best-scoring partners of objects that were not mapped at all
    #
    foreach my $dbtype (qw(source target)) {
        my $get_all_ids = "get_all_${dbtype}s";
        my $get_entries = "get_Entries_for_${dbtype}";

        foreach my $id (@{ $scores->$get_all_ids }) {
            next if ($mapped{$dbtype}->{$id});

            my @entries =
                sort { $b->score <=> $a->score } @{ $scores->$get_entries($id) };
            next unless (@entries);

            # skip objects whose best score is too low to be meaningful
            my $top_score = $entries[0]->score;
            next if ($top_score < 0.7);

            foreach my $e (@entries) {
                # entries are sorted by descending score, so the first one
                # at or below the threshold ends the run of similar entries
                last unless ($e->score > ($top_score * 0.95));

                $self->_add_similarity_event($e, $type);
            }
        }
    }

    return;
}
generate_translation_similarity_events | description | prev | next | Top |
# Derive a translation score matrix from transcript scores and generate
# similarity events of type 'translation' from it.
#
# Each transcript score entry whose source and target transcripts both have
# a translation contributes the transcript score for that translation pair.
#
# Arg 1: $mappings          - Bio::EnsEMBL::IdMapping::MappingList
# Arg 2: $transcript_scores - Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
# Throws on missing or wrongly typed arguments.
# Returns whatever generate_similarity_events() returns.
sub generate_translation_similarity_events {
    my ($self, $mappings, $transcript_scores) = @_;

    throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.')
        unless ($mappings and
                $mappings->isa('Bio::EnsEMBL::IdMapping::MappingList'));

    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.')
        unless ($transcript_scores and
                $transcript_scores->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix'));

    my $translation_scores = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
        -DUMP_PATH  => path_append($self->conf->param('basedir'), 'matrix'),
        -CACHE_FILE => 'translation_scores.ser',
    );

    for my $tr_entry (@{ $transcript_scores->get_all_Entries }) {
        my $source_transcript = $self->cache->get_by_key('transcripts_by_id',
            'source', $tr_entry->source);
        my $source_translation = $source_transcript->translation;

        my $target_transcript = $self->cache->get_by_key('transcripts_by_id',
            'target', $tr_entry->target);
        my $target_translation = $target_transcript->translation;

        # only pairs where both transcripts have a translation are scored
        next unless ($source_translation and $target_translation);

        $translation_scores->add_score($source_translation->id,
            $target_translation->id, $tr_entry->score);
    }

    return $self->generate_similarity_events($mappings, $translation_scores,
        'translation');
}
# Fetch all stable ID event rows registered for an event type.
#
# Arg 1: $type - event type; the error message names 'new' and 'similarity'
# Throws if no (true) event type is given.
# Returns an arrayref of event strings, in hash (i.e. unspecified) order.
sub get_all_stable_id_events {
    my $self = shift;
    my $type = shift;

    unless ($type) {
        throw("Need an event type (new|similarity).");
    }

    my @events = keys %{ $self->{'stable_id_events'}->{$type} };

    return \@events;
}
# Assign stable IDs to all cached target objects of one type.
#
# Mapped targets take over the stable ID and created date of their source
# object and get a version from the stable ID generator. Unmapped targets
# get a freshly generated stable ID with version 1. Sources without a
# mapping are counted as lost. For all types except 'exon', a stable ID
# event of type 'new' is registered for each of these three cases.
# Finally the stable IDs and the mapping statistics are written out.
#
# Arg 1: $mappings - Bio::EnsEMBL::IdMapping::MappingList for $type
# Arg 2: $type     - object type; 'exon', 'gene' and 'transcript' get
#                    special treatment below
# Throws if $mappings is missing or not a MappingList.
# Returns early (nothing) when no source objects of $type are cached.
sub map_stable_ids
{ my $self = shift;
my $mappings = shift;
my $type = shift;
unless ($mappings and
$mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
throw("Need a Bio::EnsEMBL::IdMapping::MappingList of ${type}s.");
}
# make sure a mapping session (ID and date) exists; the call is a no-op
# once a session date has been set
$self->generate_mapping_session;
$self->logger->info("== Stable ID mapping for $type...\n\n", 0, 'stamped');
# shallow copies of the cache hashes; the keys are used below as the same
# IDs that the mapping entries return from source()/target()
my %all_sources = %{ $self->cache->get_by_name("${type}s_by_id", 'source') };
my %all_targets = %{ $self->cache->get_by_name("${type}s_by_id", 'target') };
unless (scalar(keys %all_sources)) {
$self->logger->info("No cached ${type}s found.\n\n");
return;
}
# counters; 'new' is counted here but not read by generate_mapping_stats
my %stats = map { $_ => 0 }
qw(mapped_known mapped_novel new lost_known lost_novel);
# index the mappings in both directions and remember the score per target
my %sources_mapped = ();
my %targets_mapped = ();
my %scores_by_target = ();
foreach my $e (@{ $mappings->get_all_Entries }) {
$sources_mapped{$e->source} = $e->target;
$targets_mapped{$e->target} = $e->source;
$scores_by_target{$e->target} = $e->score;
}
# first stable ID to hand out to unmapped targets of this type
my $new_stable_id = $self->stable_id_generator->initial_stable_id($type);
# NOTE(review): targets are visited in hash order, so which unmapped target
# receives which new stable ID depends on that order - confirm this is
# acceptable for reproducibility
foreach my $tid (keys %all_targets) {
my $t_obj = $all_targets{$tid};
# mapped target: carry over the identity of the source object
if (my $sid = $targets_mapped{$tid}) {
my $s_obj = $all_sources{$sid};
$t_obj->stable_id($s_obj->stable_id);
$t_obj->created_date($s_obj->created_date);
$t_obj->version($self->stable_id_generator->calculate_version(
$s_obj, $t_obj));
# unchanged version keeps the old modified date, otherwise the object
# counts as modified in this session
if ($s_obj->version == $t_obj->version) {
$t_obj->modified_date($s_obj->modified_date);
} else {
$t_obj->modified_date($self->mapping_session_date);
}
# no stable ID events are recorded for exons
unless ($type eq 'exon') {
my $key = join("\t",
$s_obj->stable_id,
$s_obj->version,
$t_obj->stable_id,
$t_obj->version,
$self->mapping_session_id,
$type,
$scores_by_target{$tid}
);
$self->add_stable_id_event('new', $key);
}
# remembered for dump_debug_mappings
push @{ $debug_mappings{$type} }, [ $sid, $tid, $t_obj->stable_id ];
if ($s_obj->is_known) {
$stats{'mapped_known'}++;
} else {
$stats{'mapped_novel'}++;
}
} else {
# unmapped target: assign a new stable ID, created in this session
$t_obj->stable_id($new_stable_id);
$t_obj->version(1);
$t_obj->created_date($self->mapping_session_date);
$t_obj->modified_date($self->mapping_session_date);
unless ($type eq 'exon') {
# event row with an empty old side ('\N' stable ID, version 0) and score 0
my $key = join("\t",
'\N',
0,
$t_obj->stable_id,
$t_obj->version,
$self->mapping_session_id,
$type,
0
);
$self->add_stable_id_event('new', $key);
}
# advance the generator value for the next unmapped target
$new_stable_id = $self->stable_id_generator->increment_stable_id(
$new_stable_id);
$stats{'new'}++;
}
}
# lost sources are listed in a debug file for genes and transcripts only
my $fh;
if ($type eq 'gene' or $type eq 'transcript') {
$fh = $self->get_filehandle("${type}s_lost.txt", 'debug');
}
foreach my $sid (keys %all_sources) {
my $s_obj = $all_sources{$sid};
# source without a mapping: the object was lost
unless ($sources_mapped{$sid}) {
unless ($type eq 'exon') {
# event row with an empty new side ('\N' stable ID, version 0) and score 0
my $key = join("\t",
$s_obj->stable_id,
$s_obj->version,
'\N',
0,
$self->mapping_session_id,
$type,
0
);
$self->add_stable_id_event('new', $key);
}
my $status;
if ($s_obj->is_known) {
$stats{'lost_known'}++;
$status = 'known';
} else {
$stats{'lost_novel'}++;
$status = 'novel';
}
if ($type eq 'gene' or $type eq 'transcript') {
print $fh $s_obj->stable_id, "\t$status\n";
}
}
}
close($fh) if (defined($fh));
# write the stable ID table dump and the statistics for this type
$self->write_stable_ids_to_file($type,\% all_targets);
$self->generate_mapping_stats($type,\% stats);
$self->logger->info("Done.\n\n"); } |
# Getter/setter for the mapping session date.
# generate_mapping_session() stores the value of time() here.
# Any argument (including an explicit undef) replaces the stored value.
# Returns the stored value.
sub mapping_session_date {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'_mapping_session_date'} = $new_value[0];
    }

    return $self->{'_mapping_session_date'};
}
# Getter/setter for the formatted mapping session date.
# generate_mapping_session() stores a "%Y-%m-%d %T" string here.
# Any argument (including an explicit undef) replaces the stored value.
# Returns the stored value.
sub mapping_session_date_fmt {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'_mapping_session_date_fmt'} = $new_value[0];
    }

    return $self->{'_mapping_session_date_fmt'};
}
# Getter/setter for the ID of the current mapping session.
# Any argument (including an explicit undef) replaces the stored value.
# Returns the stored value.
sub mapping_session_id {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'_mapping_session_id'} = $new_value[0];
    }

    return $self->{'_mapping_session_id'};
}
# Constructor. Builds the object through the parent class constructor, then
# loads and instantiates the stable ID generator plugin.
#
# The plugin class is taken from the 'plugin_stable_id_generator'
# configuration parameter and defaults to
# Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric. The plugin is
# constructed with this object's logger, conf and cache.
#
# All arguments are passed on unchanged to the parent constructor.
# Returns the new object.
sub new {
    my ($caller, @args) = @_;

    # allow calling as class or as object method
    my $class = ref($caller) || $caller;
    my $self  = $class->SUPER::new(@args);

    my $generator_class = $self->conf->param('plugin_stable_id_generator')
        || 'Bio::EnsEMBL::IdMapping::StableIdGenerator::EnsemblGeneric';
    $self->logger->debug("Using $generator_class to generate stable Ids.\n");

    # load the plugin class at runtime before instantiating it
    inject($generator_class);

    $self->stable_id_generator(
        $generator_class->new(
            -LOGGER => $self->logger,
            -CONF   => $self->conf,
            -CACHE  => $self->cache
        )
    );

    return $self;
}
# Getter/setter for the stable ID generator plugin instance created in new().
# Any argument (including an explicit undef) replaces the stored value.
# Returns the stored value.
sub stable_id_generator {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'_stable_id_generator'} = $new_value[0];
    }

    return $self->{'_stable_id_generator'};
}
1; } |
# Write all registered stable ID events of one type to
# "stable_id_event_<type>.txt" (get_filehandle() 'tables' target),
# one event row per line.
#
# Arg 1: $event_type - event type; the error message names 'new' and
#                      'similarity'
# Throws if no (true) event type is given.
sub write_stable_id_events {
    my ($self, $event_type) = @_;

    unless ($event_type) {
        throw("Need an event type (new|similarity).");
    }

    $self->logger->debug("Writing $event_type stable_id_events to file...\n");

    my $fh     = $self->get_filehandle("stable_id_event_${event_type}.txt", 'tables');
    my $events = $self->get_all_stable_id_events($event_type);

    my $written = 0;
    for my $event_row (@{$events}) {
        print {$fh} "$event_row\n";
        $written++;
    }

    close($fh);

    $self->logger->debug("Done writing $written entries.\n");
}
write_stable_ids_to_file | description | prev | next | Top |
# Write the stable IDs of all target objects of one type to
# "<type>_stable_id.txt" (get_filehandle() 'tables' target).
#
# One tab-separated row per object, ordered numerically by hash key:
# internal ID, stable ID, version, created date, modified date. Objects
# without a created or modified date fall back to the mapping session date.
# Dates are formatted as "%Y-%m-%d %T" in local time.
#
# Arg 1: $type        - object type, used for the file name and log message
# Arg 2: $all_targets - hashref of target objects, keyed by a numeric ID
sub write_stable_ids_to_file {
    my ($self, $type, $all_targets) = @_;

    $self->logger->info("Writing ${type} stable IDs to file...\n");

    my $fh = $self->get_filehandle("${type}_stable_id.txt", 'tables');

    my @sorted_ids = sort { $a <=> $b } keys %{$all_targets};

    for my $id (@sorted_ids) {
        my $obj = $all_targets->{$id};

        # objects lacking a date are stamped with the session date
        my $created_date  = $obj->created_date  || $self->mapping_session_date;
        my $modified_date = $obj->modified_date || $self->mapping_session_date;

        my @columns = (
            $obj->id,
            $obj->stable_id,
            $obj->version,
            strftime("%Y-%m-%d %T", localtime($created_date)),
            strftime("%Y-%m-%d %T", localtime($modified_date)),
        );

        print {$fh} join("\t", @columns) . "\n";
    }

    close($fh);

    $self->logger->info("Done writing ".scalar(@sorted_ids)." entries.\n\n");
}
General documentation
Copyright (c) 1999-2009 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
/info/about/code_licence.html