Raw content of Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric
=head1 LICENSE
Copyright (c) 1999-2009 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
/info/about/code_licence.html
=head1 CONTACT
Please email comments or questions to the public Ensembl
developers list at .
Questions may also be sent to the Ensembl help desk at
.
=cut
=head1 NAME
=head1 SYNOPSIS
=head1 DESCRIPTION
=head1 METHODS
=cut
package Bio::EnsEMBL::IdMapping::InternalIdMapper::EnsemblTranscriptGeneric;
use strict;
use warnings;
no warnings 'uninitialized';
use Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper;
our @ISA = qw(Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper);
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::ScriptUtils qw(path_append);
#
# basic mapping
#
sub init_basic {
my $self = shift;
my $num = shift;
my $tsb = shift;
my $mappings = shift;
my $transcript_scores = shift;
$self->logger->info("Basic transcript mapping...\n", 0, 'stamped');
$mappings = $self->basic_mapping($transcript_scores,
"transcript_mappings$num");
$num++;
my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
"transcript_matrix$num");
return ($new_scores, $mappings);
}
#
# handle cases with exact match but different translation
#
sub non_exact_translation {
my $self = shift;
my $num = shift;
my $tsb = shift;
my $mappings = shift;
my $transcript_scores = shift;
$self->logger->info("Exact Transcript non-exact Translation...\n", 0, 'stamped');
unless ($transcript_scores->loaded) {
$tsb->different_translation_rescore($transcript_scores);
$transcript_scores->write_to_file;
}
$mappings = $self->basic_mapping($transcript_scores,
"transcript_mappings$num");
$num++;
my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
"transcript_matrix$num");
return ($new_scores, $mappings);
}
#
# reduce score for mappings of transcripts which do not belong to mapped
# genes
#
sub mapped_gene {
my $self = shift;
my $num = shift;
my $tsb = shift;
my $mappings = shift;
my $transcript_scores = shift;
my $gene_mappings = shift;
$self->logger->info("Transcripts in mapped genes...\n", 0, 'stamped');
unless ($transcript_scores->loaded) {
$tsb->non_mapped_gene_rescore($transcript_scores, $gene_mappings);
$transcript_scores->write_to_file;
}
$mappings = $self->basic_mapping($transcript_scores,
"transcript_mappings$num");
$num++;
my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
"transcript_matrix$num");
return ($new_scores, $mappings);
}
#
# selectively rescore by penalising scores between transcripts with
# different internalIDs
#
sub internal_id {
my $self = shift;
my $num = shift;
my $tsb = shift;
my $mappings = shift;
my $transcript_scores = shift;
$self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');
unless ($transcript_scores->loaded) {
$tsb->internal_id_rescore($transcript_scores);
$transcript_scores->write_to_file;
}
$mappings = $self->basic_mapping($transcript_scores,
"transcript_mappings$num");
$num++;
my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
"transcript_matrix$num");
return ($new_scores, $mappings);
}
#
# handle ambiguities between transcripts in single genes
#
sub single_gene {
my $self = shift;
my $num = shift;
my $tsb = shift;
my $mappings = shift;
my $transcript_scores = shift;
$self->logger->info("Transcripts in single genes...\n", 0, 'stamped');
unless ($transcript_scores->loaded) {
$transcript_scores->write_to_file;
}
$mappings = $self->same_gene_transcript_mapping($transcript_scores,
"transcript_mappings$num");
$num++;
my $new_scores = $tsb->create_shrinked_matrix($transcript_scores, $mappings,
"transcript_matrix$num");
return ($new_scores, $mappings);
}
#
# modified basic mapper that maps transcripts that are ambiguous within one gene
#
sub same_gene_transcript_mapping {
my $self = shift;
my $matrix = shift;
my $mapping_name = shift;
# argument checks
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
throw('Need a name for serialising the mapping.') unless ($mapping_name);
# Create a new MappingList object. Specify AUTO_LOAD to load serialised
# existing mappings if found
my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
my $mappings = Bio::EnsEMBL::IdMapping::MappingList->new(
-DUMP_PATH => $dump_path,
-CACHE_FILE => "${mapping_name}.ser",
-AUTO_LOAD => 1,
);
# checkpoint test: return a previously stored MappingList
if ($mappings->loaded) {
$self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
return $mappings;
}
my $sources_done = {};
my $targets_done = {};
# sort scoring matrix entries by descending score
my @sorted_entries = sort { $b->score <=> $a->score ||
$a->source <=> $b->source || $a->target <=> $b->target }
@{ $matrix->get_all_Entries };
while (my $entry = shift(@sorted_entries)) {
# $self->logger->debug("\nxxx4 ".$entry->to_string." ");
# we already found a mapping for either source or target yet
next if ($sources_done->{$entry->source} or
$targets_done->{$entry->target});
#$self->logger->debug('d');
my $other_sources = [];
my $other_targets = [];
my %source_genes = ();
my %target_genes = ();
if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {
#$self->logger->debug('a');
$other_sources = $self->filter_sources($other_sources, $sources_done);
$other_targets = $self->filter_targets($other_targets, $targets_done);
$source_genes{$self->cache->get_by_key('genes_by_transcript_id',
'source', $entry->source)} = 1;
$target_genes{$self->cache->get_by_key('genes_by_transcript_id',
'target', $entry->target)} = 1;
foreach my $other_source (@{ $other_sources }) {
$source_genes{$self->cache->get_by_key('genes_by_transcript_id',
'source', $other_source)} = 1;
}
foreach my $other_target (@{ $other_targets }) {
$target_genes{$self->cache->get_by_key('genes_by_transcript_id',
'target', $other_target)} = 1;
}
# only add mapping if only one source and target gene involved
if (scalar(keys %source_genes) == 1 and scalar(keys %target_genes) == 1) {
#$self->logger->debug('O');
$mappings->add_Entry($entry);
}
} else {
#$self->logger->debug('A');
# this is the best mapping, add it
$mappings->add_Entry($entry);
}
$sources_done->{$entry->source} = 1;
$targets_done->{$entry->target} = 1;
}
# create checkpoint
$mappings->write_to_file;
return $mappings;
}
1;