Bio::EnsEMBL::IdMapping::InternalIdMapper EnsemblTranscriptGeneric
Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods
Toolbar
WebCvs | Raw content
Summary
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Bio::EnsEMBL::Utils::ScriptUtils qw ( path_append )
Inherit
Bio::EnsEMBL::IdMapping::InternalIdMapper::BaseMapper
Synopsis
Description
Methods
init_basic
No description
Code
internal_id
No description
Code
mapped_gene
No description
Code
non_exact_translation
No description
Code
same_gene_transcript_mapping
No description
Code
single_gene
No description
Code
Methods description
None available.
Methods code
init_basicdescriptionprevnextTop
#
# First-pass transcript mapping step.
#
# Positional arguments:
#   $num               - step counter; used to name the mapping cache
#                        ("transcript_mappings$num") and, after being
#                        incremented, the matrix cache ("transcript_matrix$num")
#   $tsb               - object providing create_shrinked_matrix()
#                        (presumably the transcript score builder - confirm)
#   (4th argument)     - an incoming mappings value; accepted but never read,
#                        because the mappings are recomputed here
#   $transcript_scores - scoring matrix handed to basic_mapping()
#
# Returns the list ($new_scores, $mappings): the shrunk matrix and the
# mappings produced by basic_mapping().
#
sub init_basic {
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Basic transcript mapping...\n", 0, 'stamped');

  # the mapping cache is named after the current step number ...
  my $fresh_mappings = $self->basic_mapping(
    $transcript_scores,
    "transcript_mappings$num",
  );

  # ... while the reduced matrix is named after the following step
  ++$num;
  my $reduced_scores = $tsb->create_shrinked_matrix(
    $transcript_scores,
    $fresh_mappings,
    "transcript_matrix$num",
  );

  return ($reduced_scores, $fresh_mappings);
}


#
# handle cases with exact match but different translation
#
}
internal_iddescriptionprevnextTop
#
# Mapping step that retries after an internal-ID based rescoring.
#
# Positional arguments:
#   $num               - step counter; names the mapping cache
#                        ("transcript_mappings$num") and, incremented, the
#                        matrix cache ("transcript_matrix$num")
#   $tsb               - object providing internal_id_rescore() and
#                        create_shrinked_matrix()
#   (4th argument)     - incoming mappings; accepted but never read
#   $transcript_scores - scoring matrix; must provide loaded() and
#                        write_to_file()
#
# The matrix is rescored and written out only when loaded() is false.
#
# Returns the list ($new_scores, $mappings).
#
sub internal_id {
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Retry with internalID disambiguation...\n", 0, 'stamped');

  # rescore and checkpoint only when loaded() reports false
  if (!$transcript_scores->loaded) {
    $tsb->internal_id_rescore($transcript_scores);
    $transcript_scores->write_to_file;
  }

  my $fresh_mappings = $self->basic_mapping(
    $transcript_scores,
    "transcript_mappings$num",
  );

  # the reduced matrix belongs to the following step
  ++$num;
  my $reduced_scores = $tsb->create_shrinked_matrix(
    $transcript_scores,
    $fresh_mappings,
    "transcript_matrix$num",
  );

  return ($reduced_scores, $fresh_mappings);
}


#
# handle ambiguities between transcripts in single genes
#
}
mapped_genedescriptionprevnextTop
#
# Mapping step that rescores using the gene mappings before mapping.
#
# Positional arguments:
#   $num               - step counter; names the mapping cache
#                        ("transcript_mappings$num") and, incremented, the
#                        matrix cache ("transcript_matrix$num")
#   $tsb               - object providing non_mapped_gene_rescore() and
#                        create_shrinked_matrix()
#   (4th argument)     - incoming mappings; accepted but never read
#   $transcript_scores - scoring matrix; must provide loaded() and
#                        write_to_file()
#   $gene_mappings     - gene mappings passed through to
#                        non_mapped_gene_rescore()
#
# The matrix is rescored and written out only when loaded() is false.
#
# Returns the list ($new_scores, $mappings).
#
sub mapped_gene {
  my ($self, $num, $tsb, undef, $transcript_scores, $gene_mappings) = @_;

  $self->logger->info("Transcripts in mapped genes...\n", 0, 'stamped');

  # rescore and checkpoint only when loaded() reports false
  if (!$transcript_scores->loaded) {
    $tsb->non_mapped_gene_rescore($transcript_scores, $gene_mappings);
    $transcript_scores->write_to_file;
  }

  my $fresh_mappings = $self->basic_mapping(
    $transcript_scores,
    "transcript_mappings$num",
  );

  # the reduced matrix belongs to the following step
  ++$num;
  my $reduced_scores = $tsb->create_shrinked_matrix(
    $transcript_scores,
    $fresh_mappings,
    "transcript_matrix$num",
  );

  return ($reduced_scores, $fresh_mappings);
}


#
# selectively rescore by penalising scores between transcripts with
# different internalIDs
#
}
non_exact_translationdescriptionprevnextTop
#
# Mapping step that rescores via different_translation_rescore() before
# mapping (logged as "Exact Transcript non-exact Translation").
#
# Positional arguments:
#   $num               - step counter; names the mapping cache
#                        ("transcript_mappings$num") and, incremented, the
#                        matrix cache ("transcript_matrix$num")
#   $tsb               - object providing different_translation_rescore() and
#                        create_shrinked_matrix()
#   (4th argument)     - incoming mappings; accepted but never read
#   $transcript_scores - scoring matrix; must provide loaded() and
#                        write_to_file()
#
# The matrix is rescored and written out only when loaded() is false.
#
# Returns the list ($new_scores, $mappings).
#
sub non_exact_translation {
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Exact Transcript non-exact Translation...\n", 0, 'stamped');

  # rescore and checkpoint only when loaded() reports false
  if (!$transcript_scores->loaded) {
    $tsb->different_translation_rescore($transcript_scores);
    $transcript_scores->write_to_file;
  }

  my $fresh_mappings = $self->basic_mapping(
    $transcript_scores,
    "transcript_mappings$num",
  );

  # the reduced matrix belongs to the following step
  ++$num;
  my $reduced_scores = $tsb->create_shrinked_matrix(
    $transcript_scores,
    $fresh_mappings,
    "transcript_matrix$num",
  );

  return ($reduced_scores, $fresh_mappings);
}


#
# reduce score for mappings of transcripts which do not belong to mapped
# genes
#
}
same_gene_transcript_mappingdescriptionprevnextTop
#
# Build a MappingList from a scoring matrix. Entries are visited best score
# first; an entry that ambiguous_mapping() flags is accepted only when the
# competing transcripts all belong to one source gene and one target gene.
#
# Arguments:
#   $matrix       - a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix
#   $mapping_name - base name of the serialised mapping ("<name>.ser")
#
# Returns the MappingList. If a serialised list was loaded from disk it is
# returned as is, without recomputation.
#
# Throws if $matrix is missing or of the wrong class, or if $mapping_name is
# false.
#
sub same_gene_transcript_mapping {
  my ($self, $matrix, $mapping_name) = @_;

  # argument checks
  if (!$matrix or !$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
    throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
  }
  if (!$mapping_name) {
    throw('Need a name for serialising the mapping.');
  }

  # Create a new MappingList object. AUTO_LOAD is specified so that existing
  # serialised mappings are loaded if found.
  my $dump_path = path_append($self->conf->param('basedir'), 'mapping');
  my $mappings  = Bio::EnsEMBL::IdMapping::MappingList->new(
    -DUMP_PATH  => $dump_path,
    -CACHE_FILE => "${mapping_name}.ser",
    -AUTO_LOAD  => 1,
  );

  # checkpoint test: return a previously stored MappingList
  if ($mappings->loaded) {
    $self->logger->info("Read existing mappings from ${mapping_name}.ser.\n");
    return $mappings;
  }

  # sources / targets that have already been dealt with
  my %source_seen;
  my %target_seen;

  # sort scoring matrix entries by descending score, breaking ties by
  # ascending source and then ascending target
  my @ranked = sort {
         $b->score  <=> $a->score
      || $a->source <=> $b->source
      || $a->target <=> $b->target
  } @{ $matrix->get_all_Entries };

  while (my $entry = shift @ranked) {

    # skip entries whose source or target has already been processed
    next if ($source_seen{$entry->source} or $target_seen{$entry->target});

    my $other_sources = [];
    my $other_targets = [];

    # an unambiguous entry is the best mapping and is always accepted
    my $accept = 1;

    if ($self->ambiguous_mapping($entry, $matrix, $other_sources, $other_targets)) {

      $other_sources = $self->filter_sources($other_sources, \%source_seen);
      $other_targets = $self->filter_targets($other_targets, \%target_seen);

      # collect the genes of every transcript involved in the ambiguity
      my %source_genes;
      my %target_genes;

      $source_genes{$self->cache->get_by_key('genes_by_transcript_id',
        'source', $entry->source)} = 1;
      $target_genes{$self->cache->get_by_key('genes_by_transcript_id',
        'target', $entry->target)} = 1;

      for my $source_id (@{ $other_sources }) {
        $source_genes{$self->cache->get_by_key('genes_by_transcript_id',
          'source', $source_id)} = 1;
      }

      for my $target_id (@{ $other_targets }) {
        $target_genes{$self->cache->get_by_key('genes_by_transcript_id',
          'target', $target_id)} = 1;
      }

      # only add mapping if only one source and target gene involved
      $accept = (scalar(keys %source_genes) == 1
             and scalar(keys %target_genes) == 1);
    }

    $mappings->add_Entry($entry) if $accept;

    # mark both ends as done whether or not the entry was accepted
    $source_seen{$entry->source} = 1;
    $target_seen{$entry->target} = 1;
  }

  # create checkpoint
  $mappings->write_to_file;

  return $mappings;
}

1;
}
single_genedescriptionprevnextTop
#
# Mapping step that uses same_gene_transcript_mapping() instead of
# basic_mapping() (logged as "Transcripts in single genes").
#
# Positional arguments:
#   $num               - step counter; names the mapping cache
#                        ("transcript_mappings$num") and, incremented, the
#                        matrix cache ("transcript_matrix$num")
#   $tsb               - object providing create_shrinked_matrix()
#   (4th argument)     - incoming mappings; accepted but never read
#   $transcript_scores - scoring matrix; must provide loaded() and
#                        write_to_file()
#
# No rescoring happens in this step; the matrix is only written out when
# loaded() is false.
#
# Returns the list ($new_scores, $mappings).
#
sub single_gene {
  my ($self, $num, $tsb, undef, $transcript_scores) = @_;

  $self->logger->info("Transcripts in single genes...\n", 0, 'stamped');

  # checkpoint the matrix when loaded() reports false
  if (!$transcript_scores->loaded) {
    $transcript_scores->write_to_file;
  }

  my $fresh_mappings = $self->same_gene_transcript_mapping(
    $transcript_scores,
    "transcript_mappings$num",
  );

  # the reduced matrix belongs to the following step
  ++$num;
  my $reduced_scores = $tsb->create_shrinked_matrix(
    $transcript_scores,
    $fresh_mappings,
    "transcript_matrix$num",
  );

  return ($reduced_scores, $fresh_mappings);
}


#
# modified basic mapper that maps transcripts that are ambiguous within one gene
#
}
General documentation
LICENSETop
  Copyright (c) 1999-2009 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license. For license details, please see /info/about/code_licence.html
CONTACTTop
  Please email comments or questions to the public Ensembl
developers list at <ensembl-dev@ebi.ac.uk>.
Questions may also be sent to the Ensembl help desk at <helpdesk@ensembl.org>.