Bio::EnsEMBL::IdMapping
TranscriptScoreBuilder
Toolbar
Summary
Package variables
Privates (from "my" definitions)
$transcript_score = ($source_transcript_score + $target_transcript_score) / ($source_transcript_length + $target_transcript_length)
Included modules
Inherit
Synopsis
Description
Combines ExonScoreBuilder, ExonDirectMapper and ExonerateRunner from
Java application.
Methods
build_scores | No description | Code |
different_translation_rescore | No description | Code |
flag_matrix_from_exon_scores | No description | Code |
non_mapped_gene_rescore | No description | Code |
score_matrix_from_flag_matrix | No description | Code |
score_transcripts | No description | Code |
scores_from_exon_scores | No description | Code |
Methods description
None available.
Methods code
sub build_scores
{ my $self = shift;
my $matrix = shift;
my $exon_matrix = shift;
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a transcript Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
unless ($exon_matrix and
$exon_matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a exon Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
$self->flag_matrix_from_exon_scores($matrix, $exon_matrix);
my $final_matrix =
$self->score_matrix_from_flag_matrix($matrix, $exon_matrix);
return $final_matrix; } |
sub different_translation_rescore
{ my $self = shift;
my $matrix = shift;
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
my $i = 0;
foreach my $entry (sort { $b->score <=> $a->score }
@{ $matrix->get_all_Entries }) {
last if ($entry->score < 1);
my $source_tl = $self->cache->get_by_key('transcripts_by_id',
'source', $entry->source)->translation;
my $target_tl = $self->cache->get_by_key('transcripts_by_id',
'target', $entry->target)->translation;
next if (!$source_tl and !$target_tl);
if (!$source_tl or !$target_tl or
($source_tl->seq ne $target_tl->seq)) {
$matrix->set_score($entry->source, $entry->target, 0.98);
$i++;
}
}
$self->logger->debug("Non-perfect translations on perfect transcripts: $i\n", 1); } |
sub flag_matrix_from_exon_scores
{ my $self = shift;
my $matrix = shift;
my $exon_matrix = shift;
my $i;
my $num_transcripts =
scalar(keys %{ $self->cache->get_by_name('transcripts_by_id', 'source') });
my $progress_id = $self->logger->init_progress($num_transcripts, 100);
$self->logger->info("Creating flag matrix...\n", 1);
foreach my $source_transcript (values %{ $self->cache->get_by_name('transcripts_by_id', 'source') }) {
$self->logger->log_progress($progress_id, ++$i, 1);
foreach my $source_exon (@{ $source_transcript->get_all_Exons }) {
foreach my $target_exon_id (@{ $exon_matrix->get_targets_for_source($source_exon->id) }) {
foreach my $target_transcript (@{ $self->cache->get_by_key('transcripts_by_exon_id', 'target', $target_exon_id) }) {
$matrix->add_score($source_transcript->id, $target_transcript->id, 1);
}
}
}
}
$self->logger->info("\n");
return $matrix; } |
sub non_mapped_gene_rescore
{ my $self = shift;
my $matrix = shift;
my $gene_mappings = shift;
unless ($matrix and
$matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a transcript Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
unless ($gene_mappings and
$gene_mappings->isa('Bio::EnsEMBL::IdMapping::MappingList')) {
throw('Need a gene Bio::EnsEMBL::IdMapping::MappingList.');
}
my %gene_lookup = map { $_->source => $_->target }
@{ $gene_mappings->get_all_Entries };
my $i = 0;
foreach my $entry (@{ $matrix->get_all_Entries }) {
my $source_gene = $self->cache->get_by_key('genes_by_transcript_id',
'source', $entry->source);
my $target_gene = $self->cache->get_by_key('genes_by_transcript_id',
'target', $entry->target);
my $mapped_target = $gene_lookup{$source_gene->id};
if (!$mapped_target or ($mapped_target != $target_gene->id)) {
$matrix->set_score($entry->source, $entry->target, ($entry->score * 0.8));
$i++;
}
}
$self->logger->debug("Scored transcripts in non-mapped genes: $i\n", 1);
}
1; } |
sub score_matrix_from_flag_matrix
{ my $self = shift;
my $flag_matrix = shift;
my $exon_matrix = shift;
unless ($flag_matrix and
$flag_matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a transcript Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
unless ($exon_matrix and
$exon_matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need an exon Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
my $transcript_score_threshold =
$self->conf->param('transcript_score_threshold') || 0;
my $matrix = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
-DUMP_PATH => $flag_matrix->dump_path,
-CACHE_FILE => $flag_matrix->cache_file_name,
);
my $i;
my $num_transcripts =
scalar(keys %{ $self->cache->get_by_name('transcripts_by_id', 'source') });
my $progress_id = $self->logger->init_progress($num_transcripts, 100);
$self->logger->info("Creating score matrix from flag matrix...\n", 1);
my $fmt_d1 = "%-14s%-15s%-14s%-14s%-14s\n";
my $fmt_d2 = "%.6f";
foreach my $source_transcript (values %{ $self->cache->get_by_name('transcripts_by_id', 'source') }) {
$self->logger->log_progress($progress_id, ++$i, 1);
my %source_exons = map { $_->id => 1 }
@{ $source_transcript->get_all_Exons };
my $source_transcript_length = $source_transcript->length;
foreach my $target_transcript_id (@{ $flag_matrix->get_targets_for_source($source_transcript->id) }) {
my $target_transcript = $self->cache->get_by_key('transcripts_by_id', 'target', $target_transcript_id);
my $source_transcript_score = 0;
my $target_transcript_score = 0;
my $target_transcript_length = $target_transcript->length;
my %target_exons = map { $_->id => 1 }
@{ $target_transcript->get_all_Exons };
foreach my $source_exon (@{ $source_transcript->get_all_Exons }) {
my $max_source_score = -1;
foreach my $target_exon_id (@{ $exon_matrix->get_targets_for_source($source_exon->id) }) {
next unless ($target_exons{$target_exon_id});
my $score = $exon_matrix->get_score($source_exon->id,
$target_exon_id);
$max_source_score = $score if ($score > $max_source_score);
}
if ($max_source_score > 0) {
$source_transcript_score += ($max_source_score * $source_exon->length);
}
}
foreach my $target_exon (@{ $target_transcript->get_all_Exons }) {
my $max_target_score = -1;
foreach my $source_exon_id (@{ $exon_matrix->get_sources_for_target($target_exon->id) }) {
next unless ($source_exons{$source_exon_id});
my $score = $exon_matrix->get_score(
$source_exon_id, $target_exon->id);
$max_target_score = $score if ($score > $max_target_score);
}
if ($max_target_score > 0) {
$target_transcript_score += ($max_target_score * $target_exon->length);
}
}
if (($source_transcript_length + $target_transcript_length) > 0) {
if (($source_transcript_score > $source_transcript_length) or
($target_transcript_score > $target_transcript_length)) {
$self->logger->warning("Score > length for source ($source_transcript_score <> $source_transcript_length) or target ($target_transcript_score <> $target_transcript_length).\n", 1);
} else { } |
sub score_transcripts
{ my $self = shift;
my $exon_matrix = shift;
unless ($exon_matrix and
$exon_matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
$self->logger->info("-- Scoring transcripts...\n\n", 0, 'stamped');
my $matrix = $self->scores_from_exon_scores($exon_matrix);
if ($self->logger->loglevel eq 'debug') {
$matrix->log('transcript', $self->conf->param('basedir'));
}
my $fmt = "%-40s%10.0f\n";
$self->logger->info("Scoring matrix:\n");
$self->logger->info(sprintf($fmt, "Total source transcripts:",
$self->cache->get_count_by_name('transcripts_by_id', 'source')), 1);
$self->logger->info(sprintf($fmt, "Total target transcripts:",
$self->cache->get_count_by_name('transcripts_by_id', 'target')), 1);
$self->log_matrix_stats($matrix);
$self->logger->info("\nDone with transcript scoring.\n\n");
return $matrix; } |
scores_from_exon_scores | description | prev | next | Top |
sub scores_from_exon_scores
{ my $self = shift;
my $exon_matrix = shift;
unless ($exon_matrix and
$exon_matrix->isa('Bio::EnsEMBL::IdMapping::ScoredMappingMatrix')) {
throw('Need a Bio::EnsEMBL::IdMapping::ScoredMappingMatrix.');
}
my $dump_path = path_append($self->conf->param('basedir'), 'matrix');
my $matrix = Bio::EnsEMBL::IdMapping::ScoredMappingMatrix->new(
-DUMP_PATH => $dump_path,
-CACHE_FILE => 'transcript_matrix.ser',
);
my $transcript_cache = $matrix->cache_file;
if (-s $transcript_cache) {
$self->logger->info("Reading transcript scoring matrix from file...\n", 0, 'stamped');
$self->logger->debug("Cache file $transcript_cache.\n", 1);
$matrix->read_from_file;
$self->logger->info("Done.\n\n", 0, 'stamped');
} else {
$self->logger->info("No transcript scoring matrix found. Will build new one.\n");
$self->logger->info("Transcript scoring...\n", 0, 'stamped');
$matrix = $self->build_scores($matrix, $exon_matrix);
$self->logger->info("Done.\n\n", 0, 'stamped');
$matrix->write_to_file;
}
return $matrix; } |
General documentation
Copyright (c) 1999-2009 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
/info/about/code_licence.html