Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils TranslationUtils
Included librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils qw ( id )
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranscriptUtils qw ( Transcript_info )
Bio::EnsEMBL::Analysis::Tools::Logger(1)
Bio::EnsEMBL::Analysis::Tools::Logger(2) qw ( logger_info )
Bio::EnsEMBL::Analysis::Tools::Utilities qw ( write_seqfile )
Bio::EnsEMBL::Translation
Bio::EnsEMBL::Utils::Exception qw ( verbose throw warning stack_trace_dump )
Exporter
Inherit
Exporter
Synopsis
No synopsis!
Description
No description!
Methods
Translation_infoDescriptionCode
clone_TranslationDescriptionCode
compute_translationDescriptionCode
contains_internal_stopsDescriptionCode
ends_with_stopDescriptionCode
print_TranslationDescriptionCode
print_Translation_genomic_coordsDescriptionCode
print_peptideDescriptionCode
return_translationDescriptionCode
run_translateDescriptionCode
starts_with_metDescriptionCode
validate_Translation_coordsDescriptionCode
Methods description
Translation_infocode    nextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Arg [2] : string, indent
Function :
Returntype: returns a string of translation start exon,
start, end exon, end, cdna length
Exceptions: none
Example :
clone_TranslationcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Arg [2] : Bio::EnsEMBL::Transcript
Function : copies the translation of transcript 1 to
transcript2
Returntype:
Exceptions:
Example :
compute_translationcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Function : run run_translate and give transcript new
translation based on those results
Returntype: Bio::EnsEMBL::Transcript
Exceptions: Warns in unable to create translation
Example :
contains_internal_stopscodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Function : counts how many internal stops there are
Returntype: int
Exceptions: none
Example :
ends_with_stopcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Function : checks if transcript ends with a stop codon
Returntype: boolean
Exceptions: none
Example :
print_TranslationcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Arg [2] : string, indent
Function : prints info about translation of the given
transcript including its coords, if starts with a met etc
Returntype: n/a
Exceptions: none
Example : print_Translation($transcript);
print_Translation_genomic_coordscodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Arg [2] : string, indent
Function : print the genomic coordinates of the translation
Returntype: n/a
Exceptions: none
Example : print_Translation_genomic_coords($transcript);
print_peptidecodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Function : prints the peptide sequence of the given
transcript
Returntype: n/a
Exceptions: none
Example :
return_translationcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Function : calculates translation for transcript
Returntype: Bio::EnsEMBL::Translation
Exceptions:
Example :
run_translatecodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Arg [2] : boolean, do want predictions with met or
not
Function : run the program translate and find coordinates
of translations in transcripts cdna
Returntype: arrayref of an array or arrays, each element
containing an array of length, start and end
Exceptions:
Example :
starts_with_metcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Function : checks if peptide starts with a methoinine
Returntype: boolean
Exceptions: none
Example :
validate_Translation_coordscodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Transcript
Arg [2] : (optional) boolean, allow negative start coordinates
Function : check coordinates of translation of transcript
Returntype: boolean, 1 if translation is ok
Methods code
Translation_infodescriptionprevnextTop
sub Translation_info {
  my ($transcript, $indent) = @_;
  $indent = '' if(!$indent);
  my $translation = $transcript->translation;
  my $id = id($translation);
  my $start_id = id($translation->start_Exon);
  my $end_id = id($translation->end_Exon);
  my $cdna_length = length($transcript->translateable_seq);
  return $indent."TRANSLATION: ".$id." ".$start_id." ".
    $translation->start." ".$end_id." ".$translation->end." ".
      $cdna_length." ";
}
clone_TranslationdescriptionprevnextTop
sub clone_Translation {
  my ($transcript, $newtranscript, $clone_xrefs) = @_;
  $clone_xrefs = 1 if(!defined($clone_xrefs));
  #print "\n**CLONING TRANSLATION**\n\n";
my $newtranslation = Bio::EnsEMBL::Translation->new(); my $translation = $transcript->translation; my %new_exons; foreach my $exon(@{$newtranscript->get_all_Exons}){ my $id_string = $exon->start."-".$exon->end."-". $exon->strand; #print "Adding ".$id_string." exon to hash\n";
$new_exons{$id_string} = $exon; } my $start_exon = $translation->start_Exon; my $old_start_id_string = $start_exon->start."-". $start_exon->end."-".$start_exon->strand; my $new_start_Exon = $new_exons{$old_start_id_string}; throw("Failed to find exon ".$old_start_id_string." for ". id($transcript)) if(!$new_start_Exon); $newtranslation->start_Exon($new_start_Exon); $newtranslation->start($translation->start); my $end_exon = $translation->end_Exon; my $old_end_id_string = $end_exon->start."-". $end_exon->end."-".$end_exon->strand; my $new_end_Exon = $new_exons{$old_end_id_string}; throw($old_end_id_string." failed to get exon") if(!$new_end_Exon); $newtranslation->end_Exon($new_end_Exon); $newtranslation->end($translation->end); my $attribs = $translation->get_all_Attributes(); $newtranslation->add_Attributes(@$attribs); $newtranslation->dbID($translation->dbID); $newtranslation->stable_id($translation->stable_id); $newtranslation->version($translation->version); $newtranslation->created_date($translation->created_date); $newtranslation->modified_date($translation->modified_date); if ($clone_xrefs){ foreach my $DBEntry (@{$translation->get_all_DBEntries}){ $newtranslation->add_DBEntry($DBEntry); } } return $newtranslation; #print "\n\n**\n\n";
}
compute_translationdescriptionprevnextTop
sub compute_translation {
  my ($transcript) = @_;

  my @met_predictions = @{run_translate 
                            ($transcript, 1)};
  my @nomet_predictions = @{run_translate 
                              ($transcript)};

  my $orf;
  if(@met_predictions && @nomet_predictions){
    my $met_best = $met_predictions[0];
    my $no_met_best = $nomet_predictions[0];
    if($no_met_best->[0] > (2*$met_best->[0])){
      $orf = $no_met_best;
    }else{
      $orf = $met_best;
    }
  }elsif(@met_predictions){
    $orf = $met_predictions[0];
  }elsif(@nomet_predictions){
    $orf = $nomet_predictions[0];
  }else{
    warning(id($transcript)." has no translations ");
    return $transcript;
  }
  #Here we take the best prediction with a methionine unless 
#there aren't any of the best prediction without a
#methoinine is more than twice the length
my $orf_start = $orf->[1]; my $orf_end = $orf->[2]; my $translation = Bio::EnsEMBL::Translation->new(); logger_info("Best orf for ".id($transcript)." ".$orf->[0]. " long ".$orf_start." to ".$orf_end); my ($translation_start, $translation_end, $translation_start_Exon, $translation_end_Exon); my $exon_count = 0; my $pos = 1; foreach my $exon(@{$transcript->get_all_Exons}){ $exon_count++; logger_info("exon:$exon_count exon_length:".$exon->length." pos:$pos orf_start:$orf_start orf_end:$orf_end pos+:".($pos + $exon->length - 1)); if ( $orf_start >= $pos && $orf_start <= $pos + $exon->length - 1 ){ $translation_start = $orf_start - $pos+1; $translation_start_Exon = $exon; } if($orf_end >= $pos && $orf_end <= $pos + $exon->length - 1){ $translation_end = $orf_end - $pos + 1; $translation_end_Exon = $exon; } $pos += $exon->length; } if(!$translation_start || !$translation_end || !$translation_start_Exon || !$translation_end_Exon){ warning("problems making the translation ". "for ".id($transcript)); return $transcript; }else{ $translation->start($translation_start); $translation->end($translation_end); $translation->start_Exon($translation_start_Exon); $translation->end_Exon($translation_end_Exon); $transcript->translation($translation); } my $found_start = 0; my $found_end = 0; my $last_end_phase; my $first_exon = 1; # print "Setting phases on transcript after adding translation\n";
foreach my $exon(@{$transcript->get_all_Exons}){ $exon->phase(-1); $exon->end_phase(-1); # print " Have exon " . $exon->start . " " . $exon->end . "\n";
if($translation->start_Exon == $exon){ if($translation->start == 1 && $first_exon){ $exon->phase(0); # print " setting start phase on it to 0 (tstart = 1 and is start_Exon)\n";
} $found_start = 1; }elsif($found_start and not $found_end){ $exon->phase($last_end_phase); # print " setting start phase on it to last_end_phase ($last_end_phase)\n";
} my $end_phase; if($exon == $translation->start_Exon){ $end_phase = ($exon->end - ($exon->start + $translation->start - 1) +1 ) %3; # print " start_Exon end phase calculated ($end_phase)\n";
}else{ $end_phase = (($exon->length + $exon->phase) %3); # print " end phase calculated ($end_phase)\n";
} if(($exon == $translation->end_Exon && $exon->length == $translation->end)){ # print " setting end phase to $end_phase (end exon condition)\n";
$exon->end_phase($end_phase); } $found_end = 1 if($exon == $translation->end_Exon); if (($found_start and !$found_end)) { # print " setting end phase to $end_phase (found_start and not found_end condition)\n";
$exon->end_phase($end_phase); } $last_end_phase = $exon->end_phase; $first_exon = 0; } return $transcript;
}
contains_internal_stopsdescriptionprevnextTop
sub contains_internal_stops {
  my ($transcript) = @_;
  my $pep = $transcript->translate->seq;
  my $num = $pep =~ /\*/;
  logger_info(Translation_info($transcript)." contains internal stops ".$pep)
    if($num);
  return $num;
}
ends_with_stopdescriptionprevnextTop
sub ends_with_stop {
  my ($transcript) = @_;
  my $cdna_seq = uc($transcript->translateable_seq);
  return 1 if($cdna_seq =~ /(TAA|TGA|TAG)$/);
  return 0;
}
print_TranslationdescriptionprevnextTop
sub print_Translation {
  my ($transcript,$indent) = @_;
  $indent = '' if(!$indent);
  print Translation_info($transcript, $indent)."\n";
  print_Translation_genomic_coords($transcript, $indent);
  warning("Transcript is less than 3bp can't print ".
          "any more info") if($transcript->length < 3);
  return if($transcript->length < 3);
  my $met = starts_with_met($transcript);
  print $indent."peptide starts with a methionine\n" if($met);
  my $stop = ends_with_stop($transcript);
  print $indent."peptide ends with a stop codon\n" if($stop);
  my $num_stops = contains_internal_stops($transcript);
  print $indent."peptide contains ".$num_stops.
    " internal stop codons\n" if($num_stops);
}
print_Translation_genomic_coordsdescriptionprevnextTop
sub print_Translation_genomic_coords {
  my ($transcript, $indent) = @_;
  print $indent."genomic coords ".
    $transcript->coding_region_start." ".
      $transcript->coding_region_end."\n";
}
print_peptidedescriptionprevnextTop
sub print_peptide {
  my ($transcript) = @_;
  warning("Transcript is less than 3bp can't print ".
          "Its peptide") if($transcript->length < 3);
  return if($transcript->length < 3);
  print ">";
  print Translation_info($transcript, '')."\n";
  print $transcript->translate->seq."\n";
}
return_translationdescriptionprevnextTop
sub return_translation {
  my ($trans) = @_;
  $trans = compute_translation($trans);
  return $trans->translation;
}
run_translatedescriptionprevnextTop
sub run_translate {
  my ($trans,$met) = @_;

  my $trans_id = id($trans);
  my $seq = $trans->seq;
  $seq->display_id($trans_id);

  my $file = write_seqfile($seq);
  my $command = "/usr/local/ensembl/bin/translate";
  $command .= " -m " if($met);
  $command .= " ".$file." | ";
  logger_info($command);
  open ( ORF, $command ) || throw( "Error running translate" );
  
  my @orf_predictions;
 ORF:
  while ( <ORF> ){
    chomp;
    next ORF unless /\>/;
    my @entries = split;
    next ORF unless ( $entries[3] && $entries[5] );
    my $id = $entries[1];
    my $orf_length = $entries[3];
    $orf_length =~s/\,//;
$entries[5] =~/(\d+)\.\.(\d+)/; my $orf_start = $1; my $orf_end = $2; if ($orf_start>=$orf_end ) { # print "can't compute translation for this transcript as translation start >= translation end : $orf_start >= $orf_end \n " ;
next ORF ; } # Check if there's a stop codon and add it
if ($orf_end + 3 <= $seq->length) { my $codon = uc($seq->subseq($orf_end+1,$orf_end+3)); if ($codon eq 'TAA' || $codon eq 'TAG' || $codon eq 'TGA' ) { $orf_end+=3; } } my @prediction = ($orf_length,$orf_start,$orf_end); push( @orf_predictions,\@ prediction ); } my @sorted_predictions = map { $_->[1] } sort { $b->[0] <=> $a->[0] } map { [$_->[0], $_] } @orf_predictions; unlink $file; return\@ sorted_predictions;
}
starts_with_metdescriptionprevnextTop
sub starts_with_met {
  my ($transcript) = @_;
  my $pep = $transcript->translate->seq;
  return 1 if($pep =~ /^M/);
  return 0;
}
validate_Translation_coordsdescriptionprevnextTop
sub validate_Translation_coords {
  my ($transcript, $allow_negative_start) = @_;
  my $translation = $transcript->translation;
  if(!$allow_negative_start && ($translation->start < 1)){
    warning(Translation_info($transcript)." has a start  ".$translation->start.
            " less than 1");
    return 0;
  }
  if($translation->end < 0 || $translation->end > $translation->end_Exon->length){
    warning(Translation_info($transcript)." has an end ".$translation->end.
            " less than 1 or greater than ".$translation->end_Exon->length);
    return 0;
  }
  return 1;
}

1;
}
General documentation
No general documentation available.