Raw content of Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranslationUtils package Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranslationUtils; use strict; use warnings; use Exporter; use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning stack_trace_dump); use Bio::EnsEMBL::Analysis::Tools::Logger; use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils qw(id); use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranscriptUtils qw(Transcript_info); use Bio::EnsEMBL::Translation; use Bio::EnsEMBL::Analysis::Tools::Utilities qw(write_seqfile); use Bio::EnsEMBL::Analysis::Tools::Logger qw(logger_info); use vars qw (@ISA @EXPORT); @ISA = qw(Exporter); @EXPORT = qw(print_Translation clone_Translation print_peptide Translation_info starts_with_met ends_with_stop contains_internal_stops print_Translation_genomic_coords run_translate compute_translation return_translation validate_Translation_coords ); =head2 print_Translation Arg [1] : Bio::EnsEMBL::Transcript Arg [2] : string, indent Function : prints info about translation of the given transcript including its coords, if starts with a met etc Returntype: n/a Exceptions: none Example : print_Translation($transcript); =cut sub print_Translation{ my ($transcript,$indent) = @_; $indent = '' if(!$indent); print Translation_info($transcript, $indent)."\n"; print_Translation_genomic_coords($transcript, $indent); warning("Transcript is less than 3bp can't print ". "any more info") if($transcript->length < 3); return if($transcript->length < 3); my $met = starts_with_met($transcript); print $indent."peptide starts with a methionine\n" if($met); my $stop = ends_with_stop($transcript); print $indent."peptide ends with a stop codon\n" if($stop); my $num_stops = contains_internal_stops($transcript); print $indent."peptide contains ".$num_stops. " internal stop codons\n" if($num_stops); } =head2 print_peptide Arg [1] : Bio::EnsEMBL::Transcript Function : prints the peptide sequence of the given transcript Returntype: n/a Exceptions: none Example : =cut sub print_peptide{ my ($transcript) = @_; warning("Transcript is less than 3bp can't print ". "Its peptide") if($transcript->length < 3); return if($transcript->length < 3); print ">"; print Translation_info($transcript, '')."\n"; print $transcript->translate->seq."\n"; } =head2 Translation_info Arg [1] : Bio::EnsEMBL::Transcript Arg [2] : string, indent Function : Returntype: returns a string of translation start exon, start, end exon, end, cdna length Exceptions: none Example : =cut sub Translation_info{ my ($transcript, $indent) = @_; $indent = '' if(!$indent); my $translation = $transcript->translation; my $id = id($translation); my $start_id = id($translation->start_Exon); my $end_id = id($translation->end_Exon); my $cdna_length = length($transcript->translateable_seq); return $indent."TRANSLATION: ".$id." ".$start_id." ". $translation->start." ".$end_id." ".$translation->end." ". $cdna_length." "; } =head2 print_Translation_genomic_coords Arg [1] : Bio::EnsEMBL::Transcript Arg [2] : string, indent Function : print the genomic coordinates of the translation Returntype: n/a Exceptions: none Example : print_Translation_genomic_coords($transcript); =cut sub print_Translation_genomic_coords{ my ($transcript, $indent) = @_; print $indent."genomic coords ". $transcript->coding_region_start." ". $transcript->coding_region_end."\n"; } =head2 starts_with_met Arg [1] : Bio::EnsEMBL::Transcript Function : checks if peptide starts with a methoinine Returntype: boolean Exceptions: none Example : =cut sub starts_with_met{ my ($transcript) = @_; my $pep = $transcript->translate->seq; return 1 if($pep =~ /^M/); return 0; } =head2 ends_with_stop Arg [1] : Bio::EnsEMBL::Transcript Function : checks if transcript ends with a stop codon Returntype: boolean Exceptions: none Example : =cut sub ends_with_stop{ my ($transcript) = @_; my $cdna_seq = uc($transcript->translateable_seq); return 1 if($cdna_seq =~ /(TAA|TGA|TAG)$/); return 0; } =head2 contains_internal_stops Arg [1] : Bio::EnsEMBL::Transcript Function : counts how many internal stops there are Returntype: int Exceptions: none Example : =cut sub contains_internal_stops{ my ($transcript) = @_; my $pep = $transcript->translate->seq; my $num = $pep =~ /\*/; logger_info(Translation_info($transcript)." contains internal stops ".$pep) if($num); return $num; } =head2 clone_Translation Arg [1] : Bio::EnsEMBL::Transcript Arg [2] : Bio::EnsEMBL::Transcript Function : copies the translation of transcript 1 to transcript2 Returntype: Exceptions: Example : =cut sub clone_Translation{ my ($transcript, $newtranscript, $clone_xrefs) = @_; $clone_xrefs = 1 if(!defined($clone_xrefs)); #print "\n**CLONING TRANSLATION**\n\n"; my $newtranslation = Bio::EnsEMBL::Translation->new(); my $translation = $transcript->translation; my %new_exons; foreach my $exon(@{$newtranscript->get_all_Exons}){ my $id_string = $exon->start."-".$exon->end."-". $exon->strand; #print "Adding ".$id_string." exon to hash\n"; $new_exons{$id_string} = $exon; } my $start_exon = $translation->start_Exon; my $old_start_id_string = $start_exon->start."-". $start_exon->end."-".$start_exon->strand; my $new_start_Exon = $new_exons{$old_start_id_string}; throw("Failed to find exon ".$old_start_id_string." for ". id($transcript)) if(!$new_start_Exon); $newtranslation->start_Exon($new_start_Exon); $newtranslation->start($translation->start); my $end_exon = $translation->end_Exon; my $old_end_id_string = $end_exon->start."-". $end_exon->end."-".$end_exon->strand; my $new_end_Exon = $new_exons{$old_end_id_string}; throw($old_end_id_string." failed to get exon") if(!$new_end_Exon); $newtranslation->end_Exon($new_end_Exon); $newtranslation->end($translation->end); my $attribs = $translation->get_all_Attributes(); $newtranslation->add_Attributes(@$attribs); $newtranslation->dbID($translation->dbID); $newtranslation->stable_id($translation->stable_id); $newtranslation->version($translation->version); $newtranslation->created_date($translation->created_date); $newtranslation->modified_date($translation->modified_date); if ($clone_xrefs){ foreach my $DBEntry (@{$translation->get_all_DBEntries}){ $newtranslation->add_DBEntry($DBEntry); } } return $newtranslation; #print "\n\n**\n\n"; } =head2 return_translation Arg [1] : Bio::EnsEMBL::Transcript Function : calculates translation for transcript Returntype: Bio::EnsEMBL::Translation Exceptions: Example : =cut sub return_translation{ my ($trans) = @_; $trans = compute_translation($trans); return $trans->translation; } =head2 compute_translation Arg [1] : Bio::EnsEMBL::Transcript Function : run run_translate and give transcript new translation based on those results Returntype: Bio::EnsEMBL::Transcript Exceptions: Warns in unable to create translation Example : =cut sub compute_translation{ my ($transcript) = @_; my @met_predictions = @{run_translate ($transcript, 1)}; my @nomet_predictions = @{run_translate ($transcript)}; my $orf; if(@met_predictions && @nomet_predictions){ my $met_best = $met_predictions[0]; my $no_met_best = $nomet_predictions[0]; if($no_met_best->[0] > (2*$met_best->[0])){ $orf = $no_met_best; }else{ $orf = $met_best; } }elsif(@met_predictions){ $orf = $met_predictions[0]; }elsif(@nomet_predictions){ $orf = $nomet_predictions[0]; }else{ warning(id($transcript)." has no translations "); return $transcript; } #Here we take the best prediction with a methionine unless #there aren't any of the best prediction without a #methoinine is more than twice the length my $orf_start = $orf->[1]; my $orf_end = $orf->[2]; my $translation = Bio::EnsEMBL::Translation->new(); logger_info("Best orf for ".id($transcript)." ".$orf->[0]. " long ".$orf_start." to ".$orf_end); my ($translation_start, $translation_end, $translation_start_Exon, $translation_end_Exon); my $exon_count = 0; my $pos = 1; foreach my $exon(@{$transcript->get_all_Exons}){ $exon_count++; logger_info("exon:$exon_count exon_length:".$exon->length." pos:$pos orf_start:$orf_start orf_end:$orf_end pos+:".($pos + $exon->length - 1)); if ( $orf_start >= $pos && $orf_start <= $pos + $exon->length - 1 ){ $translation_start = $orf_start - $pos+1; $translation_start_Exon = $exon; } if($orf_end >= $pos && $orf_end <= $pos + $exon->length - 1){ $translation_end = $orf_end - $pos + 1; $translation_end_Exon = $exon; } $pos += $exon->length; } if(!$translation_start || !$translation_end || !$translation_start_Exon || !$translation_end_Exon){ warning("problems making the translation ". "for ".id($transcript)); return $transcript; }else{ $translation->start($translation_start); $translation->end($translation_end); $translation->start_Exon($translation_start_Exon); $translation->end_Exon($translation_end_Exon); $transcript->translation($translation); } my $found_start = 0; my $found_end = 0; my $last_end_phase; my $first_exon = 1; # print "Setting phases on transcript after adding translation\n"; foreach my $exon(@{$transcript->get_all_Exons}){ $exon->phase(-1); $exon->end_phase(-1); # print " Have exon " . $exon->start . " " . $exon->end . "\n"; if($translation->start_Exon == $exon){ if($translation->start == 1 && $first_exon){ $exon->phase(0); # print " setting start phase on it to 0 (tstart = 1 and is start_Exon)\n"; } $found_start = 1; }elsif($found_start and not $found_end){ $exon->phase($last_end_phase); # print " setting start phase on it to last_end_phase ($last_end_phase)\n"; } my $end_phase; if($exon == $translation->start_Exon){ $end_phase = ($exon->end - ($exon->start + $translation->start - 1) +1 ) %3; # print " start_Exon end phase calculated ($end_phase)\n"; }else{ $end_phase = (($exon->length + $exon->phase) %3); # print " end phase calculated ($end_phase)\n"; } if(($exon == $translation->end_Exon && $exon->length == $translation->end)){ # print " setting end phase to $end_phase (end exon condition)\n"; $exon->end_phase($end_phase); } $found_end = 1 if($exon == $translation->end_Exon); if (($found_start and !$found_end)) { # print " setting end phase to $end_phase (found_start and not found_end condition)\n"; $exon->end_phase($end_phase); } $last_end_phase = $exon->end_phase; $first_exon = 0; } return $transcript; } =head2 run_translate Arg [1] : Bio::EnsEMBL::Transcript Arg [2] : boolean, do want predictions with met or not Function : run the program translate and find coordinates of translations in transcripts cdna Returntype: arrayref of an array or arrays, each element containing an array of length, start and end Exceptions: Example : =cut sub run_translate{ my ($trans,$met) = @_; my $trans_id = id($trans); my $seq = $trans->seq; $seq->display_id($trans_id); my $file = write_seqfile($seq); my $command = "/usr/local/ensembl/bin/translate"; $command .= " -m " if($met); $command .= " ".$file." | "; logger_info($command); open ( ORF, $command ) || throw( "Error running translate" ); my @orf_predictions; ORF: while ( <ORF> ){ chomp; next ORF unless /\>/; my @entries = split; next ORF unless ( $entries[3] && $entries[5] ); my $id = $entries[1]; my $orf_length = $entries[3]; $orf_length =~s/\,//; $entries[5] =~/(\d+)\.\.(\d+)/; my $orf_start = $1; my $orf_end = $2; if ($orf_start>=$orf_end ) { # print "can't compute translation for this transcript as translation start >= translation end : $orf_start >= $orf_end \n " ; next ORF ; } # Check if there's a stop codon and add it if ($orf_end + 3 <= $seq->length) { my $codon = uc($seq->subseq($orf_end+1,$orf_end+3)); if ($codon eq 'TAA' || $codon eq 'TAG' || $codon eq 'TGA' ) { $orf_end+=3; } } my @prediction = ($orf_length,$orf_start,$orf_end); push( @orf_predictions, \@prediction ); } my @sorted_predictions = map { $_->[1] } sort { $b->[0] <=> $a->[0] } map { [$_->[0], $_] } @orf_predictions; unlink $file; return \@sorted_predictions; } =head2 validate_Translation_coords Arg [1] : Bio::EnsEMBL::Transcript Arg [2] : (optional) boolean, allow negative start coordinates Function : check coordinates of translation of transcript Returntype: boolean, 1 if translation is ok =cut sub validate_Translation_coords{ my ($transcript, $allow_negative_start) = @_; my $translation = $transcript->translation; if(!$allow_negative_start && ($translation->start < 1)){ warning(Translation_info($transcript)." has a start ".$translation->start. " less than 1"); return 0; } if($translation->end < 0 || $translation->end > $translation->end_Exon->length){ warning(Translation_info($transcript)." has an end ".$translation->end. " less than 1 or greater than ".$translation->end_Exon->length); return 0; } return 1; } 1;