Raw content of Bio::LiveSeq::Translation # $Id: Translation.pm,v 1.12 2002/09/25 08:57:52 heikki Exp $ # # bioperl module for Bio::LiveSeq::Translation # # Cared for by Joseph Insana <insana@ebi.ac.uk> <jinsana@gmx.net> # # Copyright Joseph Insana # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =head1 NAME Bio::LiveSeq::Translation - Translation class for LiveSeq =head1 SYNOPSIS #documentation needed =head1 DESCRIPTION This stores informations about aminoacids translations of transcripts. The implementation is that a Translation object is the translation of a Transcript object, with different possibilities of manipulation, different coordinate system and eventually its own ranges (protein domains). =head1 AUTHOR - Joseph A.L. Insana Email: Insana@ebi.ac.uk, jinsana@gmx.net Address: EMBL Outstation, European Bioinformatics Institute Wellcome Trust Genome Campus, Hinxton Cambs. CB10 1SD, United Kingdom =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::LiveSeq::Translation; $VERSION=1.8; # Version history: # Thu Mar 23 14:41:52 GMT 2000 v.1.0 begun # Sat Mar 25 04:08:59 GMT 2000 v 1.2 valid(), label(), position() # Tue Mar 28 03:37:17 BST 2000 v 1.3 added inheritance from Transcript, subseq relies on it! # Fri Mar 31 16:53:53 BST 2000 v 1.4 new seq() function that checks for stop codons: it now returns only up to the stop but doesn't continue if stop not found # Fri Mar 31 18:45:07 BST 2000 v 1.41 now it asks for Transcript->downstream_seq # Fri Mar 31 19:20:04 BST 2000 v 1.49 seq() now works correctly # Thu Apr 13 00:10:29 BST 2000 v 1.5 start and end now take the information from Transcript # Thu Apr 27 16:18:55 BST 2000 v 1.6 translation_table info added # Thu May 11 17:30:41 BST 2000 v 1.66 position method updated so to return a position also for labels not in frame (not at 1st triplet position) # Mon May 22 14:59:14 BST 2000 v 1.7 labelsubseq added # Mon May 22 15:22:12 BST 2000 v 1.71 labelsubseq tweaked for cases where startlabel==endlabel (no useless follow() query!) # Mon May 22 15:28:49 BST 2000 v 1.74 modified seq() so that the "*" is printed # Wed Jun 7 04:02:18 BST 2000 v 1.75 added offset() # Thu Jun 29 15:10:22 BST 2000 v 1.76 bug corrected for elongation mutations, if stop codon is not found downstream # Wed Mar 28 16:37:37 BST 2001 v 1.8 carp -> warn,throw (coded methods in SeqI) use strict; #use Carp qw(croak carp cluck); use vars qw($VERSION @ISA); use Bio::LiveSeq::SeqI 3.2; # uses SeqI, inherits from it use Bio::PrimarySeq; @ISA=qw(Bio::LiveSeq::Transcript ); =head2 new Title : new Usage : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr); Function: generates a new Bio::LiveSeq::Translation Returns : reference to a new object of class Translation Errorcode -1 Args : reference to an object of class Transcript =cut sub new { my ($thing, %args) = @_; my $class = ref($thing) || $thing; my ($obj,%translation); my $transcript=$args{-transcript}; $obj = \%translation; $obj = bless $obj, $class; unless ($transcript) { $obj->throw("$class not initialised because no -transcript given"); } unless (ref($transcript) eq "Bio::LiveSeq::Transcript") { $obj->throw("$class not initialised because no object of class Transcript given"); } #my $startbase = $transcript->start; #my $endbase = $transcript->end; my $strand = $transcript->strand; my $seq = $transcript->{'seq'}; $obj->{'strand'}=$strand; $obj->{'seq'}=$seq; $obj->{'transcript'}=$transcript; $obj->{'alphabet'}="protein"; $transcript->{'translation'}=$obj;# set the Translation ref into its Transcript return $obj; } =head2 get_Transcript Title : valid Usage : $transcript = $obj->get_Transcript() Function: retrieves the reference to the object of class Transcript (if any) attached to a LiveSeq object Returns : object reference Args : none =cut sub get_Transcript { my $self=shift; return ($self->{'transcript'}); } # These get redefined here, overriding the SeqI ones sub change { my ($self)=@_; $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!"); return (-1); } sub positionchange { my ($self)=@_; $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!"); return (-1); } sub labelchange { my ($self)=@_; $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!"); return (-1); } # this just returns the translation of the transcript, without checking for # stop codons sub transl_seq { my $self=shift; my $transcript=$self->get_Transcript; my $translation=$transcript->translate(undef, undef, undef, $self->translation_table)->seq; return $translation; } # version 1.74 -> now the "*" is printed sub seq { my $self=shift; my $proteinseq; my $transcript=$self->get_Transcript; my $translation=$transcript->translate(undef, undef, undef, $self->translation_table)->seq; my $stop_pos=index($translation,"*"); if ($stop_pos == -1) { # no stop present, continue downstream my $downstreamseq=$transcript->downstream_seq(); #carp "the downstream is: $downstreamseq"; # debug my $cdnaseq=$transcript->seq(); my $extendedseq = new Bio::PrimarySeq(-seq => "$cdnaseq$downstreamseq", -alphabet => 'dna' ); $translation=$extendedseq->translate(undef, undef, undef, $self->translation_table)->seq; #carp "the new translation is: $translation"; # debug $stop_pos=index($translation,"*"); if ($stop_pos == -1) { # still no stop present, return warning $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1); undef $stop_pos; $proteinseq=$translation; } else { $proteinseq=substr($translation,0,$stop_pos+1); #carp "the new stopped translation is: $proteinseq, because the stop is at position $stop_pos"; # debug } } else { $proteinseq=substr($translation,0,$stop_pos+1); } return $proteinseq; } sub length { my $self=shift; my $seq=$self->seq; my $length=length($seq); return $length; } sub all_labels { my $self=shift; return $self->get_Transcript->all_labels; } # counts in triplet. Only a label matching the beginning of a triplet coding # for an aminoacid is considered valid when setting coordinate_start # (i.e. only in frame!) sub valid { my ($self,$label)=@_; my $i; my @labels=$self->get_Transcript->all_labels; my $length=$#labels; while ($i <= $length) { if ($label == $labels[$i]) { return (1); # found } $i=$i+3; } return (0); # not found } # returns the label to the first nucleotide of the triplet coding for $position aminoacid sub label { my ($self,$position)=@_; my $firstlabel=$self->coordinate_start; # this is in_frame checked if ($position > 0) { $position=$position*3-2; } else { # if position = 0 this will be caught by Transcript, error thrown $position=$position*3; } return $self->get_Transcript->label($position,$firstlabel); # check for coord_start different } # returns position (aminoacids numbering) of a particular label # used to return 0 for not in frame labels # now returns the position anyway (after version 1.66) sub position { my ($self,$label)=@_; my $firstlabel=$self->coordinate_start; # this is in_frame checked my $position=$self->get_Transcript->position($label,$firstlabel); use integer; my $modulus=$position % 3; if ($position == 0) { return (0); } elsif ($position > 0) { if ($modulus != 1) { $self->warn("Attention! Label $label is not in frame ". "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable if ($modulus == 2) { return ($position / 3 + 1); } else { # i.e. modulus == 0 return ($position / 3); } } return ($position / 3 + 1); } else { # pos < 0 if ($modulus != 0) { $self->warn("Attention! Label $label is not in frame ". "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable return ($position / 3 - 1); # ok for both other positions } return ($position / 3); } $self->throw( "WEIRD: execution shouldn't have reached here"); return (0); # this should never happen, but just in case } # note: it inherits subseq and labelsubseq from Transcript! sub start { my $self=shift; return ($self->{'transcript'}->start); } sub end { my $self=shift; return ($self->{'transcript'}->end); } =head2 aa_ranges Title : aa_ranges Usage : @proteinfeatures = $translation->aa_ranges() Function: to retrieve all the LiveSeq AARange objects attached to a Translation, usually created out of a SwissProt database entry crossreferenced from an EMBL CDS feature. Returns : an array Args : none =cut # returns an array of obj_ref of AARange objects attached to the Translation sub aa_ranges { my $self=shift; return ($self->{'aa_ranges'}); } sub translation_table { my $self=shift; $self->get_Transcript->translation_table(@_); } # returns all aminoacids "affected" i.e. all aminoacids coded by any codon # "touched" by the range selected between the labels, even if only partially. # it's not optimized for performance but it's useful sub labelsubseq { my ($self,$start,$length,$end)=@_; my ($pos1,$pos2); my $transcript=$self->get_Transcript; if ($start) { unless ($transcript->valid($start)) { $self->warn("Start label not valid"); return (-1); } $pos1=$self->position($start); } if ($end) { if ($end == $start) { $length=1; } else { unless ($transcript->valid($end)) { $self->warn("End label not valid"); return (-1); } unless ($transcript->follows($start,$end) == 1) { $self->warn("End label does not follow Start label!"); return (-1); } $pos2=$self->position($end); $length=$pos2-$pos1+1; } } my $sequence=$self->seq; return (substr($sequence,$pos1-1,$length)); } # return the offset in aminoacids from LiveSeq protein sequence and SwissProt # sequence (usually as a result of an INIT_MET or a gap) sub offset { my $self=shift; return ($self->{'offset'}); } 1;