Raw content of FlyBaseToolbox #!/usr/local/ensembl/bin/perl -w package FlyBaseToolbox; use strict; use warnings; use Bio::EnsEMBL::Utils::Argument qw(rearrange); use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate verbose); use Bio::EnsEMBL::SimpleFeature; use Bio::EnsEMBL::Transcript; use Bio::EnsEMBL::Translation; use Bio::EnsEMBL::Exon; use Bio::EnsEMBL::Gene; use Exporter; our(@EXPORT,@ISA); @ISA = ('Exporter'); @EXPORT =qw(add_TranslationObject convert_to_exon_coords setExonPhases); $|=1; # alternative tools / methods ############################# sub add_TranslationObject{ my ($cds_id,$nw_transcript,$cds_start,$cds_end,$cds_strand) = @_; my $seq_start = -1; my $seq_end = -1 ; my ($first_coding_exon, $last_coding_exon); # getting first and last coding exon ################################################# for my $ex (@{$nw_transcript->get_all_Exons()}) { # forward strand #################################### if ($nw_transcript->strand == 1) { if ($ex->start <= $cds_start && $cds_start <= $ex->end) { # EXON CONTAINS TSS, coord-conversion $seq_start = convert_to_exon_coords($cds_start, $ex->start,$ex->end,$ex->strand ); $first_coding_exon = $ex; # set start-phase of first coding exon $first_coding_exon->phase("0"); print "set first coding exon ".$ex->stable_id." 1\n"; } if ($ex->start <= $cds_end && $cds_end <= $ex->end) { # EXON CONTAINS TES, coord-conversion and add the stop-codon (3bp.) to the seq-end $seq_end = convert_to_exon_coords($cds_end, $ex->start,$ex->end,$ex->strand ) + 3 ; $last_coding_exon = $ex; print "set last coding exon ".$ex->stable_id." 1\n"; } } # reverse strand ##################################### elsif ($nw_transcript->strand == -1) { if ( ($ex->start <= $cds_end) && ($cds_end <= $ex->end)) { $first_coding_exon = $ex; $seq_start = ($ex->end - $cds_end) + 1 ; print "set first coding exon ".$ex->stable_id." -1\n"; } if ( ($ex->start <= $cds_start) && ($cds_start <= $ex->end)) { $last_coding_exon = $ex; $seq_end = ($last_coding_exon->end - $cds_start)+ 1 + 3 ; print "set last coding exon ".$ex->stable_id." -1\n"; } } else { throw("strand of transcript is not '1' or '-1'."); } } my $tl = Bio::EnsEMBL::Translation->new( -START_EXON => $first_coding_exon, -END_EXON => $last_coding_exon, -SEQ_START => $seq_start, -SEQ_END => $seq_end, -STABLE_ID => $cds_id, -VERSION => "3", ); # add translation $nw_transcript->translation($tl); print "first ".$first_coding_exon->stable_id." \n"; print "last ".$last_coding_exon->stable_id." \n"; # set end-phase for case if single exon contains 2 UTR-regions at end throw("There is a CDS-Start or CDS-End, but I can't find an exon which spans these points\n") unless ($first_coding_exon && $last_coding_exon ); if ($first_coding_exon eq $last_coding_exon) { my $tl_end = $nw_transcript->translation->end; if ($first_coding_exon->length - $tl_end > 0) { # single-exon gene with UTR $first_coding_exon->end_phase("-1"); } else { my $endphase = ($first_coding_exon->length - $seq_start) % 3 ; $first_coding_exon->end_phase($endphase); } } # consistency checks throw("There seems to be no cds-region-start in CDS-ID $cds_id\n") if ($seq_start eq "-1"); throw("There seems to be no cds-region-end in CDS-ID $cds_id\n") if ($seq_end eq "-1") ; return $nw_transcript; } sub setExonPhases{ my ( $self,$transcript) = @_; my $fce = $transcript->translation->start_Exon; my $lce = $transcript->translation->end_Exon; my @all_exons = @{$transcript->get_all_Exons}; my $end_phase; for my $exon (@all_exons) { ##### start processing forward strand ##### if ($exon->strand == 1 ) { if ($exon->start < $fce->start && $exon->end < $fce->end) { # 5'-complete UTR $exon->phase("-1"); $exon->end_phase("-1"); } if ($exon->start >= $fce->start && $exon->end <=$lce->end) { # exon is fce if ($exon eq $fce) { # set startphase of first exon $exon->phase( 0 ) ; # set end-phase $end_phase = ($exon->length - $transcript->translation->start + 1 )%3; $exon->end_phase($end_phase); } # exon is middle exon if ( $exon ne $fce && $exon ne $lce) { $exon->phase($end_phase); $end_phase =( $exon->phase + $exon->length ) % 3; $exon->end_phase($end_phase); } # exon is lce if ($exon eq $lce) { # only change phase if single-exon-gene if ($exon ne $fce) { $exon->phase($end_phase); } # check if exon contains UTR-tailing end if ( ($exon->length - $transcript->translation->end ) == 0) { $exon->end_phase( ($end_phase + $exon->length) % 3); } else { # 3'UTR $exon->end_phase("-1"); } } } if ($exon->start > $lce->end) { # 3'-complete UTR $exon->end_phase("-1"); $exon->phase("-1"); } } else { ##### start processing reverse strand ##### # first coding exon if ($exon eq $fce) { $exon->phase( 0 ) ; $end_phase = ($exon->length - ($transcript->translation->start -1) ) % 3; $exon->end_phase($end_phase); } # middle exon if ($exon ne $fce && $exon ne $lce) { $exon->phase($end_phase); $end_phase = ($exon->phase + $exon->length) % 3; $exon->end_phase($end_phase); } # exon is lce if ($exon eq $lce) { if ($exon ne $fce) { $exon->phase($end_phase); } # check if exon contains UTR-tailing end if ( ($exon->length - $transcript->translation->end ) == 0) { $exon->end_phase( ($end_phase + $exon->length) % 3); } else { # exon has 3' UTR-end $exon->end_phase("-1"); } } # now the UTR's if ($exon->start > $fce->start) { # 5' UTR $exon->phase("-1"); $exon->end_phase("-1"); } # 3'UTR if ($exon->start < $lce->start) { $exon->phase("-1"); $exon->end_phase("-1"); } } #end REVSTRAND } #allexons return $transcript; } #=pod #=head3 alternative methods #=head2 add_DBEntry # Usage : adds a DBEntry to the object # Function : # Return-Value: #=cut #sub add_DBEntry { # my ($self,$obj) = @_; # my $dbentry = new Bio::EnsEMBL::DBEntry( # -adaptor => $adaptor, # -primary_id => $pid, # -dbname => $dbname, # -release => $release, # -display_id => $did, # -description => $description # ); #} =pod =head2 convert_to_exon_coords Usage : convert_to_exon_coords ($cds_coord, $exon_start, $exon_end ) Function : checks if the cds-coordiante (start or end) lies in an exon Return-Value: returns the coordiante of the TSS in exon_coordiantes if given coordinate is inside the exon, and "-1" if cds_coord is not =cut sub convert_to_exon_coords{ my ( $cds_coord, $exon_start, $exon_end, $exon_strand ) = @_; my $cds_coord_converted = "-1"; warn_inconsistency("Exon-start-coord >= exon_end_coord: $exon_start\t$exon_end\n") if ($exon_start >= $exon_end); # check if cds-coordinate is inside exon if ($cds_coord >= $exon_start) { if ($cds_coord <= $exon_end) { # cds_coord is in exon, convert coordiantes to exon-coords if ($exon_strand eq "+1") { $cds_coord_converted = $cds_coord - $exon_start +1 ; } elsif ( $exon_strand eq "-1") { # on rev-strand, $exon_end is the start and $exon_start is end $cds_coord_converted = $cds_coord - $exon_start +1 ; } else { # exon is on unkown strand... what now ? warn_inconsistency("Exon is on unknown strand, coords: $exon_start:$exon_end:$exon_strand\n"); $cds_coord_converted =-1; } } } throw ("coord-error\n") if ($cds_coord_converted eq "0"); return $cds_coord_converted; }