Raw content of Deletion use strict; use warnings; package Deletion; use InterimExon; use StatMsg; use Length; use Utils qw(print_exon); use Bio::EnsEMBL::Utils::Exception qw(throw info); ############################################################################### # process_delete # # processes a deletion in an exon ############################################################################### sub process_delete { my $cdna_del_pos_ref = shift; my $del_len = shift; my $exon = shift; my $transcript = shift; my $entire_delete = shift; my $del_start = $$cdna_del_pos_ref + 1; my $del_end = $del_start + $del_len - 1; info((($entire_delete) ? 'entire ' : '')."delete ($del_len) at " . $$cdna_del_pos_ref); info("BEFORE cds: ". $transcript->cdna_coding_start().'-'. $transcript->cdna_coding_end()); info("BEFORE del_start = $del_start"); # sanity check, deletion should be completely in # or adjacent to exon boundaries if(!$entire_delete && ($del_start < $exon->cdna_start() - 1 || $del_start > $exon->cdna_end() + 1)) { throw("Unexpected: deletion is outside of exon boundary\n" . " del_start = $del_start\n" . " cdna_exon_start =". $exon->cdna_start() . " cdna_exon_end =". $exon->cdna_end()); } # break delete into composite parts and deal with each part seperately # # deal with five prime UTR portion of delete # if($del_start < $transcript->cdna_coding_start()) { my $utr_del_len; if($del_end >= $transcript->cdna_coding_start()) { $utr_del_len = $transcript->cdna_coding_start() - $del_start; } else { $utr_del_len = $del_len; } process_five_prime_utr_delete($cdna_del_pos_ref, $utr_del_len, $exon, $transcript); # take away the processed part of the deletion $del_start = $$cdna_del_pos_ref + 1; $del_len -= $utr_del_len; $del_end = $del_start + $del_len - 1; } if($del_len == 0) { # no deletion left $exon->fix_phase($transcript) if(!$entire_delete); return; } # # deal with CDS portion of delete # if($del_end >= $transcript->cdna_coding_start() && $del_start <= $transcript->cdna_coding_end()) { my $cds_del_len; if($del_end > $transcript->cdna_coding_end()) { $cds_del_len = $transcript->cdna_coding_end() - $del_start + 1; } else { $cds_del_len = $del_len; } process_cds_delete($cdna_del_pos_ref, $cds_del_len, $exon, $transcript, $entire_delete); # take away the processed part of the deletion # the cdna start is in the same place because $del_start += $$cdna_del_pos_ref + 1; $del_len -= $cds_del_len; $del_end = $del_start + $del_len - 1; } if($del_len == 0) { # no deletion left $exon->fix_phase($transcript) if(!$entire_delete);; return; } # # deal with 3prime portion of delete # # sanity check: if($del_start <= $transcript->cdna_coding_end()) { throw("Unexpected. 3' UTR delete starts before coding end."); } process_three_prime_utr_delete($cdna_del_pos_ref, $del_len, $exon, $transcript); $exon->fix_phase($transcript) if(!$entire_delete); return; } ############################################################################### # process_five_prime_utr_delete # # processes a deletion in the five prime utr of a transcript ############################################################################### sub process_five_prime_utr_delete { my $cdna_del_pos_ref = shift; my $del_len = shift; my $exon = shift; my $transcript = shift; info("delete ($del_len) in 5' utr"); # shift up the CDS $transcript->move_cdna_coding_start(-$del_len); $transcript->move_cdna_coding_end(-$del_len); # create a status message and add it to the exon my $code = StatMsg::EXON | StatMsg::DELETE | StatMsg::FIVE_PRIME | StatMsg::UTR | Length::length2code($del_len); $exon->add_StatMsg(StatMsg->new($code)); return; } ############################################################################### # process_three_prime_utr_delete # # processes a deletion in the three prime utr of a transcript ############################################################################### sub process_three_prime_utr_delete { my $cdna_del_pos_ref = shift; my $del_len = shift; my $exon = shift; my $transcript = shift; #do not have to do anything... info("delete ($del_len) in 3' utr"); # create a status message and add it to the exon my $code = StatMsg::EXON | StatMsg::DELETE | StatMsg::THREE_PRIME | StatMsg::UTR | Length::length2code($del_len); $exon->add_StatMsg(StatMsg->new($code)); return; } ############################################################################### # process_cds_delete # # processes a deletion in the cds of a transcript ############################################################################### sub process_cds_delete { my $cdna_del_pos_ref = shift; my $del_len = shift; my $exon = shift; my $transcript = shift; my $entire_delete = shift; info("delete ($del_len) in cds"); my $del_start = $$cdna_del_pos_ref + 1; my $del_end = $del_start + $del_len - 1; my $code = StatMsg::EXON | StatMsg::DELETE | StatMsg::CDS | Length::length2code($del_len); my $frameshift = $del_len % 3; # # case 1: delete is all of CDS # if($del_start == $transcript->cdna_coding_start() && $del_end == $transcript->cdna_coding_end()) { info("delete ($del_len) is all of cds"); $code |= StatMsg::ENTIRE; # move up CDS end to account for CDS deletion $transcript->move_cdna_coding_end(-$del_len); } # # case 2: delete is at start of CDS # elsif($del_start == $transcript->cdna_coding_start()) { info("delete ($del_len) at start of cds"); $code |= StatMsg::FIVE_PRIME; # move up CDS end to account for CDS deletion $transcript->move_cdna_coding_end(-$del_len); if($frameshift) { $code |= StatMsg::FRAMESHIFT if($frameshift); # move down CDS start to put reading frame back (shrink CDS) info("shifting cds start to restore reading frame"); $transcript->move_cdna_coding_start(3 - $frameshift); } } # # case 3: delete is at end of CDS # elsif($del_end == $transcript->cdna_coding_end()) { info("delete ($del_len) at end of cds"); $code |= StatMsg::THREE_PRIME; # move up CDS end to account for CDS deletion $transcript->move_cdna_coding_end(-$del_len); if($frameshift) { $code |= StatMsg::FRAMESHIFT if($frameshift); # move up CDS end to put reading frame back (shrink CDS) info("shifting cds end to restore reading frame"); $transcript->move_cdna_coding_end($frameshift-3); } } # # case 4: delete is in middle of CDS # elsif($del_end > $transcript->cdna_coding_start() && $del_start < $transcript->cdna_coding_end()) { info("delete ($del_len) in middle of cds"); $code |= StatMsg::MIDDLE; # move up CDS end to account for CDS deletion $transcript->move_cdna_coding_end(-$del_len); if($frameshift && !$entire_delete) { info("BEFORE CDS DELETE:"); print_exon($exon, $transcript); $code |= StatMsg::FRAMESHIFT if($frameshift); # this is going to require splitting the exon # to make a frameshift deletion #first exon is going to end right before deletion my $first_len = $del_start - $exon->cdna_start(); my $intron_len = 3 - $frameshift; #reduce the length of the CDS by the length of the new intron $transcript->move_cdna_coding_end(-$intron_len); # the next match that is added to the cdna position will have too much # sequence because we used part of the sequence to create the frameshift # intron, compensate by reducing cdna position by intron len $$cdna_del_pos_ref -= $intron_len; info("introducing frameshift intron ($intron_len) " . "to maintain reading frame"); # very short exons can be entirely consumed by the intron if($intron_len == $exon->length()) { # still adjust this 0 length intron, because its length # is used in transcript splitting calculations $exon->cdna_end($exon->cdna_end - $intron_len); if($exon->strand() == 1) { $exon->end($exon->end - $intron_len); } else { $exon->start($exon->start + $intron_len); } $code |= StatMsg::ALL_INTRON; $exon->fail(1); } elsif($intron_len > $exon->length()) { $code |= StatMsg::CONFUSED | StatMsg::ALL_INTRON; # still adjust this negative length exon b/c its length is used # in transcript splitting calculations $exon->cdna_end($exon->cdna_end - $intron_len); if($exon->strand() == 1) { $exon->end($exon->end - $intron_len); } else { $exon->start($exon->start + $intron_len); } $exon->fail(1); } elsif($first_len + $intron_len >= $exon->length()) { # we may have encountered a delete at the very end of the exon # in this case we have to take the intron out of the end of this exon # since we are not creating a second one if($exon->strand() == 1) { $exon->end($exon->end - $intron_len); } else { $exon->start($exon->start + $intron_len); } $exon->cdna_end($exon->cdna_end - $intron_len); } else { # second exon is going to start right after 'frameshift intron' if($exon->strand == 1) { # end the current exon at the beginning of the deletion # watch out though, because we may be at the very beginning of # the exon in which case we do not want to create one if($first_len) { my $first_exon = InterimExon->new(); # Copy the original exon and adjust the coords as necessary # Note that these exons will share stat msgs which is what # we want. %{$first_exon} = %{$exon}; $first_exon->cdna_end($exon->cdna_start() + $first_len - 1); $first_exon->end($first_exon->start() + $first_len - 1); $transcript->add_Exon($first_exon); info("FIRST EXON:"); info($first_exon, $transcript); $exon->add_StatMsg(StatMsg->new(StatMsg::EXON | StatMsg::SPLIT)); $exon->cdna_start($first_exon->cdna_end() + 1); $first_exon->set_split_phases($exon, $transcript); } # start next exon after new intron $exon->start($exon->start() + $first_len + $intron_len); $exon->cdna_end($exon->cdna_end - $intron_len); } else { if($first_len) { my $first_exon = InterimExon->new(); # copy the original exon and adjust the coords as necessary # these exons will share stat msgs %{$first_exon} = %{$exon}; $first_exon->cdna_end($exon->cdna_start + $first_len - 1); $first_exon->start($exon->end() - $first_len + 1); $transcript->add_Exon($first_exon); info("FIRST EXON:"); print_exon($first_exon, $transcript); $exon->add_StatMsg(StatMsg->new(StatMsg::EXON | StatMsg::SPLIT)); $exon->cdna_start($first_exon->cdna_end() + 1); $first_exon->set_split_phases($exon, $transcript); } # start next exon after new intron $exon->end($exon->end() - ($first_len + $intron_len)); $exon->cdna_end($exon->cdna_end - $intron_len); } } info("AFTER CDS DELETE:"); print_exon($exon, $transcript); } } # sanity check: else { throw("Unexpected: CDS delete appears to be outside of CDS:\n" . " del_start = $del_start\n". " del_end = $del_end\n" . " cdna_coding_start = ".$transcript->cdna_coding_start() . "\n". " cdna_coding_end = ".$transcript->cdna_coding_end() . "\n"); } $exon->add_StatMsg(StatMsg->new($code)); return; } 1;