Raw content of Insertion
use strict;
use warnings;
package Insertion;
use InterimExon;
use Length;
use StatMsg;
use Bio::EnsEMBL::Utils::Exception qw(throw info);
use Utils qw(print_exon);
sub process_insert {
my $cdna_ins_pos_ref = shift; #basepair to left of insert
my $insert_len = shift;
my $exon = shift;
my $transcript = shift;
info("insert ($insert_len) at " .$$cdna_ins_pos_ref);
my $code = StatMsg::EXON | StatMsg::INSERT |
Length::length2code($insert_len);
# sanity check, insert should be completely in exon boundaries
if($$cdna_ins_pos_ref < $exon->cdna_start() ||
$$cdna_ins_pos_ref >= $exon->cdna_end()) {
# because some small (<3bp) matches can be completely eaten away by the
# introduction of frameshift introns it is possible to get an insert
# immediately before a newly created (i.e.) split intron
if($$cdna_ins_pos_ref < $exon->cdna_start &&
$$cdna_ins_pos_ref + 3 >= $exon->cdna_start ) {
### TBD not sure what should be done with this situation
$exon->add_StatMsg(StatMsg->new($code | StatMsg::CONFUSED));
$exon->fail(1);
return;
}
throw("Unexpected: insertion is outside of exon boundary\n" .
" ins_left = $$cdna_ins_pos_ref\n" .
" ins_right = " . ($$cdna_ins_pos_ref+1) . "\n" .
" cdna_exon_start = ". $exon->cdna_start()."\n" .
" cdna_exon_end = ". $exon->cdna_end()."\n");
}
#
# case 1: insert in CDS
#
if($$cdna_ins_pos_ref >= $transcript->cdna_coding_start() &&
$$cdna_ins_pos_ref < $transcript->cdna_coding_end()) {
info("insertion in cds ($insert_len)");
info("BEFORE CDS INSERT:");
print_exon($exon, $transcript);
$code |= StatMsg::CDS;
# adjust CDS end accordingly
$transcript->move_cdna_coding_end($insert_len);
my $frameshift = $insert_len % 3;
if($frameshift) {
$code |= StatMsg::FRAMESHIFT;
# need to create frameshift intron to get reading frame back on track
# exon needs to be split into two
info("introducing frameshift intron to maintain reading frame");
# first exon ends right before insert
my $first_len = $$cdna_ins_pos_ref - $exon->cdna_start() + 1;
# copy the original exon and adjust coords of each to perform 'split'
# share stat msgs between them - if one part of split transcript is
# rejected, they all should be
my $first_exon = InterimExon->new();
%{$first_exon} = %{$exon};
$exon->add_StatMsg(StatMsg->new(StatMsg::EXON | StatMsg::SPLIT));
# frame shift intron eats into start of inserted region
# second exon is going to start right after 'frameshift intron'
# which in cdna coords is immediately after last exon
$first_exon->cdna_end($first_exon->cdna_start + $first_len - 1);
$exon->cdna_start($first_exon->cdna_end + 1);
$exon->cdna_end($exon->cdna_end - $frameshift);
# decrease the length of the CDS by the length of new intron
$transcript->move_cdna_coding_end(-$frameshift);
$first_exon->set_split_phases($exon, $transcript);
# the insert length will be added to the cdna_position
# but part of the insert was used to create the intron and is not cdna
# anymore, so adjust the cdna_position to compensate
$$cdna_ins_pos_ref -= $frameshift;
### TBD may have to check we have not run up to end of CDS here
if($exon->strand() == 1) {
# end the first exon at the beginning of the insert
$first_exon->end($first_exon->start() + $first_len -1 );
# start the next exon after the frameshift intron
$exon->start($exon->start() + $first_len + $frameshift);
} else {
$first_exon->start($first_exon->end() - $first_len + 1);
# start the next exon after the frameshift intron
$exon->end($exon->end() - ($first_len + $frameshift));
}
$transcript->add_Exon($first_exon);
}
info("AFTER CDS INSERT:");
print_exon($exon, $transcript);
}
#
# case 2: insert in 5 prime UTR (or between 5prime UTR and CDS)
#
elsif($$cdna_ins_pos_ref < $transcript->cdna_coding_start()) {
info("insertion ($insert_len) in 5' utr");
$code |= StatMsg::FIVE_PRIME | StatMsg::UTR;
#shift the coding region down as result of insert
$transcript->move_cdna_coding_start($insert_len);
$transcript->move_cdna_coding_end($insert_len);
}
#
# case 3: insert in 3 prime UTR (or between 3prime UTR and CDS)
#
elsif($$cdna_ins_pos_ref >= $transcript->cdna_coding_end()) {
info("insert ($insert_len) in 3' utr");
$code |= StatMsg::THREE_PRIME | StatMsg::UTR;
#do not have to do anything
}
#
# default: sanity check
#
else {
throw("Unexpected insert case encountered");
}
$exon->add_StatMsg(StatMsg->new($code));
$exon->fix_phase($transcript);
return;
}
1;