ensembl Insertion
Package variables | General documentation | Methods
Toolbar
WebCvs | Raw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Utils::Exception qw ( throw info )
InterimExon
Length
StatMsg
Utils qw ( print_exon )
Synopsis
No synopsis!
Description
No description!
Methods
process_insert
No description
Code
Methods description
None available.
Methods code
process_insert (description | prev | next | Top)
# Applies the effect of a cDNA insertion to an exon and its transcript.
#
# Positional arguments:
#   $cdna_ins_pos_ref - reference to the cDNA position of the basepair
#                       immediately to the left of the insert; the referenced
#                       value is reduced in place when a frameshift intron is
#                       created
#   $insert_len       - length of the insert
#   $exon             - exon that should contain the insert
#   $transcript       - transcript whose coding coordinates are adjusted
#
# Every outcome is recorded on the exon as a StatMsg built from a bit-code
# (EXON | INSERT | length code, plus flags for the case that applied).
#
# Returns nothing.  Calls throw() when the insert lies outside the exon and
# is not within 3bp upstream of the exon's cDNA start.
sub process_insert {
  my ($cdna_ins_pos_ref, $insert_len, $exon, $transcript) = @_;

  info("insert ($insert_len) at " . $$cdna_ins_pos_ref);

  my $code = StatMsg::EXON | StatMsg::INSERT | Length::length2code($insert_len);

  # Sanity check: the insert has to sit completely inside the exon.
  my $inside_exon = $$cdna_ins_pos_ref >= $exon->cdna_start()
                 && $$cdna_ins_pos_ref <  $exon->cdna_end();

  if (!$inside_exon) {
    # Small (<3bp) matches can be swallowed whole by a frameshift intron, so
    # an insert may end up directly in front of a newly split exon.
    my $just_upstream = $$cdna_ins_pos_ref < $exon->cdna_start()
                     && $$cdna_ins_pos_ref + 3 >= $exon->cdna_start();

    if ($just_upstream) {
      # TODO: it is not settled how this situation should be handled; for
      # now the exon is flagged as CONFUSED and failed.
      $exon->add_StatMsg(StatMsg->new($code | StatMsg::CONFUSED));
      $exon->fail(1);
      return;
    }

    throw("Unexpected: insertion is outside of exon boundary\n"
          . " ins_left = $$cdna_ins_pos_ref\n"
          . " ins_right = " . ($$cdna_ins_pos_ref+1) . "\n"
          . " cdna_exon_start = ". $exon->cdna_start()."\n"
          . " cdna_exon_end = ". $exon->cdna_end()."\n");
  }

  if (   $$cdna_ins_pos_ref >= $transcript->cdna_coding_start()
      && $$cdna_ins_pos_ref <  $transcript->cdna_coding_end()) {
    #
    # Case 1: the insert is inside the CDS.
    #
    info("insertion in cds ($insert_len)");
    info("BEFORE CDS INSERT:");
    print_exon($exon, $transcript);

    $code |= StatMsg::CDS;

    # The CDS grows by the length of the insert.
    $transcript->move_cdna_coding_end($insert_len);

    my $overhang = $insert_len % 3;
    if ($overhang) {
      # The insert is not a whole number of codons: split the exon in two
      # around a frameshift intron so the reading frame is kept.
      $code |= StatMsg::FRAMESHIFT;
      info("introducing frameshift intron to maintain reading frame");
      _split_exon_for_frameshift($cdna_ins_pos_ref, $overhang,
                                 $exon, $transcript);
    }

    info("AFTER CDS INSERT:");
    print_exon($exon, $transcript);
  }
  elsif ($$cdna_ins_pos_ref < $transcript->cdna_coding_start()) {
    #
    # Case 2: the insert is in the 5' UTR (or between the 5' UTR and CDS).
    #
    info("insertion ($insert_len) in 5' utr");
    $code |= StatMsg::FIVE_PRIME | StatMsg::UTR;

    # The whole coding region moves downstream by the insert length.
    $transcript->move_cdna_coding_start($insert_len);
    $transcript->move_cdna_coding_end($insert_len);
  }
  elsif ($$cdna_ins_pos_ref >= $transcript->cdna_coding_end()) {
    #
    # Case 3: the insert is in the 3' UTR (or between the CDS and 3' UTR).
    # Coding coordinates are unaffected, only the status code is updated.
    #
    info("insert ($insert_len) in 3' utr");
    $code |= StatMsg::THREE_PRIME | StatMsg::UTR;
  }
  else {
    #
    # Default: sanity check, none of the cases above applied.
    #
    throw("Unexpected insert case encountered");
  }

  $exon->add_StatMsg(StatMsg->new($code));
  $exon->fix_phase($transcript);

  return;
}

# Splits $exon in two around a frameshift intron of $overhang bases placed
# at the start of the inserted region, and registers the new leading exon
# with the transcript.
#
#   $cdna_ins_pos_ref - reference to the cDNA insert position; reduced by
#                       $overhang because that part of the insert becomes
#                       intron and is no longer cDNA
#   $overhang         - insert length modulo 3 (non-zero)
#   $exon             - exon being split; becomes the trailing part
#   $transcript       - transcript that receives the new leading exon
sub _split_exon_for_frameshift {
  my ($cdna_ins_pos_ref, $overhang, $exon, $transcript) = @_;

  # The leading exon ends right before the insert.
  my $head_len = $$cdna_ins_pos_ref - $exon->cdna_start() + 1;

  # Top-level copy of the original exon: values that are references stay
  # shared between both parts.  Stat msgs are meant to be shared, so that
  # rejecting one part of the split rejects them all.
  my $head_exon = InterimExon->new();
  %{$head_exon} = %{$exon};

  $exon->add_StatMsg(StatMsg->new(StatMsg::EXON | StatMsg::SPLIT));

  # The frameshift intron eats into the start of the inserted region; in
  # cDNA coordinates the trailing exon starts immediately after the
  # leading one.
  $head_exon->cdna_end($head_exon->cdna_start() + $head_len - 1);
  $exon->cdna_start($head_exon->cdna_end() + 1);
  $exon->cdna_end($exon->cdna_end() - $overhang);

  # The CDS shrinks by the length of the new intron.
  $transcript->move_cdna_coding_end(-$overhang);

  $head_exon->set_split_phases($exon, $transcript);

  # The insert length will be added to the cDNA position, but the part used
  # for the intron is not cDNA anymore, so compensate for it here.
  # TODO: may have to check that the end of the CDS has not been reached.
  $$cdna_ins_pos_ref -= $overhang;

  if ($exon->strand() == 1) {
    # Forward strand: the leading exon ends where the insert begins and the
    # trailing exon starts after the frameshift intron.
    $head_exon->end($head_exon->start() + $head_len - 1);
    $exon->start($exon->start() + $head_len + $overhang);
  }
  else {
    # Otherwise the same split is applied from the other end of the exon.
    $head_exon->start($head_exon->end() - $head_len + 1);
    $exon->end($exon->end() - ($head_len + $overhang));
  }

  $transcript->add_Exon($head_exon);

  return;
}

1;
}
General documentation
No general documentation available.