Raw content of InterimExon use strict; use warnings; package InterimExon; use Bio::EnsEMBL::Utils::Exception qw(info warning throw); use StatMsg; # # errors which are fatal for exons # my @FATAL = (StatMsg::DELETE | StatMsg::CDS | StatMsg::LONG, StatMsg::INSERT | StatMsg::CDS | StatMsg::LONG, StatMsg::DELETE | StatMsg::CDS | StatMsg::MEDIUM | StatMsg::FRAMESHIFT, StatMsg::INSERT | StatMsg::CDS | StatMsg::MEDIUM | StatMsg::FRAMESHIFT, StatMsg::STRAND_FLIP, StatMsg::INVERT, StatMsg::SCAFFOLD_SPAN, StatMsg::CONFUSED); sub new { my $class = shift; return bless {'StatMsgs' => [], 'fail' => 0}, $class; } # # returns true if this exon has a 'fatal' error # sub is_fatal { my $self = shift; foreach my $msg (@{$self->get_all_StatMsgs}) { foreach my $code (@FATAL) { if(($msg->code() & $code) == $code) { #info("Code is Fatal: ". StatMsg::code2str($msg->code())); return 1; } } #info("Code is NON fatal=". StatMsg::code2str($msg->code())); } return 0; } sub add_StatMsg { my $self = shift; my $statMsg = shift; push @{$self->{'StatMsgs'}}, $statMsg; } sub get_all_StatMsgs { my $self = shift; return $self->{'StatMsgs'}; } sub last_StatMsg { my $self = shift; my @msgs = @{$self->{'StatMsgs'}}; return undef if(!@msgs); return $msgs[$#msgs]; } sub flush_StatMsgs { my $self = shift; $self->{'StatMsgs'} = []; } sub stable_id { my $self = shift; $self->{'stable_id'} = shift if(@_); return $self->{'stable_id'}; } sub start { my $self = shift; $self->{'start'} = shift if(@_); return $self->{'start'}; } sub end { my $self = shift; $self->{'end'} = shift if(@_); return $self->{'end'}; } sub length { my $self = shift; return $self->end() - $self->start() + 1; } sub strand { my $self = shift; $self->{'strand'} = shift if(@_); return $self->{'strand'}; } sub seq_region { my $self = shift; $self->{'seq_region'} = shift if(@_); return $self->{'seq_region'}; } sub cdna_start { my $self = shift; $self->{'cdna_start'} = shift if(@_); return $self->{'cdna_start'}; } sub cdna_end { my $self = shift; $self->{'cdna_end'} = shift if(@_); return $self->{'cdna_end'}; } sub slice { my $self = shift; $self->{'slice'} = shift if(@_); return $self->{'slice'}; } sub start_phase { my $self = shift; $self->{'start_phase'} = shift if(@_); return $self->{'start_phase'}; } sub end_phase { my $self = shift; $self->{'end_phase'} = shift if(@_); return $self->{'end_phase'}; } sub fail { my $self = shift; if(@_) { my $fail = shift; #warning("Setting ".$self->stable_id." to failed.\n") if($fail); $self->{'fail'} = $fail; } return $self->{'fail'}; } # # This fixes the start and end phases which can get messed up when the # UTR and CDS move a bit. We maintain correct phase throughout the program # but there are a couple of problems that may arise: # * UTR at one end of an exon may be completely deleted so the start/end phase # needs to change from -1 to 0. # * CDS may shrink due to a deletion at 5prime or 3prime end of CDS. This # that exons which had start or end phase to have a start phase of -1 now. sub fix_phase { my $exon = shift; my $transcript = shift; # do not deal with failed exons, we have no chimp coords for these anyway # since they were completely lost. return if($exon->fail()); if(!defined($transcript->cdna_coding_start)) { throw("cdna coding start not defined!"); } if(!defined($transcript->cdna_coding_end)) { throw("cdna coding end not defined."); } if(!defined($exon->cdna_end())) { throw("exons cdna coding end not defined."); } if(!defined($exon->cdna_start())) { throw("exons cdna coding start not defined."); } if($exon->start_phase() == -1) { if($transcript->cdna_coding_start() == $exon->cdna_start()) { $exon->start_phase(0); } } else { if($transcript->cdna_coding_start() > $exon->cdna_start()) { $exon->start_phase(-1); } } if($exon->end_phase() == -1) { if($transcript->cdna_coding_end() == $exon->cdna_end()) { # no utr left at end of this exon anymore my $cds_len = $transcript->cdna_coding_end - $transcript->cdna_coding_start + 1; $exon->end_phase($cds_len % 3); } } else { if($transcript->cdna_coding_end() < $exon->cdna_end) { # exon end is now utr $exon->end_phase(-1); } } } # # Fixes exons phases of a newly split exon. The end_phase of the first exon # and the start_phase of the second exon are set. # # sub set_split_phases { my $first_exon = shift; my $second_exon = shift; my $transcript = shift; # need to set first exon end phase and second exon start phase if($first_exon->cdna_end > $transcript->cdna_coding_end()) { $first_exon->end_phase(-1); $second_exon->start_phase(-1); return; } if($first_exon->cdna_end() < $transcript->cdna_coding_start()) { $first_exon->end_phase(-1); # beginning of CDS could be right at start of second exon if($second_exon->cdna_start() == $transcript->cdna_coding_start()) { $second_exon->start_phase(0); } else { $second_exon->start_phase(-1); } return; } my $phase; if($first_exon->cdna_start() < $transcript->cdna_coding_start()) { # first exon is partially 5prime UTR my $coding_len = $first_exon->cdna_end() - $transcript->cdna_coding_start(); $phase = $coding_len % 3; } else { # first exon should be all CDS my $sphase; # sometimes start phase may be -1 even though this is all CDS. # this is because we have not fixed the start phase yet and it should # be 0 due to deletion of the utr if($first_exon->start_phase() == -1) { $sphase = 0; } else { $sphase = $first_exon->start_phase(); } $phase = ($first_exon->length() + $sphase) % 3; } $first_exon->end_phase($phase); $second_exon->start_phase($phase); return; } 1;