None available.
# Validates the exons of an interim transcript, in order, and splits the
# transcript wherever the exon chain cannot belong to one transcript.
#
# A split happens at an exon that:
#   - is flagged failed or fatal (the exon is discarded),
#   - lies on a different seq_region than the preceding exons,
#   - is out of order relative to the previous exon for its strand,
#   - is on a different strand than the preceding exons, or
#   - follows an intron longer than MAX_INTRON_LEN.
# In all but the first case the offending exon is kept and becomes the
# first exon of the remaining transcript. After a split the leading part
# is stored and the remainder of $itranscript is validated again from
# scratch.
#
# Arguments:
#   $itranscript       - interim transcript to check; on a split it is
#                        modified in place by split_itrans
#   $itranscript_array - optional array ref that collects the resulting
#                        transcripts; created when not supplied
#
# Returns an array ref holding every resulting transcript that still has
# at least one exon.
#
# Throws when the first exon does not start at cdna position 1, when an
# exon ends before it starts, or when an exon's genomic length differs
# from its cdna length.
sub check_iexons {
    my $itranscript       = shift;
    my $itranscript_array = shift;

    my $prev_start;
    my $prev_end;
    my $transcript_seq_region;
    my $transcript_strand;

    info("checking exons for : " . $itranscript->stable_id());

    my $first = 1;

    foreach my $iexon (@{ $itranscript->get_all_Exons }) {

        # Failed/fatal exons are dropped: no keep flag, no status message.
        if ($iexon->fail() || $iexon->is_fatal()) {
            info(" failed/fatal exon, splitting transcript");
            return _split_and_recheck($itranscript, $itranscript_array,
                                      $iexon);
        }

        if ($first && $iexon->cdna_start != 1) {
            print_exon($iexon);
            throw("Unexpected: first exon does not have cdna_start = 1");
        }
        $first = 0;

        if ($iexon->end() < $iexon->start()) {
            throw("Unexpected: exon end less than start:\n" .
                  $iexon->stable_id() . ": " .
                  $iexon->start() . '-' . $iexon->end());
        }

        if ($iexon->length != $iexon->cdna_end - $iexon->cdna_start + 1) {
            throw("Unexpected: exon cdna length != exon length:\n" .
                  $iexon->stable_id() . ": " .
                  $iexon->start() . '-' . $iexon->end() . "\n" .
                  "     " . $iexon->cdna_start . '-' . $iexon->cdna_end());
        }

        # All exons of one transcript must share a seq_region.
        if (!defined($transcript_seq_region)) {
            $transcript_seq_region = $iexon->seq_region();
        }
        elsif ($transcript_seq_region ne $iexon->seq_region()) {
            info(" scaffold span, splitting transcript");
            return _split_and_recheck($itranscript, $itranscript_array,
                                      $iexon, 1,
                                      StatMsg::TRANSCRIPT | StatMsg::SCAFFOLD_SPAN);
        }

        # Exons must follow each other in transcription order: ascending
        # coordinates on the forward strand, descending on the reverse.
        if ((defined($prev_end) && $iexon->strand() == 1 &&
             $prev_end > $iexon->start()) ||
            (defined($prev_start) && $iexon->strand() == -1 &&
             $prev_start < $iexon->end())) {
            info(" inversion, splitting transcript");
            return _split_and_recheck($itranscript, $itranscript_array,
                                      $iexon, 1,
                                      StatMsg::TRANSCRIPT | StatMsg::INVERT);
        }

        # All exons of one transcript must share a strand.
        if (!defined($transcript_strand)) {
            $transcript_strand = $iexon->strand();
        }
        elsif ($transcript_strand != $iexon->strand()) {
            info(" strand flip, splitting transcript");
            return _split_and_recheck($itranscript, $itranscript_array,
                                      $iexon, 1,
                                      StatMsg::TRANSCRIPT | StatMsg::STRAND_FLIP);
        }

        # Intron length is the number of bases strictly between the
        # previous exon and this one.
        my $intron_len = 0;
        if (defined($prev_start)) {
            if ($iexon->strand() == 1) {
                $intron_len = $iexon->start - $prev_end - 1;
            }
            else {
                $intron_len = $prev_start - $iexon->end - 1;
            }
        }

        if ($intron_len > MAX_INTRON_LEN) {
            info(" very long intron, splitting transcripts");
            return _split_and_recheck($itranscript, $itranscript_array,
                                      $iexon, 1);
        }

        $prev_end   = $iexon->end();
        $prev_start = $iexon->start();
    }

    # Every exon passed: keep the transcript unless splitting emptied it.
    $itranscript_array ||= [];

    my $total_exons = scalar(@{ $itranscript->get_all_Exons });
    if ($total_exons > 0) {
        push @$itranscript_array, $itranscript;
    }
    else {
        info(" no exons left in transcript");
    }

    return $itranscript_array;
}

# Splits $itranscript at $iexon, stores the leading part (if there is one)
# in the result array and re-validates what remains of $itranscript.
# When $stat_code is given, a StatMsg with that code is attached to
# $itranscript before the split. Returns the result of check_iexons.
sub _split_and_recheck {
    my ($itranscript, $itranscript_array, $iexon, $keep_exon, $stat_code) = @_;

    if (defined($stat_code)) {
        my $stat_msg = StatMsg->new($stat_code);
        $itranscript->add_StatMsg($stat_msg);
    }

    my $first_transcript = split_itrans($itranscript, $iexon, $keep_exon);
    if ($first_transcript) {
        $itranscript_array ||= [];
        push @$itranscript_array, $first_transcript;
    }

    return check_iexons($itranscript, $itranscript_array);
}
} |
# Builds a Bio::EnsEMBL::Transcript from an interim transcript.
#
# The stable id and version are copied from $itrans and one
# Bio::EnsEMBL::Exon is created per interim exon. A translation is
# attached only when the cdna coding region spans at least 3 bases; its
# start and end are expressed as 1-based offsets within the exon that
# contains the coding start and the coding end respectively.
#
# Arguments:
#   $itrans        - interim transcript providing exons and the cdna
#                    coding region
#   $slice_adaptor - adaptor used to fetch the slice of each exon; slices
#                    are requested as 'chromosome' regions of coordinate
#                    system version 'CHIMP1'
#
# Returns the new transcript.
#
# Throws when a translation was created but no exon contains the coding
# start or the coding end.
sub make_Transcript {
    my $itrans        = shift;
    my $slice_adaptor = shift;

    my $transcript = Bio::EnsEMBL::Transcript->new();
    $transcript->stable_id($itrans->stable_id);
    $transcript->version($itrans->version);

    info("making final transcript for ". $itrans->stable_id);

    # Fewer than 3 coding bases cannot encode anything: no translation.
    my $translation;
    if ($itrans->cdna_coding_end - $itrans->cdna_coding_start + 1 >= 3) {
        $translation = Bio::EnsEMBL::Translation->new();
        $transcript->translation($translation);
    }

    my $coding_start = $itrans->cdna_coding_start();
    my $coding_end   = $itrans->cdna_coding_end();

    # Exons on the same seq_region share one slice, so the adaptor is
    # asked only once per region instead of once per exon.
    my %slice_for;

    foreach my $iexon (@{ $itrans->get_all_Exons }) {
        my $seq_region = $iexon->seq_region;

        my $slice = $slice_for{$seq_region};
        if (!$slice) {
            $slice = $slice_adaptor->fetch_by_region('chromosome', $seq_region,
                                                     undef, undef, undef,
                                                     'CHIMP1');
            $slice_for{$seq_region} = $slice;
        }

        my $exon = Bio::EnsEMBL::Exon->new
            (-START     => $iexon->start(),
             -END       => $iexon->end(),
             -STRAND    => $iexon->strand(),
             -PHASE     => $iexon->start_phase(),
             -END_PHASE => $iexon->end_phase(),
             -STABLE_ID => $iexon->stable_id(),
             -SLICE     => $slice);

        $transcript->add_Exon($exon);

        next if !$translation;

        # The exon whose cdna range contains the coding start.
        if ($iexon->cdna_start() <= $coding_start &&
            $iexon->cdna_end()   >= $coding_start) {
            my $translation_start = $coding_start - $iexon->cdna_start() + 1;
            $translation->start_Exon($exon);
            $translation->start($translation_start);
        }

        # The exon whose cdna range contains the coding end.
        if ($iexon->cdna_start() <= $coding_end &&
            $iexon->cdna_end()   >= $coding_end) {
            my $translation_end = $coding_end - $iexon->cdna_start() + 1;
            $translation->end_Exon($exon);
            $translation->end($translation_end);
        }
    }

    if ($translation && !$translation->start_Exon()) {
        print STDERR "Could not find translation start exon in transcript.\n";
        print STDERR "FIRST EXON:\n";
        print_exon($itrans->get_all_Exons->[0]);
        print STDERR "LAST EXON:\n";
        print_exon($itrans->get_all_Exons->[-1], $itrans);
        throw("Unexpected: Could not find translation start exon in transcript\n");
    }

    if ($translation && !$translation->end_Exon()) {
        print STDERR "Could not find translation end exon in transcript.\n";
        print STDERR "FIRST EXON:\n";
        print_exon($itrans->get_all_Exons->[0]);
        print STDERR "LAST EXON:\n";
        print_exon($itrans->get_all_Exons->[-1], $itrans);
        throw("Unexpected: Could not find translation end exon in transcript\n");
    }

    return $transcript;
}
1; } |
# Cuts an interim transcript in two at a given exon.
#
# Every exon that precedes $bad_exon is moved into a newly created
# InterimTranscript carrying the stable id and version of $itrans.
# $itrans itself is emptied and refilled with the exons that follow
# $bad_exon; $bad_exon is put back in front of them when $keep_exon is
# true and is dropped otherwise.
#
# The cdna coding region of the new leading transcript is copied from
# $itrans and then trimmed to the exons it received. The cdna coordinates
# of the exons left in $itrans, and its coding region, are shifted down by
# the cdna length that was removed in front of them.
#
# Arguments:
#   $itrans    - interim transcript to split; modified in place
#   $bad_exon  - exon at which to split; must be one of $itrans's exons
#   $keep_exon - true to keep $bad_exon as first exon of the remainder
#
# Returns the new leading transcript, or undef when no exon precedes
# $bad_exon.
#
# Throws when $bad_exon is not found among the exons of $itrans.
sub split_itrans {
    my ($itrans, $bad_exon, $keep_exon) = @_;

    my @trailing = @{ $itrans->get_all_Exons() };
    my @leading;

    my $first_trans = InterimTranscript->new();
    $first_trans->stable_id($itrans->stable_id());
    $first_trans->version($itrans->version);

    info("==FIRST TRANSCRIPT:\n");

    # Move exons to the leading transcript until the bad exon shows up.
    my $candidate;
    while ($candidate = shift @trailing) {
        last if $candidate == $bad_exon;

        print_exon($candidate);
        push @leading, $candidate;
        $first_trans->add_Exon($candidate);
    }

    # Running out of exons means the bad exon was never seen.
    throw("unexpected: could not find bad exon in transcript")
        unless $candidate;

    my $fate = $keep_exon ? 'keeping' : 'discarding';
    info("==BAD EXON: " . $fate);
    print_exon($bad_exon);

    unshift @trailing, $bad_exon if $keep_exon;

    # Rebuild the original transcript from the trailing exons only.
    $itrans->flush_Exons();
    info("==SECOND TRANSCRIPT:\n");
    for my $exon (@trailing) {
        print_exon($exon);
        $itrans->add_Exon($exon);
    }

    # Coding region of the leading transcript: start from the original
    # region, then clip it to what the leading exons actually cover.
    if (@leading) {
        $first_trans->cdna_coding_start($itrans->cdna_coding_start());
        $first_trans->cdna_coding_end($itrans->cdna_coding_end());

        my $lead_start = $leading[0];
        my $lead_end   = $leading[-1];

        if ($lead_end->cdna_end() < $first_trans->cdna_coding_start() ||
            $lead_start->cdna_start() > $first_trans->cdna_coding_end()) {
            # No overlap with the coding region at all: mark as non-coding.
            $first_trans->cdna_coding_start(1);
            $first_trans->cdna_coding_end(0);
        }
        elsif ($lead_end->cdna_end() >= $first_trans->cdna_coding_start() &&
               $lead_end->cdna_end() < $first_trans->cdna_coding_end()) {
            # Coding region runs past the last leading exon: cut it there.
            $first_trans->cdna_coding_end($lead_end->cdna_end());
        }
    }

    my $result = @leading ? $first_trans : undef;

    return $result unless @trailing;

    # Amount of cdna removed in front of the trailing exons.
    my $cdna_shift = 0;
    for my $exon (@leading) {
        $cdna_shift += $exon->length();
        info("cdna_shift = $cdna_shift\n");
    }

    # A discarded bad exon counts too, provided it had cdna coordinates.
    if (!$keep_exon
        && defined($bad_exon->cdna_start())
        && defined($bad_exon->cdna_end())) {
        $cdna_shift += $bad_exon->length();
    }

    info("Shifting CDNA of second transcript by $cdna_shift\n");

    return $result unless $cdna_shift;

    for my $exon (@trailing) {
        next if $exon->fail();

        $exon->cdna_start($exon->cdna_start() - $cdna_shift);
        $exon->cdna_end($exon->cdna_end() - $cdna_shift);
    }

    $itrans->move_cdna_coding_start(-$cdna_shift);
    $itrans->move_cdna_coding_end(-$cdna_shift);

    # Clamp a coding region that was shifted off the front.
    $itrans->cdna_coding_start(1) if $itrans->cdna_coding_start() < 1;
    $itrans->cdna_coding_end(0)   if $itrans->cdna_coding_end() < 1;

    return $result;
}
} |