Raw content of Bio::EnsEMBL::Analysis::Runnable::Pseudogene2x # # You may distribute this module under the same terms as perl itself # =pod =head1 NAME Adaptation of Pseudogene calling code for 2x genomes =cut package Bio::EnsEMBL::Analysis::Runnable::Pseudogene2x; use strict; use vars qw(@ISA); use Bio::EnsEMBL::Analysis::Runnable::Pseudogene; use Bio::EnsEMBL::Analysis::Config::Pseudogene; @ISA = qw(Bio::EnsEMBL::Analysis::Runnable::Pseudogene); # Object preamble - inherits from Bio::EnsEMBL::Root; sub transcript_evidence{ my ($self,$transcript,$gene) =@_; my $repeat_blocks = $self->get_repeats($gene); my $results; my @exons = @{$transcript->get_all_Exons}; @exons = sort {$a->start <=> $b->start} @exons; my $prev_exon = undef; my $total_intron_len = 0; my $covered_intron_len = 0; my $total_exon_len = 0; my $covered_exon_len = 0; my $n_real_intron = 0; my $n_frameshift_intron = 0; my $covered_introns = 0 ; my $covered_exons = 0 ; my $num_introns = 0; # ignore small terminal exons; they are mostly # alignment artefacts rather than indicators of pseudogenes while(@exons and $exons[0]->length <= $self->PS_FRAMESHIFT_INTRON_LENGTH()) { shift @exons; } while(@exons and $exons[-1]->length <= $self->PS_FRAMESHIFT_INTRON_LENGTH()) { pop @exons; } #print STDERR "TRANSCRIPT ", $transcript->dbID, "\n"; foreach my $exon (@exons) { # need to check whether the exon like on a gap or not my $exon_in_gap = 0; if ($self->seqlevel) { my $overlaps = 0; foreach my $c (@{$self->seqlevel}) { if ($exon->start <= $c->end and $exon->end >= $c->start) { $overlaps = 1; last; } } $exon_in_gap = 1 if not $overlaps; } if (defined($prev_exon)) { my $intron = Bio::EnsEMBL::Feature->new( -START => $prev_exon->end+1, -END => $exon->start-1, -STRAND => $exon->strand ); # ignore the intron if it lies completely in a sequence gap; my $intron_in_gap = 0; if ($self->seqlevel) { my $overlaps = 0; foreach my $c (@{$self->seqlevel}) { if ($intron->start <= $c->end and $intron->end >= $c->start) { $overlaps = 1; last; } } $intron_in_gap = 1 if not $overlaps; } if (not $intron_in_gap) { if ($intron->length > $self->PS_FRAMESHIFT_INTRON_LENGTH()) { $n_real_intron++; # need to restrict ourselves to parts of intron that are # on real sequence my @seq_intron_bits = $self->_intersect_list($intron, @{$self->seqlevel}); foreach my $bit (@seq_intron_bits) { $total_intron_len+=$bit->length; $covered_intron_len+=$self->_len_covered($bit,$repeat_blocks); } } else { $n_frameshift_intron++; } $num_introns++; #print STDERR "Assessed REAL intron ", $intron->length, "\n"; } else { #print STDERR "Skipping GAP intron ", $intron->start, " ", $intron->end, "\n"; } } if (not $exon_in_gap) { my $seq_feature_exon = Bio::EnsEMBL::Feature->new( -START => $exon->start, -END => $exon->end, -STRAND => $exon->strand ); my @seq_exon_bits = $self->_intersect_list($seq_feature_exon, @{$self->seqlevel}); foreach my $bit (@seq_exon_bits) { $total_exon_len+=$bit->length; $covered_exon_len+=$self->_len_covered($bit,$repeat_blocks); } #print STDERR "Assessed REAL exon\n"; } else { #print STDERR "Skipping GAP exon\n"; } $prev_exon = $exon; } #calculate percentage coverage if ($total_intron_len > 0) { $covered_introns = (($covered_intron_len/$total_intron_len)*100); } if ($total_exon_len > 0) { $covered_exons = (($covered_exon_len/$total_exon_len)*100) } #print "COVERED INTRONS = $covered_introns\n"; #print "COVERED EXONS = $covered_exons\n"; $results = { 'num_introns' => $num_introns, 'total_exon_len' => $total_exon_len, 'covered_exons' => $covered_exons, 'total_intron_len' => $total_intron_len, 'covered_introns' => $covered_introns, 'real_introns' => $n_real_intron, 'frameshift_introns' => $n_frameshift_intron }; return $results; } sub _intersect_list { my ($self, $f1, @blocks) = @_; my @ret; foreach my $f (@blocks) { if ($f->end < $f1->start) { next; } elsif ($f->start > $f1->end) { next; } else { my $start = $f1->start; my $end = $f1->end; $start = $f->start if $f->start > $start; $end = $f->end if $f->end < $end; push @ret, Bio::EnsEMBL::Feature->new(-start => $start, -end => $end, -strand => $f1->strand); } } @ret = sort { $a->start <=> $b->start } @ret; return @ret; } sub seqlevel { my ($self, $val) = @_; if (defined $val) { $self->{_seqlevel} = $val; } return $self->{_seqlevel}; } return 1;