Bio::EnsEMBL::Analysis::RunnableDB IgSegBuilder
SummaryIncluded librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::RunnableDB::IgSegBuilder
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::IgSegBuilder
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Bio::EnsEMBL::Gene
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Bio::SeqIO
Inherit
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Synopsis
No synopsis!
Description
No description!
Methods
C_LOGICS
No description
Code
C_OUTPUT_BIOTYPE
No description
Code
C_transcripts
No description
Code
D_J_PROXIMITY_THRESHOLD
No description
Code
D_LOGICS
No description
Code
D_OUTPUT_BIOTYPE
No description
Code
D_transcripts
No description
Code
J_LOGICS
No description
Code
J_OUTPUT_BIOTYPE
No description
Code
J_transcripts
No description
Code
LV_LOGICS
No description
Code
LV_OUTPUT_BIOTYPE
No description
Code
LV_transcripts
No description
Code
OUTPUTDB_DATABASES_NAME
No description
Code
SUPPORTING_FEATURE_OUTPUT_LOGIC
No description
Code
TRANDB_DATABASES_NAME
No description
Code
_by_percent_id
No description
Code
_by_total_exon_length
No description
Code
adjust_gene_3
No description
Code
adjust_gene_5
No description
Code
adjust_transcript
No description
Code
cluster_by_genomic_overlap
No description
Code
fetch_input
No description
Code
gene_close_to_others
No description
Code
new
No description
Code
prune_C_transcripts
No description
Code
prune_D_J_transcripts
No description
Code
prune_Exons
No description
Code
prune_LV_transcripts
No description
Code
run
No description
Code
set_gene_stop_codon
No description
Code
supporting_feature_analysis
No description
Code
transfer_exon_supporting_features
No description
Code
transfer_to_local_slices
No description
Code
trim_gene_translation3
No description
Code
trim_gene_translation5
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
C_LOGICSdescriptionprevnextTop
sub C_LOGICS {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{_c_logic} = $value;
  }
  return $self->{_c_logic};
}
C_OUTPUT_BIOTYPEdescriptionprevnextTop
sub C_OUTPUT_BIOTYPE {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_output_c_biotype} = $val;
  }

  return $self->{_output_c_biotype};
}
C_transcriptsdescriptionprevnextTop
sub C_transcripts {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{_c_transcripts} = $value;
  }
  return $self->{_c_transcripts};
}
D_J_PROXIMITY_THRESHOLDdescriptionprevnextTop
sub D_J_PROXIMITY_THRESHOLD {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_dj_prox} = $val;
  }

  return $self->{_dj_prox};

}


1;
}
D_LOGICSdescriptionprevnextTop
sub D_LOGICS {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{_d_logic} = $value;
  }
  return $self->{_d_logic};
}
D_OUTPUT_BIOTYPEdescriptionprevnextTop
sub D_OUTPUT_BIOTYPE {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_output_d_biotype} = $val;
  }

  return $self->{_output_d_biotype};
}
D_transcriptsdescriptionprevnextTop
sub D_transcripts {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{_d_transcripts} = $value;
  }
  return $self->{_d_transcripts};
}
J_LOGICSdescriptionprevnextTop
sub J_LOGICS {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{_j_logic} = $value;
  }
  return $self->{_j_logic};
}
J_OUTPUT_BIOTYPEdescriptionprevnextTop
sub J_OUTPUT_BIOTYPE {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_output_j_biotype} = $val;
  }

  return $self->{_output_j_biotype};
}
J_transcriptsdescriptionprevnextTop
sub J_transcripts {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{_j_transcripts} = $value;
  }
  return $self->{_j_transcripts};
}
LV_LOGICSdescriptionprevnextTop
sub LV_LOGICS {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{_lv_logic} = $value;
  }
  return $self->{_lv_logic};
}
LV_OUTPUT_BIOTYPEdescriptionprevnextTop
sub LV_OUTPUT_BIOTYPE {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_output_lv_biotype} = $val;
  }

  return $self->{_output_lv_biotype};
}
LV_transcriptsdescriptionprevnextTop
sub LV_transcripts {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{_lv_transcripts} = $value;
  }
  return $self->{_lv_transcripts};
}
OUTPUTDB_DATABASES_NAMEdescriptionprevnextTop
sub OUTPUTDB_DATABASES_NAME {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_outputdb_name} = $val;
  }

  return $self->{_outputdb_name};
}
SUPPORTING_FEATURE_OUTPUT_LOGICdescriptionprevnextTop
sub SUPPORTING_FEATURE_OUTPUT_LOGIC {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_supporting_logic} = $val;
  }

  return $self->{_supporting_logic};
}
TRANDB_DATABASES_NAMEdescriptionprevnextTop
sub TRANDB_DATABASES_NAME {
  my ($self, $val) = @_;

  if (defined $val) {
    $self->{_transcriptdb_name} = $val;
  }

  return $self->{_transcriptdb_name};
}
_by_percent_iddescriptionprevnextTop
sub _by_percent_id {
  my $alen = 0;
  my $blen = 0;
  foreach my $e (@{$a->get_all_Exons}) {
    $alen += $e->length;
  }
  foreach my $e (@{$b->get_all_Exons}) {
    $blen += $e->length;
  }
  
  my ($aevi) = @{$a->get_all_supporting_features};
  my ($bevi) = @{$b->get_all_supporting_features};

  return ($bevi->percent_id <=> $aevi->percent_id or $blen <=> $alen);
}




############################################################
# containers
############################################################
}
_by_total_exon_lengthdescriptionprevnextTop
sub _by_total_exon_length {
  my $alen = 0;
  my $blen = 0;
  foreach my $e (@{$a->get_all_Exons}) {
    $alen += $e->length;
  }
  foreach my $e (@{$b->get_all_Exons}) {
    $blen += $e->length;
  }
  
  my ($aevi) = @{$a->get_all_supporting_features};
  my ($bevi) = @{$b->get_all_supporting_features};

  return ($blen <=> $alen or $bevi->percent_id <=> $aevi->percent_id);
}
adjust_gene_3descriptionprevnextTop
sub adjust_gene_3 {
  my ($self, $gene, $motif) = @_;

  my $seq = uc($gene->slice->seq);

  # we will consider adding/removing a maximum of
# 2 bps to the transcript end (to allow for
# a partial codon at the end sometimes being
# included in the translation, and sometimes not)
foreach my $t (@{$gene->get_all_Transcripts}) { my $reg_start = $t->end - 1; my $reg_end = $t->end + 2 + length($motif); my $subseq = substr($seq, $reg_start - 1, $reg_end - $reg_start + 1); # favour extension so take the last match
if ((my $subseqoff = index($subseq, $motif)) >= 0) { while((my $anotheroff = index($subseq, $motif, $subseqoff + 1)) >= 0) { $subseqoff = $anotheroff; } my $newend = $subseqoff + $reg_start - 1; $self->adjust_transcript($t, 0, $newend - $t->end); } } } #########################################################
}
adjust_gene_5descriptionprevnextTop
sub adjust_gene_5 {
  my ($self, $gene, $motif) = @_;

  my $seq = uc($gene->slice->seq);

  # we will consider adding/removing a maximum of
# 2 bps to the transcript start (to allow for
# a partial codon at the start sometimes being
# included in the translation, and sometimes not)
my (@adjusted_trans); foreach my $t (@{$gene->get_all_Transcripts}) { my $reg_end = $t->start + 1; my $reg_start = $t->start - 2 - length($motif); my $subseq = substr($seq, $reg_start - 1, $reg_end - $reg_start + 1); # favour extension so take the first match
if ((my $subseqoff = index($subseq, $motif)) >= 0) { my $newstart = $subseqoff + length($motif) + $reg_start; $self->adjust_transcript($t, $t->start - $newstart, 0); } } } #########################################################
}
adjust_transcriptdescriptionprevnextTop
sub adjust_transcript {
  my ($self, $tran, $offset5, $offset3) = @_;

  # offsets in the following are how much to EXTEND transcript
# by at each end (-> negative offsets are truncations)
my @exons = @{$tran->get_all_Exons}; if ($offset5) { $exons[0]->start($exons[0]->start - $offset5); $exons[0]->phase( ($exons[0]->phase - $offset5) % 3 ); if ($tran->translation) { $tran->translation->end($exons[-1]->length); } $tran->recalculate_coordinates; } if ($offset3) { $exons[-1]->end($exons[-1]->end + $offset3); $exons[-1]->end_phase( ($exons[-1]->end_phase + $offset3) % 3 ); if ($tran->translation) { $tran->translation->end($exons[-1]->length); } $tran->recalculate_coordinates; }
}
cluster_by_genomic_overlapdescriptionprevnextTop
sub cluster_by_genomic_overlap {
  my ($self, $tran) = @_;

  my %tran_groups;
  foreach my $t (@$tran) {
    my $sr = $t->slice->seq_region_name;

    push @{$tran_groups{$sr}}, $t;
  }

  my @genes;

  foreach my $grp (values %tran_groups) {
    my @c;
    foreach my $t (sort {$a->start <=> $b->start} @$grp) {
      if (not @c or $t->start > $c[-1]->{end} + 1) {
        push @c, {
          start => $t->start,
          end   => $t->end,
          trans => [$t],
        };
      } else {
        push @{$c[-1]->{trans}}, $t;
        if ($c[-1]->{end} < $t->end) {
          $c[-1]->{end} = $t->end;
        }
      }

    }
    foreach my $c (@c) {
      # all transcripts in cluster should be same strand; if
# not, use voting to decide correct strand and remove
# transcripts from other strand
my @t = @{$c->{trans}}; my @forward = grep { $_->strand > 0 } @t; my @reverse = grep { $_->strand < 0 } @t; my @all; if (scalar(@forward) > scalar(@reverse)) { @all = @forward; } elsif (scalar(@reverse) > scalar(@forward)) { @all = @reverse; } else { # equal number of forward and reverse transcripts in this locus;
# arbitrarily choose forward
@all = @forward; } my $gene = Bio::EnsEMBL::Gene->new(); map { $gene->add_Transcript($_) } @all; push @genes, $gene; } } return\@ genes; } ########################################################
# the following method transfers the genes to a set
# of smaller slices; some slices are reverse strand
# to ensure that all gene and sequence is forward
# strand (makes motif checking easier)
#
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my($self) = @_;
  
  my $trandb = $self->get_dbadaptor($self->TRANDB_DATABASES_NAME);

  my $slice = $trandb->get_SliceAdaptor->fetch_by_name($self->input_id);
  my $tlslice = $trandb->get_SliceAdaptor->fetch_by_region('toplevel',
                                                           $slice->seq_region_name);
  $self->query($tlslice);

  my (@lv, @d, @j, @c);

  my @groups = 
      (
       { logics => $self->LV_LOGICS, result =>\@ lv, },
       { logics => $self->D_LOGICS,  result =>\@ d,  },
       { logics => $self->J_LOGICS,  result =>\@ j,  },
       { logics => $self->C_LOGICS,  result =>\@ c,  },
       );

  foreach my $grp (@groups) {
    my ($logics, $result) = ($grp->{logics}, $grp->{result});

    if ($logics) {
      foreach my $logic (@$logics) {
        foreach my $t (@{$slice->get_all_Transcripts(1, $logic)}) {
          map { $_->get_all_supporting_features } ($t, @{$t->get_all_Exons});
          $t = $t->transfer($tlslice);


          if ($t->coding_region_start > $t->start or $t->coding_region_end < $t->end) {
            my @e = @{$t->get_all_translateable_Exons};
            my $tr = Bio::EnsEMBL::Translation->new(-start_Exon => $e[0],
                                                    -end_Exon => $e[-1],
                                                    -start => 1,
                                                    -end => $e[-1]->length);
            my @sfs = @{$t->get_all_supporting_features};
            $t = Bio::EnsEMBL::Transcript
                ->new(-analysis => $t->analysis,
                      -exons    => $t->get_all_translateable_Exons);
            $t->add_supporting_features(@sfs);
            $t->translation($tr);
            
            if ($t->get_all_Exons->[0]->phase < 0) {
              $t->get_all_Exons->[0]->phase(0);
            }
            if ($t->get_all_Exons->[-1]->end_phase < 0) {
              $t->get_all_Exons->[-1]->end_phase(0);
            }
          }
          push @{$result}, $t;
        }
      }                       
    }
  }

  $self->LV_transcripts(\@lv);
  $self->D_transcripts(\@d);
  $self->J_transcripts(\@j);
  $self->C_transcripts(\@c);
}

#############################################################
}
gene_close_to_othersdescriptionprevnextTop
sub gene_close_to_others {
  my ($self, $gene, @glists) = @_;

  # return true if gene is within 2Mb of at least
# one other gene in each of the given lists
if (not $self->D_J_PROXIMITY_THRESHOLD) { return 1; } else { foreach my $list (@glists) { my $member_close = 0; foreach my $g (@$list) { if (abs($g->start - $gene->start) <= $self->D_J_PROXIMITY_THRESHOLD) { $member_close = 1; last; } } if (not $member_close) { return 0; } } } return 1;
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);    
  
  $self->read_and_check_config($IGSEG_CONFIG_BY_LOGIC);
           
  # other stuff here
return $self; } ############################################################
}
prune_C_transcriptsdescriptionprevnextTop
sub prune_C_transcripts {
  my ($self, $gene) = @_;

  my @tran = @{$gene->get_all_Transcripts};
  @tran = grep { $_->translate->seq !~ /\*/ } @tran;
  @tran = sort _by_total_exon_length @tran;

  my (@newtran, @redun_tran, @intron_lists, %all_exons);
  foreach my $tran (@tran) {
    my @exons = sort {$a->start <=> $b->start} @{$tran->get_all_Exons};
    my @introns = sort {$a->start <=> $b->start} @{$tran->get_all_Introns};
    my $is_redundant = 0;
    foreach my $il (@intron_lists) {
      my $all_found = 1;
      foreach my $ti (@introns) {
        my $found_in_other = 0;
        foreach my $oi (@$il) {
          if ($ti->start == $oi->start and
              $ti->end   == $oi->end) {
            $found_in_other = 1;
            last;
          }
        }
        if (not $found_in_other) {
          $all_found = 0;
        }        
      }
      if ($all_found) {
        $is_redundant = 1;
        last;
      }
    }
    if (not $is_redundant) {
      push @newtran, $tran;
      push @intron_lists,\@ introns;
      map { $all_exons{$_->start . ":" . $_->end} = $_} @exons;
    } else {
      # all introns have been seen in another transcript,
# but perhaps the terminal exons contribute something
# unique?
my $all_covered = 1; foreach my $tex ($exons[0], $exons[-1]) { my $covered = 0; foreach my $eid (keys %all_exons) { if ($all_exons{$eid}->start <= $tex->start and $all_exons{$eid}->end >= $tex->end) { # the exon is completely covered by another
$covered = 1; last; } } if (not $covered) { $all_covered = 0; last; } } if (not $all_covered) { push @newtran, $tran; push @intron_lists,\@ introns; map { $all_exons{$_->start . ":" . $_->end} = $_} @exons; } else { push @redun_tran, $tran; } } } $self->transfer_exon_supporting_features(\@redun_tran,\@ newtran); return Bio::EnsEMBL::Gene->new(-transcripts =>\@ newtran); } ########################################################
}
prune_D_J_transcriptsdescriptionprevnextTop
sub prune_D_J_transcripts {
  my ($self, $gene) = @_;

  my ($best, @others) = sort _by_total_exon_length @{$gene->get_all_Transcripts};

  if (scalar(@{$best->get_all_Exons}) > 1) {
    my ($exon, $min, $max);
    foreach my $e (@{$best->get_all_Exons}) {
      if (not defined $exon) {
        $exon = $e;
        $min = $e->start;
        $max = $e->end;
      } else {
        $min = $e->start if $e->start < $min;
        $max = $e->end   if $e->end > $max;
      }
    }
    $exon->start($min);
    $exon->end($max);

    $best->flush_Exons;
    $best->add_Exon($exon);
  }
  
  # remove translation
$best->translation(undef); $best->recalculate_coordinates; map { $_->phase(-1); $_->end_phase(-1) } @{$best->get_all_Exons}; $self->transfer_exon_supporting_features(\@others, [$best]); return Bio::EnsEMBL::Gene->new(-transcripts => [$best]); } #########################################################
}
prune_ExonsdescriptionprevnextTop
sub prune_Exons {
  my ($self, $gene) = @_;

  my %gene_exons;

  foreach my $tran (@{$gene->get_all_Transcripts}) {
    my @t_exons;
    foreach my $e (@{$tran->get_all_Exons}) {
      my $k = join(":",
                   $e->start,
                   $e->end,
                   $e->strand,
                   $e->phase,
                   $e->end_phase);
      if (not exists $gene_exons{$k}) {
        $gene_exons{$k} = $e;
      } else {
        my %sf;
        map { $sf{$_} = $_ } @{$gene_exons{$k}->get_all_supporting_features};
        foreach my $osf (@{$e->get_all_supporting_features}) {
          if (not exists $sf{$osf}) {
            $gene_exons{$k}->add_supporting_features($osf);
          }
        }
      }
      push @t_exons, $gene_exons{$k};

      if ($tran->translation) {
        if ($e == $tran->translation->start_Exon) {
          $tran->translation->start_Exon($gene_exons{$k});
        }
        if ($e == $tran->translation->end_Exon) {
          $tran->translation->end_Exon($gene_exons{$k});
        }
      }
    }
    
    $tran->flush_Exons;
    map { $tran->add_Exon($_) } @t_exons;
  }
}


#############################################################
}
prune_LV_transcriptsdescriptionprevnextTop
sub prune_LV_transcripts {
  my ($self, $gene) = @_;

  my @ftran;  
  my @tran = @{$gene->get_all_Transcripts};
  @tran = grep { $_->translate->seq !~ /\*/ } @tran;

  #
# remove transcripts with frameshift introns
#
TRAN: foreach my $t (@tran) { foreach my $i (@{$t->get_all_Introns}) { if ($i->length <= 10) { next TRAN; } } push @ftran, $t; } @tran = @ftran; @ftran = (); #
# if there are transcripts with a single intron, remove others
#
foreach my $t (@tran) { if (scalar(@{$t->get_all_Introns}) == 1) { push @ftran, $t; } } if (@ftran) { @tran = @ftran; @ftran = (); } #
# if there are transcripts with all-consensus introns, remove others
#
my $seq = uc($gene->slice->seq); foreach my $t (@tran) { my $all_consensus = 1; foreach my $i (@{$t->get_all_Introns}) { my $left = substr($seq, $i->start - 1, 2); my $right = substr($seq, $i->end - 2, 2); if ($left ne 'GT' or $right ne 'AG') { $all_consensus = 0; last; } } push @ftran, $t if $all_consensus; } if (@ftran) { @tran = @ftran; @ftran = (); } #
# if there are transcripts that begin with ATG, remove others
#
foreach my $t (@tran) { my $start = substr($seq, $t->start - 1, 3); push @ftran, $t if $start eq 'ATG'; } if (@ftran) { @tran = @ftran; @ftran = (); } #
# finally, remove redundant transcripts; only those with
# unique introns are kept
#
@tran = sort _by_total_exon_length @tran; my @redun_tran; foreach my $t (@tran) { my @intr = @{$t->get_all_Introns}; my $redundant = 0; foreach my $kt (@ftran) { my @kintr = @{$kt->get_all_Introns}; my $matches = 0; foreach my $i (@intr) { foreach my $ki (@kintr) { if ($i->start == $ki->start and $i->end == $ki->end) { $matches++; last; } } } if ($matches == scalar(@intr)) { $redundant = 1; last; } } if (not $redundant) { push @ftran, $t; } else { push @redun_tran, $t; } } #
# finally, check that the results are not trivial variants
# differing by only a small number of amino acids
#
@tran = @ftran; @ftran = (); @tran = sort _by_percent_id @tran; foreach my $t (@tran) { my $redundant = 0; foreach my $ft (@ftran) { my $all_covered = 1; foreach my $i (@{$t->get_all_Introns}) { # find correponding intron
my $found_matching = 0; foreach my $fi (@{$ft->get_all_Introns}) { if ($i->overlaps($fi) and abs($i->start - $fi->start <= 9) and abs($i->end - $fi->end <= 9)) { $found_matching = 1; last; } } if (not $found_matching) { $all_covered = 0; last; } } if ($all_covered) { $redundant = 1; last; } } if (not $redundant) { push @ftran, $t; } else { push @redun_tran, $t; } } $self->transfer_exon_supporting_features(\@redun_tran,\@ ftran); return Bio::EnsEMBL::Gene->new(-transcripts =>\@ ftran); } ########################################################
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;

  my @genes;

  # LV segments...
my @lv_genes = @{$self->cluster_by_genomic_overlap($self->LV_transcripts)}; @lv_genes = @{$self->transfer_to_local_slices(\@lv_genes)}; foreach my $g (@lv_genes) { $self->adjust_gene_3($g, "CAC"); $self->trim_gene_translation3($g); $g = $self->prune_LV_transcripts($g); if (@{$g->get_all_Transcripts}) { $g = $g->transfer($self->query); $g->biotype($self->LV_OUTPUT_BIOTYPE); map { $_->biotype($self->LV_OUTPUT_BIOTYPE) } @{$g->get_all_Transcripts}; push @genes, $g; } } @lv_genes = @genes; @genes = (); # C segments...
my @c_genes = @{$self->cluster_by_genomic_overlap($self->C_transcripts)}; @c_genes = @{$self->transfer_to_local_slices(\@c_genes)}; foreach my $g (@c_genes) { $self->adjust_gene_5($g, "AG"); $self->set_gene_stop_codon($g); $g = $self->prune_C_transcripts($g); if (@{$g->get_all_Transcripts}) { $g = $g->transfer($self->query); $g->biotype($self->C_OUTPUT_BIOTYPE); map { $_->biotype($self->C_OUTPUT_BIOTYPE) } @{$g->get_all_Transcripts}; push @genes, $g; } } @c_genes = @genes; @genes = (); # D segments...
my @d_genes = @{$self->cluster_by_genomic_overlap($self->D_transcripts)}; @d_genes = grep { $self->gene_close_to_others($_,\@ lv_genes,\@ c_genes) } @d_genes; @d_genes = @{$self->transfer_to_local_slices(\@d_genes)}; foreach my $g (@d_genes) { $self->adjust_gene_5($g, "GTG"); $self->adjust_gene_3($g, "CAC");; $g = $self->prune_D_J_transcripts($g); if (@{$g->get_all_Transcripts}) { $g = $g->transfer($self->query); $g->biotype($self->D_OUTPUT_BIOTYPE); map { $_->biotype($self->D_OUTPUT_BIOTYPE) } @{$g->get_all_Transcripts}; push @genes, $g; } } @d_genes = @genes; @genes = (); # J segments...
my @j_genes = @{$self->cluster_by_genomic_overlap($self->J_transcripts)}; @j_genes = grep { $self->gene_close_to_others($_,\@ lv_genes,\@ c_genes) } @j_genes; @j_genes = @{$self->transfer_to_local_slices(\@j_genes)}; foreach my $g (@j_genes) { $self->adjust_gene_5($g, "GTG"); $self->adjust_gene_3($g, "GT"); $g = $self->prune_D_J_transcripts($g); if (@{$g->get_all_Transcripts}) { $g = $g->transfer($self->query); $g->biotype($self->J_OUTPUT_BIOTYPE); map { $_->biotype($self->J_OUTPUT_BIOTYPE) } @{$g->get_all_Transcripts}; push @genes, $g; } } @j_genes = @genes; @genes = (); foreach my $g (@lv_genes, @c_genes, @j_genes, @d_genes) { $self->prune_Exons($g); push @genes, $g; } @genes = sort { $a->start <=> $b->start } @genes; $self->output(\@genes); } ############################################################
# helper methods
############################################################
}
set_gene_stop_codondescriptionprevnextTop
sub set_gene_stop_codon {
  my ($self, $gene) = @_;

  my $seq = uc($gene->slice->seq);
  
  my %stops = (TAA => 1,
               TGA => 1,
               TAG => 1);

  foreach my $t (@{$gene->get_all_Transcripts}) {
    my $reg_start = $t->end - 2;

    my $subseq = substr($seq, $t->end - 3, 3);

    if (not exists $stops{$subseq}) {
      my $next_seq = substr($seq, $t->end, 3);

      if (exists $stops{$next_seq}) {
        $self->adjust_transcript($t, 0, 3); 
      }
    }
  }
}

#########################################################
}
supporting_feature_analysisdescriptionprevnextTop
sub supporting_feature_analysis {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{_support_ana} = $value;
  }
  return $self->{_support_ana};
} 


############################################################
# config holders
############################################################
}
transfer_exon_supporting_featuresdescriptionprevnextTop
sub transfer_exon_supporting_features {
  my ($self, $src_trans, $tgt_trans) = @_;

  my @src_sf;

  foreach my $t (@{$src_trans}) {
    foreach my $e (@{$t->get_all_Exons}) {
      push @src_sf, @{$e->get_all_supporting_features};
    }
  }

  @src_sf = sort { $a->start <=> $b->start } @src_sf;

  foreach my $t (@{$tgt_trans}) {
    my @e = sort { $a->start <=> $b->start } @{$t->get_all_Exons};

    foreach my $e (sort { $a->start <=> $b->start } @{$t->get_all_Exons}) {
      foreach my $sf (@src_sf) {
        if ($sf->start <= $e->end and $sf->end >= $e->start) {
          $e->add_supporting_features($sf);
        } elsif ($sf->start > $e->end) {
          last;
        }
      }
    }
  }
}


#######
}
transfer_to_local_slicesdescriptionprevnextTop
sub transfer_to_local_slices {
  my ($self, $genes) = @_;
  
  my @groups;
  foreach my $g (sort { $a->start <=> $b->start } @$genes) {
    if (not @groups or 
        $groups[-1]->{strand} != $g->strand or
        $g->start > $groups[-1]->{end} + 100000) {
      push @groups, {
        start => $g->start,
        end   => $g->end, 
        strand => $g->strand,
        genes => [],
      };
    } elsif ($g->end > $groups[-1]->{end}) {
      $groups[-1]->{end} = $g->end;
    }
    push @{$groups[-1]->{genes}}, $g;
  }

  my @retgenes;
  foreach my $grp (@groups) {
    my $gstart = $grp->{start} - 10; 
    $gstart = 1 if $gstart < 1;
    my $gend   = $grp->{end} + 10; 
    $gstart = $self->query->end if $gstart > $self->query->end;
    
    my $local_slice = $self->get_dbadaptor($self->TRANDB_DATABASES_NAME)->
        get_SliceAdaptor->fetch_by_region($self->query->coord_system->name,
                                          $self->query->seq_region_name,
                                          $gstart,
                                          $gend,
                                          $grp->{strand});
    push @retgenes, map { $_->transfer($local_slice) } @{$grp->{genes}}; 
  }

  return\@ retgenes;
}


#############################################################
}
trim_gene_translation3descriptionprevnextTop
sub trim_gene_translation3 {
  my ($self, $gene) = @_;
  
  my $seq = uc($gene->slice->seq);
  
  my $codons_to_check = 2;
  
  foreach my $t (@{$gene->get_all_Transcripts}) {
    my $exon = $t->get_all_Exons->[-1];
    my $spare = $exon->end_phase;
    my $adjust = 0;
    
    for(my $i=0; $i < $codons_to_check; $i++) {
      my $start = $exon->end - $spare - 2 - (3 * $i);
      my $codon = substr($seq, $start - 1, 3);
      if ($codon eq 'TAA' or 
          $codon eq 'TGA' or 
          $codon eq 'TAG') {
        $adjust = (3 * $i) + 3 + $spare;
      }
    }
    
    if ($adjust) {
      $t->translation->end($exon->length - $adjust);
    }
  }
}

#########################################################
}
trim_gene_translation5descriptionprevnextTop
sub trim_gene_translation5 {
  my ($self, $gene) = @_;
  
  my $seq = uc($gene->slice->seq);
  
  my $codons_to_check = 2;
  
  foreach my $t (@{$gene->get_all_Transcripts}) {
    my $exon = $t->get_all_Exons->[0];
    my $spare = (3 - $exon->phase) % 3;
    my $adjust = 0;
    
    for(my $i=0; $i < $codons_to_check; $i++) {
      my $start = $exon->start + $spare + (3 * $i);
      my $codon = substr($seq, $start - 1, 3);
      if ($codon eq 'TAA' or 
          $codon eq 'TGA' or 
          $codon eq 'TAG') {
        $adjust = (3 * $i) + 3 + $spare;
      }
    }
    
    if ($adjust) {
      $t->translation->start($adjust);
    }
  }
}

#########################################################
}
write_outputdescriptionprevnextTop
sub write_output {
  my ($self) = @_;

  my $out_db = $self->get_dbadaptor($self->OUTPUTDB_DATABASES_NAME);
  my $ga = $out_db->get_GeneAdaptor;

  my $sf_ana = $self->analysis;
  if ($self->SUPPORTING_FEATURE_OUTPUT_LOGIC) {
    $sf_ana = $out_db->get_AnalysisAdaptor
        ->fetch_by_logic_name($self->SUPPORTING_FEATURE_OUTPUT_LOGIC);
    if (not defined $sf_ana) {
      $sf_ana = $self->analysis;
    }
  }

  foreach my $g (@{$self->output}) {
    $g->analysis($self->analysis);
    foreach my $t (@{$g->get_all_Transcripts}) {
      $t->analysis($self->analysis);
      foreach my $sf (@{$t->get_all_supporting_features}) {
        $sf->analysis($sf_ana);
      }
      foreach my $e (@{$t->get_all_Exons}) {
        foreach my $sf (@{$e->get_all_supporting_features}) {
          $sf->analysis($sf_ana);
        }
      }
    }
  }

  foreach my $g (@{$self->output}) {
    $ga->store($g);
  }
}

#############################################################
}
General documentation
No general documentation available.