Bio::EnsEMBL::Analysis::Runnable
HavanaAdder
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::HavanaAdder
Package variables
Privates (from "my" definitions)
%coding_exon_cache;
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::General qw ( GB_INPUTID_REGEX )
Bio::EnsEMBL::Analysis::Config::HavanaAdder qw ( GB_ENSEMBL_INPUT_GENETYPE GB_HAVANA_INPUT_GENETYPE GB_HAVANA_INPUT_TRANSCRIPTTYPES MERGED_TRANSCRIPT_OUTPUT_TYPE HAVANA_LOGIC_NAME MERGED_GENE_LOGIC_NAME MERGED_TRANSCRIPT_LOGIC_NAME )
Inherit
Synopsis
# This is the main analysis database
my $genebuilder = new Bio::EnsEMBL::Analysis::Runnable::HavanaAdder
(
'-slice' => $self->query,
'-input_id' => $self->input_id,
);
Description
This module reads your favourite annotations (ensembl, protein_coding,...)
on the one hand, and manually curated plus features on the other hand.
The product of Bio::EnsEMBL::Analysis::Runnable::HavanaAdder is a combination of
both annotations where redundant transcripts are eliminated.
The resulting transcripts are combined into genes. For more details, follow the list of methods called
by build_Genes() method and the description in each one.
Methods
Methods description
Description : It clusters transcripts according to genomic overlap Args : Array of Bio::EnsEMBL::Transcript Return : Array of Bio::EnsEMBL::Analysis::Tools::Algorithms::TranscriptCluster |
Example : my @genes = $self->build_Genes Description: builds genes. It is like the run method in Runnables. It calls everything that needs to be done. Returns : none Args : none Caller : Bio::EnsEMBL::Analysis::RunnableDB::HavanaAdder |
Description : It separates transcripts according to strand and then clusters each set of transcripts by calling _cluster_Transcripts_by_genomic_range() Args : Array of Bio::EnsEMBL::Transcript Return : Array of Bio::EnsEMBL::Analysis::Tools::Algorithms::TranscriptCluster |
Example : my @genes = $self->cluster_into_Genes(@transcripts); Description : it clusters transcripts into genes according to exon overlap. It will take care of difficult cases like transcripts within introns. It also unify exons that are shared among transcripts. Returns : a beautiful list of geen objects Args : a list of transcript objects |
Descripton: this holds/returns the final genes produced after clustering transcripts and sharing common exons |
Description: get/set for the type(s) of genes (usually TGE_gw, similarity_genewise and combined_e2g genes) to be used in the genebuilder they get set in new() Does not include the ab inition predictions |
Description: retrieves ensembl and havana gene annotations with supporting evidence. ReturnType : none, but $self->combined_Transcripts is filled Args : none |
Example : my $exons1 = $self->get_coding_exons_for_gene($tran); Description : It returns the coding exons of a transcript and stores them in a hash to safe computer time Returns : An ArrayRef than contain Exon objects. Args : a transcript object |
Function: get/set for input id Returns : string Args : string (it expects a string of the format chr_name.start_coord-end_coord |
Description: prunes out duplicated features Returntype : array of Bio::EnsEMBL::SeqFeature Args : array of Bio::EnsEMBL::SeqFeature |
Title : transfer_supporting_evidence Usage : $self->transfer_supporting_evidence($source_exon, $target_exon) Function: Transfers supporting evidence from source_exon to target_exon, after checking the coordinates are sane and that the evidence is not already in place. Returns : nothing, but $target_exon has additional supporting evidence |
Methods code
sub _cluster_Transcripts_by_genomic_range
{ my ($self,@mytranscripts) = @_;
my @transcripts = sort { $a->start <=> $b->start ? $a->start <=> $b->start : $b->end <=> $a->end } @mytranscripts;
my $cluster=Bio::EnsEMBL::Analysis::Tools::Algorithms::TranscriptCluster->new();
my $count = 0;
my @cluster_starts;
my @cluster_ends;
my @clusters;
$cluster->put_Transcripts( $transcripts[0] );
$cluster_starts[$count] = $transcripts[0]->start;
$cluster_ends[$count] = $transcripts[0]->end;
push( @clusters, $cluster );
LOOP1:
for (my $c=1; $c<=$#transcripts; $c++){
if ( !( $transcripts[$c]->end < $cluster_starts[$count] ||
$transcripts[$c]->start > $cluster_ends[$count] ) ){
$cluster->put_Transcripts( $transcripts[$c] );
if ($transcripts[$c]->start < $cluster_starts[$count]) {
$cluster_starts[$count] = $transcripts[$c]->start;
}
if ( $transcripts[$c]->end > $cluster_ends[$count]) {
$cluster_ends[$count] = $transcripts[$c]->end;
}
}
else{
$count++;
$cluster = Bio::EnsEMBL::Analysis::Tools::Algorithms::TranscriptCluster->new();
$cluster->put_Transcripts( $transcripts[$c] );
$cluster_starts[$count] = $transcripts[$c]->start;
$cluster_ends[$count] = $transcripts[$c]->end;
push(@clusters,$cluster);
}
}
return @clusters;
}
} |
sub _make_shared_exons_unique
{ my ( $self, @genes ) = @_;
my @pruned_genes;
foreach my $gene ( @genes ){
my $new_gene = $self->prune_Exons($gene);
push( @pruned_genes, $new_gene );
}
return @pruned_genes;
}
} |
sub _merge_redundant_transcripts
{ my ($self, $genes) = @_;
GENE:
foreach my $gene(@$genes){
my @transcripts = @{$gene->get_all_Transcripts};
my @havana;
my @ensembl;
TRANSCRIPT:foreach my $transcript(@transcripts){
foreach my $htranscript (@{$GB_HAVANA_INPUT_TRANSCRIPTTYPES}){
if($transcript->biotype eq $htranscript."_havana"){
push(@havana, $transcript);
next TRANSCRIPT;
}
}
push(@ensembl, $transcript);
}
if (!scalar(@havana)){
next GENE;
}
foreach my $ht(@havana){
$self->add_havana_attribute($ht,$ht);
$ht->flush_supporting_features;
my $delete_trans = 0;
my @t_pair;
foreach my $et(@ensembl){
my $delete_t = $self->are_matched_pair($ht, $et);
if ($delete_t){
if ($delete_t == $et){
$delete_trans = $delete_t;
@t_pair = ($ht, $et);
}elsif($delete_trans != $et && $delete_t == $ht){
$delete_trans = $delete_t;
@t_pair = ($ht, $et);
}elsif($delete_trans == 0){
$delete_trans = $delete_t;
@t_pair = ($ht, $et);
}
}
}
if($delete_trans && $delete_trans != 0){
$self->set_transcript_relation($delete_trans, @t_pair);
$t_pair[0]->biotype($MERGED_TRANSCRIPT_OUTPUT_TYPE);
$t_pair[1]->biotype($MERGED_TRANSCRIPT_OUTPUT_TYPE);
$self->_remove_transcript_from_gene($gene, $delete_trans) unless $delete_trans == 1;
}else{
$self->add_ottt_xref($ht);
}
}
}
}
} |
sub _remove_transcript_from_gene
{ my ($self, $gene, $trans_to_del) = @_;
my @newtrans;
foreach my $trans (@{$gene->get_all_Transcripts}) {
if ($trans != $trans_to_del) {
push @newtrans,$trans;
}
}
$gene->{_transcript_array} = [];
foreach my $trans (@newtrans) {
$gene->add_Transcript($trans);
}
return scalar(@newtrans);
}
} |
sub add_havana_attribute
{ my ($self, $transcript, $trans_to_add_attrib) = @_;
my %evidence;
my %t_evidence;
foreach my $tsf (@{$transcript->get_all_supporting_features}){
$t_evidence{$tsf->hseqname} = 1;
}
foreach my $te_key (keys %t_evidence){
if($te_key->isa("Bio::EnsEMBL::DnaPepAlignFeature")){
my $attribute = Bio::EnsEMBL::Attribute->new
(-CODE => 'tp_otter_support',
-NAME => 'tp otter support',
-DESCRIPTION => 'Evidence ID that was used as protein transcript supporting feature for building a gene in Vega',
-VALUE => $ev_key);
$trans_to_add_attrib->add_Attributes($attribute);
}
if($te_key->isa("Bio::EnsEMBL::DnaDnaAlignFeature")){
my $attribute = Bio::EnsEMBL::Attribute->new
(-CODE => 'td_otter_support',
-NAME => 'td otter support',
-DESCRIPTION => 'Evidence ID that was used as cdna transcript supporting feature for building a gene in Vega',
-VALUE => $ev_key);
$trans_to_add_attrib->add_Attributes($attribute);
}
}
foreach my $exon (@{$transcript->get_all_Exons}){
foreach my $sf (@{$exon->get_all_supporting_features}){
$evidence{$sf->hseqname} = 1;
}
}
foreach my $ev_key (keys %evidence){
if($ev_key->isa("Bio::EnsEMBL::DnaPepAlignFeature")){
my $attribute = Bio::EnsEMBL::Attribute->new
(-CODE => 'ep_otter_support',
-NAME => 'ep otter support',
-DESCRIPTION => 'Evidence ID that was used as protein exon supporting feature for building a gene in Vega',
-VALUE => $ev_key);
$trans_to_add_attrib->add_Attributes($attribute);
}
if($ev_key->isa("Bio::EnsEMBL::DnaPepAlignFeature")){
my $attribute = Bio::EnsEMBL::Attribute->new
(-CODE => 'ed_otter_support',
-NAME => 'ed otter support',
-DESCRIPTION => 'Evidence ID that was used as cdna exon supporting feature for building a gene in Vega',
-VALUE => $ev_key);
$trans_to_add_attrib->add_Attributes($attribute);
}
} } |
sub add_ottt_xref
{ my($self, $ht) = @_;
foreach my $entry(@{ $ht->get_all_DBEntries}){
if ($entry->dbname eq 'Vega_transcript'){
if($entry->primary_id eq $entry->display_id){
print "I am adding an OTTT xref to the transcript\n";
print "OTTT TO ADD: ",$entry->primary_id,"\n";
my $xref_ottt = new Bio::EnsEMBL::DBEntry
(
-primary_id =>$entry->primary_id,
-display_id =>$ht->display_id,
-priority => 1,
-xref_priority => 0,
-version => 1,
-release => 1,
-dbname => 'OTTT'
);
$xref_ottt->status("XREF");
$ht->add_DBEntry($xref_ottt);
}
}
} } |
sub are_matched_pair
{ my($self, $havana, $ensembl) = @_;
my @hexons = @{$havana->get_all_Exons};
my @eexons = @{$ensembl->get_all_Exons};
my @thexons = @{$havana->get_all_translateable_Exons};
my @teexons = @{$ensembl->get_all_translateable_Exons};
return 0 unless scalar(@hexons) == scalar(@eexons);
return 0 unless($havana->translation->genomic_start == $ensembl->translation->genomic_start &&
$havana->translation->genomic_end == $ensembl->translation->genomic_end);
if(scalar(@hexons == 1)){
if ($hexons[0]->start == $eexons[0]->start &&
$hexons[0]->end == $eexons[0]->end &&
$hexons[0]->strand == $eexons[0]->strand &&
$thexons[0]->coding_region_start($havana) == $teexons[0]->coding_region_start($ensembl) &&
$thexons[0]->coding_region_end($havana) == $teexons[0]->coding_region_end($ensembl)
){
return $ensembl;
}elsif($hexons[0]->start <= $eexons[0]->start &&
$hexons[0]->end >= $eexons[0]->end &&
$hexons[0]->strand == $eexons[0]->strand &&
$eexons[0]->start == $teexons[0]->coding_region_start($ensembl) &&
$eexons[0]->end == $teexons[0]->coding_region_end($ensembl)
){
return $ensembl;
}elsif((($hexons[0]->start != $eexons[0]->start ||
$hexons[0]->end != $eexons[0]->end) &&
$hexons[0]->strand == $eexons[0]->strand) &&
($eexons[0]->start != $teexons[0]->coding_region_start($ensembl) ||
$eexons[0]->end != $teexons[0]->coding_region_end($ensembl))
){
return $havana;
}else{
return 0;
}
}
else{
for(my $i=1; $i<=($#thexons-1); $i++){
return 0 unless ($thexons[$i]->start == $teexons[$i]->start &&
$thexons[$i]->end == $teexons[$i]->end &&
$thexons[$i]->strand == $teexons[$i]->strand
);
}
for(my $i=1; $i<=($#hexons-1); $i++){
return 1 unless ($hexons[$i]->start == $eexons[$i]->start &&
$hexons[$i]->end == $eexons[$i]->end &&
$hexons[$i]->strand == $eexons[$i]->strand
);
}
if ($hexons[0]->start == $eexons[0]->start &&
$hexons[0]->end == $eexons[0]->end &&
$hexons[0]->strand == $eexons[0]->strand &&
$hexons[-1]->start == $eexons[-1]->start &&
$hexons[-1]->end == $eexons[-1]->end &&
$hexons[-1]->strand == $eexons[-1]->strand
){
return $ensembl;
}elsif ( $hexons[0]->strand == 1 &&
$hexons[0]->end == $eexons[0]->end &&
$hexons[0]->strand == $eexons[0]->strand &&
$hexons[-1]->start == $eexons[-1]->start &&
$hexons[-1]->strand == $eexons[-1]->strand &&
$eexons[0]->start == $teexons[0]->coding_region_start($ensembl) &&
$eexons[-1]->end == $teexons[-1]->coding_region_end($ensembl) &&
($hexons[-1]->end != $eexons[-1]->end ||
$hexons[0]->start != $eexons[0]->start)
){
return $ensembl;
}elsif ( $hexons[0]->strand == 1 &&
$hexons[0]->end == $eexons[0]->end &&
$hexons[0]->strand == $eexons[0]->strand &&
$hexons[-1]->start == $eexons[-1]->start &&
$hexons[-1]->strand == $eexons[-1]->strand &&
($eexons[0]->start != $teexons[0]->coding_region_start($ensembl) ||
$eexons[-1]->end != $teexons[-1]->coding_region_end($ensembl)) &&
($hexons[-1]->end != $eexons[-1]->end ||
$hexons[0]->start != $eexons[0]->start)
){
return $havana;
}elsif ( $hexons[0]->strand == -1 &&
$hexons[0]->start == $eexons[0]->start &&
$hexons[0]->strand == $eexons[0]->strand &&
$hexons[-1]->end == $eexons[-1]->end &&
$hexons[-1]->strand == $eexons[-1]->strand &&
$eexons[-1]->start == $teexons[-1]->coding_region_start($ensembl) &&
$eexons[0]->end == $teexons[0]->coding_region_end($ensembl) &&
($hexons[0]->end != $eexons[0]->end ||
$hexons[-1]->start != $eexons[-1]->start)
){
return $ensembl;
}elsif ( $hexons[0]->strand == -1 &&
$hexons[0]->start == $eexons[0]->start &&
$hexons[0]->strand == $eexons[0]->strand &&
$hexons[-1]->end == $eexons[-1]->end &&
$hexons[-1]->strand == $eexons[-1]->strand &&
($eexons[-1]->start != $teexons[-1]->coding_region_start($ensembl) ||
$eexons[0]->end != $teexons[0]->coding_region_end($ensembl)) &&
($hexons[0]->end != $eexons[0]->end ||
$hexons[-1]->start != $eexons[-1]->start)
){
return $havana;
}else{
return 1;
}
}
print " WEIRD CASE WE DID NOT THOUGHT ABOUT, CHECK RULES!\n";
return 0; } |
sub build_Genes
{ my ($self) = @_;
print STDERR "Building genes...\n";
$self->get_Genes;
my @all_transcripts = $self->combined_Transcripts;
my @preliminary_genes = $self->cluster_into_Genes(@all_transcripts);
$self->_merge_redundant_transcripts(\@preliminary_genes);
my @genes = $self->_make_shared_exons_unique( @preliminary_genes );
print STDERR scalar(@genes)." genes built\n";
$self->final_genes( @genes ); } |
sub by_transcript_high
{ my $alow;
my $blow;
my $ahigh;
my $bhigh;
if ($a->start_Exon->strand == 1) {
$alow = $a->start_Exon->start;
$ahigh = $a->end_Exon->end;
}
else {
$alow = $a->end_Exon->start;
$ahigh = $a->start_Exon->end;
}
if ($b->start_Exon->strand == 1) {
$blow = $b->start_Exon->start;
$bhigh = $b->end_Exon->end;
}
else {
$blow = $b->end_Exon->start;
$bhigh = $b->start_Exon->end;
}
if ($ahigh != $bhigh) {
return $ahigh <=> $bhigh;
}
else {
return $alow <=> $blow;
}
}
} |
sub check_Clusters
{ my ($self, $num_transcripts, $clusters) = @_;
my $ntrans = 0;
my $cluster_num = 0;
my %trans_check_hash;
foreach my $cluster (@$clusters) {
$ntrans += scalar(@$cluster);
foreach my $trans (@$cluster) {
if (defined($trans_check_hash{$trans})) {
$self->throw("Transcript " . $trans->dbID . " added twice to clusters\n");
}
$trans_check_hash{$trans} = 1;
}
if (!scalar(@$cluster)) {
$self->throw("Empty cluster");
}
}
if ($ntrans != $num_transcripts) {
$self->throw("Not all transcripts have been added into clusters $ntrans and " . $num_transcripts. "\n ");
}
return;
}
} |
sub check_transcript_in_discarded_db
{ my ($self, $tran) = @_;
my @exons = @{$tran->get_all_Exons};
my $discardedslice = $self->discarded_db->get_SliceAdaptor->fetch_by_region('toplevel',$tran->slice->seq_region_name,$tran->seq_region_start,$tran->seq_region_end);
DGENE:
foreach my $dgene (@{$discardedslice->get_all_Genes}){
DTRANS:foreach my $dtran (@{$dgene->get_all_Transcripts}){
my @dexons = @{$dtran->get_all_Exons};
if(scalar(@exons) == scalar(@dexons)){
for (my $i=0; $i < scalar(@exons); $i++){
if ($exons[$i]->seq_region_start != $dexons[$i]->seq_region_start ||
$exons[$i]->strand != $dexons[$i]->strand ||
$exons[$i]->seq_region_end != $dexons[$i]->seq_region_end){
next DTRANS;
}
}
print "transcript found in discarded db\n";
return 0;
}else{
next DGENE;
}
}
}
return 1; } |
sub clear_coding_exons_cache
{ %coding_exon_cache = (); } |
sub cluster_Transcripts
{ my ($self,@transcripts) = @_;
my @forward_transcripts;
my @reverse_transcripts;
foreach my $transcript (@transcripts){
my @exons = @{ $transcript->get_all_Exons };
if ( $exons[0]->strand == 1 ){
push( @forward_transcripts, $transcript );
}
else{
push( @reverse_transcripts, $transcript );
}
}
my @forward_clusters;
my @reverse_clusters;
if ( @forward_transcripts ){
@forward_clusters = $self->_cluster_Transcripts_by_genomic_range( @forward_transcripts );
}
if ( @reverse_transcripts ){
@reverse_clusters = $self->_cluster_Transcripts_by_genomic_range( @reverse_transcripts );
}
my @clusters;
if ( @forward_clusters ){
push( @clusters, @forward_clusters);
}
if ( @reverse_clusters ){
push( @clusters, @reverse_clusters);
}
return @clusters;
}
} |
sub cluster_into_Genes
{ my ($self, @transcripts_unsorted) = @_;
my $num_trans = scalar(@transcripts_unsorted);
$self->clear_coding_exons_cache;
my @transcripts_unsorted_translation;
foreach my $tran(@transcripts_unsorted){
if ($tran->translation){
push (@transcripts_unsorted_translation, $tran);
}
}
my @transcripts = sort { $a->coding_region_start <=> $b->coding_region_start ? $a->coding_region_start <=> $b->coding_region_start : $b->coding_region_end <=> $a->coding_region_end } @transcripts_unsorted_translation;
my @clusters;
foreach my $tran (@transcripts) {
my @matching_clusters;
CLUSTER:
foreach my $cluster (@clusters) {
foreach my $cluster_transcript (@$cluster) {
if ($tran->coding_region_end >= $cluster_transcript->coding_region_start &&
$tran->coding_region_start <= $cluster_transcript->coding_region_end) {
my $exons1 = get_coding_exons_for_transcript($tran);
my $cluster_exons = get_coding_exons_for_transcript($cluster_transcript);
foreach my $exon1 (@{$exons1}) {
foreach my $cluster_exon (@{$cluster_exons}) {
if ($exon1->overlaps($cluster_exon) && $exon1->strand == $cluster_exon->strand) {
push (@matching_clusters, $cluster);
next CLUSTER;
}
}
}
}
}
}
if (scalar(@matching_clusters) == 0) {
my @newcluster;
push(@newcluster,$tran);
push(@clusters,\@newcluster);
}
elsif (scalar(@matching_clusters) == 1) {
push @{$matching_clusters[0]}, $tran;
}
else {
my @new_clusters;
my @merged_cluster;
foreach my $clust (@matching_clusters) {
push @merged_cluster, @$clust;
}
push @merged_cluster, $tran;
push @new_clusters,\@merged_cluster;
foreach my $clust (@clusters) {
my $found = 0;
MATCHING:
foreach my $m_clust (@matching_clusters) {
if ($clust == $m_clust) {
$found = 1;
last MATCHING;
}
}
if (!$found) {
push @new_clusters,$clust;
}
}
@clusters = @new_clusters;
}
}
$self->check_Clusters(scalar(@transcripts),\@ clusters);
my @genes;
foreach my $cluster(@clusters){
my $count = 0;
my $gene = new Bio::EnsEMBL::Gene;
foreach my $transcript (@$cluster){
$gene->add_Transcript($transcript);
}
push( @genes, $gene );
}
return @genes;
}
} |
sub combined_Transcripts
{ my ($self,@transcripts) = @_;
if (!defined($self->{_genewise_andthelike_transcripts})) {
$self->{_genewise_andthelike_transcripts} = [];
}
if (scalar @transcripts > 0) {
push(@{$self->{_genewise_andthelike_transcripts}},@transcripts);
}
return @{$self->{_genewise_andthelike_transcripts}};
}
} |
sub discarded_db
{ my ($self, $discarded_db) = @_;
if ( $discarded_db ){
$self->{_discarded_db} = $discarded_db;;
}
return $self->{_discarded_db};
}
} |
sub ensembl_db
{ my ($self,$ensembl_db) = @_;
if ( $ensembl_db ){
$self->{_ensembl_db} = $ensembl_db;
}
return $self->{_ensembl_db}; } |
sub features
{ my ($self,@features) = @_;
if (!defined($self->{_feature})) {
$self->{_feature} = [];
}
if ( scalar @features ) {
push(@{$self->{_feature}},@features);
}
return @{$self->{_feature}};
}
} |
sub fetch_sequence
{ my ($self, $name, $db) = @_;
my $sa = $db->get_SliceAdaptor;
my $slice = $sa->fetch_by_name($name);
return $slice;
}
1; } |
sub final_genes
{ my ($self, @genes) = @_;
if ( @genes ){
push( @{$self->{_final_genes}}, @genes );
}
return @{$self->{_final_genes}};
}
} |
sub flush_xref
{ my ($self, $transcript) = @_;
my @newxrefs;
foreach my $tran_xref (@{$transcript->get_all_DBEntries}){
if ($tran_xref->dbname ne "shares_CDS_and_UTR_with_OTTT" &&
$tran_xref->dbname ne "shares_CDS_with_OTTT" &&
$tran_xref->dbname ne "shares_CDS_with_ENST" &&
$tran_xref->dbname ne "OTTT"){
push (@newxrefs, $tran_xref);
}
}
$transcript->{dbentries} = [];
foreach my $newxref (@newxrefs) {
$transcript->add_DBEntry($newxref);
}
}
} |
sub gene_types
{ my ($self,$type) = @_;
if (defined($type)) {
push(@{$self->{_gene_types}},$type);
}
return @{$self->{_gene_types}};
}
} |
sub get_Genes
{ my ($self) = @_;
my @transcripts;
my @genes;
my $ensemblslice = $self->fetch_sequence($self->input_id, $self->ensembl_db);
my $havanaslice = $self->fetch_sequence($self->input_id, $self->havana_db);
print STDERR "Fetching ensembl genes\n";
EGENE:
foreach my $egene (@{$ensemblslice->get_all_Genes_by_type($GB_ENSEMBL_INPUT_GENETYPE)}){
if ($egene->analysis->logic_name() eq $HAVANA_LOGIC_NAME){
next EGENE;
}else{
push (@genes,$egene);
}
}
print STDERR "Retrieved ".scalar(@genes)." genes of type ".$GB_ENSEMBL_INPUT_GENETYPE."\n";
print STDERR "Fetching havana genes\n";
my @hgenes = @{$havanaslice->get_all_Genes_by_type($GB_HAVANA_INPUT_GENETYPE)};
print STDERR "Retrieved ".scalar(@hgenes)." genes of type ".$GB_HAVANA_INPUT_GENETYPE."\n";
foreach my $hgene(@hgenes){
my $biotype = $hgene->biotype."_havana";
$hgene->biotype($biotype);
foreach my $htran (@{$hgene->get_all_Transcripts}) {
my $tbiotype = $htran->biotype."_havana";
$htran->biotype($tbiotype);
}
}
push(@genes, @hgenes);
foreach my $gene(@genes){
TRANSCRIPT:
foreach my $tran (@{$gene->get_all_Transcripts}) {
if($gene->analysis->logic_name() eq $MERGED_GENE_LOGIC_NAME &&
$tran->analysis->logic_name() eq $HAVANA_LOGIC_NAME){
next TRANSCRIPT;
}elsif($tran->analysis->logic_name() eq $MERGED_TRANSCRIPT_LOGIC_NAME){
my $share_enst = 0;
my $share_cds_and_utr = 0;
my @dbentries = @{ $tran->get_all_DBEntries };
foreach my $dbentry (@dbentries){
if ($dbentry->dbname eq "shares_CDS_with_ENST"){
$share_enst = 1;
}
if ($dbentry->dbname eq "shares_CDS_and_UTR_with_OTTT"){
$share_cds_and_utr = 1;
}
}
if ($share_enst == 1 && $share_cds_and_utr == 0){
next TRANSCRIPT;
}
}
$self->flush_xref($tran);
if($self->check_transcript_in_discarded_db($tran) != 0){
push(@transcripts, $tran);
}
}
}
print STDERR "Finished fetching genes\n";
$self->combined_Transcripts(@transcripts); } |
sub get_coding_exons_for_transcript
{ my ($trans) = @_;
if (exists($coding_exon_cache{$trans})) {
return $coding_exon_cache{$trans};
} else {
my %coding_hash;
next if (!$trans->translation);
foreach my $exon (@{$trans->get_all_translateable_Exons}) {
$coding_hash{$exon} = $exon;
}
my @coding = sort { $a->start <=> $b->start } values %coding_hash;
$coding_exon_cache{$trans} =\@ coding;
return $coding_exon_cache{$trans};
}
}
}
} |
sub havana_db
{ my ($self,$havana_db) = @_;
if ( $havana_db ){
$self->{_havana_db} = $havana_db;
}
return $self->{_havana_db}; } |
sub input_id
{ my ($self,$id) = @_;
if (defined($id)) {
$self->{_input_id} = $id;
}
return $self->{_input_id};
}
} |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($slice,$input_id) = $self->_rearrange([qw(SLICE INPUT_ID)],
@args);
$self->throw("Must input a slice to HavanaAdder") unless defined($slice);
$self->{_final_genes} = [];
$self->{_gene_types} = [];
$self->query($slice);
$self->gene_types($GB_ENSEMBL_INPUT_GENETYPE);
$self->gene_types($GB_HAVANA_INPUT_GENETYPE);
$self->input_id($input_id);
return $self;
}
} |
sub prune_Exons
{ my ($self,$gene) = @_;
my @unique_Exons;
foreach my $tran (@{$gene->get_all_Transcripts}) {
my @newexons;
foreach my $exon (@{$tran->get_all_Exons}) {
my $found;
UNI:foreach my $uni (@unique_Exons) {
if ($uni->start == $exon->start &&
$uni->end == $exon->end &&
$uni->strand == $exon->strand &&
$uni->phase == $exon->phase &&
$uni->end_phase == $exon->end_phase
) {
$found = $uni;
last UNI;
}
}
if (defined($found)) {
push(@newexons,$found);
if ($exon == $tran->translation->start_Exon){
$tran->translation->start_Exon($found);
}
if ($exon == $tran->translation->end_Exon){
$tran->translation->end_Exon($found);
}
} else {
push(@newexons,$exon);
push(@unique_Exons, $exon);
}
}
$tran->flush_Exons;
foreach my $exon (@newexons) {
$tran->add_Exon($exon);
}
}
return $gene;
}
} |
sub prune_features
{ my ($self,$feature_hash) = @_;
my @pruned;
ID:
foreach my $id (keys %{ $feature_hash }) {
my @features = @{$feature_hash->{$id}};
@features = sort {$a->start <=> $b->start} @features;
unless ( @features ){
print STDERR "No features here for id: $id\n";
next ID;
}
while ( @features && !defined $features[0] ){
shift @features;
}
my $prev = -1;
FEATURE:
foreach my $f (@features) {
if ($prev != -1 && $f->hseqname eq $prev->hseqname &&
$f->start == $prev->start &&
$f->end == $prev->end &&
$f->hstart == $prev->hstart &&
$f->hend == $prev->hend &&
$f->strand == $prev->strand &&
$f->hstrand == $prev->hstrand)
{
if ( $f->score > $prev->score ){
$prev->score( $f->score );
}
next FEATURE;
}
else {
push(@pruned,$f);
$prev = $f;
}
}
}
return @pruned;
}
} |
sub query
{ my ($self,$slice) = @_;
if (defined($slice)) {
$self->{_query} = $slice;
}
return $self->{_query};
}
} |
sub set_transcript_relation
{ my($self, $delete_t, @t_pair) = @_;
if ($delete_t == 1){
foreach my $entry(@{ $t_pair[0]->get_all_DBEntries}){
if ($entry->dbname eq 'Vega_transcript'){
if($entry->primary_id eq $entry->display_id){
my $newentry = new Bio::EnsEMBL::DBEntry
(
-primary_id => $entry->primary_id,
-display_id => $entry->display_id,
-priority => 1,
-xref_priority => 0,
-version => 1,
-release => 1,
-dbname => 'shares_CDS_with_OTTT'
);
$newentry->status("XREF");
$t_pair[1]->add_DBEntry($newentry);
my $xref_ottt = new Bio::EnsEMBL::DBEntry
(
-primary_id =>$entry->primary_id,
-display_id =>$entry->display_id,
-priority => 1,
-xref_priority => 0,
-version => 1,
-release => 1,
-dbname => 'OTTT'
);
print "OTTT xref to be added here\n";
$xref_ottt->status("XREF");
$t_pair[0]->add_DBEntry($xref_ottt);
}
}
}
my $link_attrib = Bio::EnsEMBL::Attribute->new
(-CODE => 'enst_link',
-NAME => 'enst link',
-DESCRIPTION => 'Code to link a OTTT with an ENST when they both share the CDS of ENST',
-VALUE => $t_pair[1]->dbID);
$t_pair[1]->add_Attributes($link_attrib);
my $xref_entry = new Bio::EnsEMBL::DBEntry
(
-primary_id =>$t_pair[1]->dbID,
-display_id =>$t_pair[1]->dbID,
-priority => 1,
-xref_priority => 0,
-version => 1,
-release => 1,
-dbname => 'shares_CDS_with_ENST'
);
$xref_entry->status("XREF");
$t_pair[0]->add_DBEntry($xref_entry);
print "OTTT TO ADD: ",$t_pair[0]->stable_id,"\n";
}
elsif ($delete_t == $t_pair[0]){
foreach my $entry(@{ $t_pair[0]->get_all_DBEntries}){
if ($entry->dbname eq 'Vega_transcript'){
if($entry->primary_id eq $entry->display_id){
my $newentry = new Bio::EnsEMBL::DBEntry
(
-primary_id => $entry->primary_id,
-display_id => $entry->display_id,
-priority => 1,
-xref_priority => 0,
-version => 1,
-release => 1,
-dbname => 'shares_CDS_with_OTTT'
);
$newentry->status("XREF");
$t_pair[1]->add_DBEntry($newentry);
}
}
}
my $attrib_value = $t_pair[0]->slice->coord_system_name.":".$t_pair[0]->slice->coord_system->version.":".$t_pair[0]->slice->seq_region_name.":".
$t_pair[0]->start.":".$t_pair[0]->end.":1";
my $attribute = Bio::EnsEMBL::Attribute->new
(-CODE => 'TranscriptEdge',
-NAME => 'Transcript Edge',
-DESCRIPTION => '',
-VALUE => $attrib_value);
$t_pair[1]->add_Attributes($attribute);
my @delete_e = @{$delete_t->get_all_Exons};
my @exons = @{$t_pair[1]->get_all_Exons};
if (scalar(@delete_e) == scalar(@exons)){
my $e;
for ($e = 0, $e<scalar(@delete_e), $e++){
if($delete_e[$e] && $exons[$e]){
$self->transfer_supporting_evidence($delete_e[$e], $exons[$e]);
}
}
}
$self->add_havana_attribute($t_pair[0],$t_pair[1]);
}elsif ($delete_t == $t_pair[1]){
foreach my $entry(@{ $t_pair[0]->get_all_DBEntries}){
if ($entry->dbname eq 'Vega_transcript'){
if($entry->primary_id eq $entry->display_id){
my $enstentry = new Bio::EnsEMBL::DBEntry
(
-primary_id => $entry->primary_id,
-display_id => $entry->display_id,
-version => 1,
-release => 1,
-priority => 1,
-xref_priority => 0,
-dbname => 'shares_CDS_and_UTR_with_OTTT'
);
$enstentry->status("XREF");
$t_pair[0]->add_DBEntry($enstentry);
}
}
}
$self->transfer_supporting_features($delete_t,$t_pair[0]);
} } |
sub transcript_high
{ my ($self,$tran) = @_;
my $high;
if ( $tran->start_Exon->strand == 1){
$high = $tran->end_Exon->end;
}
else{
$high = $tran->start_Exon->end;
}
return $high;
}
} |
sub transcript_low
{ my ($self,$tran) = @_;
my $low;
if ( $tran->start_Exon->strand == 1){
$low = $tran->start_Exon->start;
}
else{
$low = $tran->end_Exon->start;
}
return $low;
}
} |
sub transfer_supporting_evidence
{ my ($self, $source_exon, $target_exon) = @_;
my @target_sf = @{$target_exon->get_all_supporting_features};
my %unique_evidence;
my %hold_evidence;
SOURCE_FEAT:
foreach my $feat ( @{$source_exon->get_all_supporting_features}){
next SOURCE_FEAT unless $feat->isa("Bio::EnsEMBL::FeaturePair");
next SOURCE_FEAT if ( $unique_evidence{ $feat } );
if ( $hold_evidence{ $feat->hseqname }{ $feat->start }{ $feat->end }{ $feat->hstart }{ $feat->hend } ){
next SOURCE_FEAT;
}
TARGET_FEAT:
foreach my $tsf (@target_sf){
next TARGET_FEAT unless $tsf->isa("Bio::EnsEMBL::FeaturePair");
if($feat->start == $tsf->start &&
$feat->end == $tsf->end &&
$feat->strand == $tsf->strand &&
$feat->hseqname eq $tsf->hseqname &&
$feat->hstart == $tsf->hstart &&
$feat->hend == $tsf->hend){
next SOURCE_FEAT;
}
}
$target_exon->add_supporting_features($feat);
$unique_evidence{ $feat } = 1;
$hold_evidence{ $feat->hseqname }{ $feat->start }{ $feat->end }{ $feat->hstart }{ $feat->hend } = 1;
}
}
} |
transfer_supporting_features | description | prev | next | Top |
sub transfer_supporting_features
{ my ($self, $delete_t, $transcript) = @_;
my @exon_features;
my @delete_tsf = @{ $delete_t->get_all_supporting_features };
DTSF: foreach my $dtsf (@delete_tsf){
next DTSF unless $dtsf->isa("Bio::EnsEMBL::FeaturePair");
$transcript->add_supporting_features($dtsf);
}
my @delete_e = @{$delete_t->get_all_Exons};
my @exons = @{$transcript->get_all_Exons};
if (scalar(@delete_e) == scalar(@exons)){
my $e;
for ($e = 0, $e<scalar(@delete_e), $e++){
if($delete_e[$e] && $exons[$e]){
$self->transfer_supporting_evidence($delete_e[$e], $exons[$e]);
}
}
}
} |
General documentation
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _