Raw content of Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::GeneUtils =head1 NAME Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::GeneUtils - utilities for gene objects =head1 SYNOPSIS use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::GeneUtils qw(clone_Gene); or use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::GeneUtils to get all methods =head1 DESCRIPTION All methods in this class should take a Bio::EnsEMBL::Gene object as their first argument. The methods provided should carry out some standard functionality for said objects such as printing info, and cloning =head1 CONTACT please send any questions to ensembl-dev@ebi.ac.uk =head1 METHODS the rest of the documention details the exported static class methods =cut package Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::GeneUtils; use strict; use warnings; use Exporter; use vars qw (@ISA @EXPORT); @ISA = qw(Exporter); @EXPORT = qw( print_Gene print_Gene_Transcript_and_Exons clone_Gene Gene_info prune_Exons get_one2one_orth_for_gene_in_other_species get_one2one_homology_for_gene_in_other_species get_transcript_with_longest_CDS attach_Slice_to_Gene attach_Analysis_to_Gene attach_Analysis_to_Gene_no_support fully_load_Gene empty_Gene ); use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning stack_trace_dump); use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranscriptUtils qw(print_Transcript clone_Transcript get_evidence_ids attach_Slice_to_Transcript fully_load_Transcript empty_Transcript attach_Analysis_to_Transcript attach_Analysis_to_Transcript_no_support print_Transcript_and_Exons); use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils qw (id coord_string empty_Object); use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::ExonUtils; use Bio::EnsEMBL::Gene; =head2 print_Gene Arg [1] : Bio::EnsEMBL::Gene Function : prints out information about the gene object passed in, it will also iterate down the gene structure printing information about transcripts, exons etc Returntype: n/a Exceptions: n/a Example : print_Gene($gene); =cut sub print_Gene{ my $gene = shift; print Gene_info($gene)."\n"; foreach my $transcript(@{$gene->get_all_Transcripts}){ my $indent = "\t"; print_Transcript($transcript, $indent); } } sub print_Gene_Transcript_and_Exons{ my $gene = shift; print Gene_info($gene)."\n"; foreach my $transcript(@{$gene->get_all_Transcripts}){ my $indent = "\t"; print_Transcript_and_Exons($transcript, $indent); } } =head2 clone_Gene Arg [1] : Bio::EnsEMBL::Gene Function : to produce a copy of the given gene object which can be altered without changing the original object or its children to the original object Returntype: Bio::EnsEMBL::Gene Exceptions: none Example : my $newgene = clone_Gene($gene); =cut sub clone_Gene{ my ($gene, $clone_xrefs) = @_; $clone_xrefs = 1 if(!defined($clone_xrefs)); my $newgene = Bio::EnsEMBL::Gene->new(); $newgene->dbID($gene->dbID); $newgene->biotype($gene->biotype); $newgene->analysis($gene->analysis); $newgene->stable_id($gene->stable_id); $newgene->version($gene->version); if ($clone_xrefs){ foreach my $DBEntry (@{$gene->get_all_DBEntries}){ $newgene->add_DBEntry($DBEntry); } $newgene->display_xref($gene->display_xref) ; } foreach my $transcript(@{$gene->get_all_Transcripts}){ my $newtranscript = clone_Transcript($transcript, $clone_xrefs); $newgene->add_Transcript($newtranscript); } $newgene->slice($gene->slice); return $newgene; } =head2 Gene_info Arg [1] : Bio::EnsEMBL::Gene Function : returns a string of information about the gene Returntype: n/a Exceptions: none Example : print Gene_info($gene)."\n"; =cut sub Gene_info{ my ($gene) = @_; my $coord_string = coord_string($gene); my $id = id($gene); return "GENE: id ".$id." ".$coord_string." biotype ".$gene->biotype; } =head2 prune_Exons Arg [1] : Bio::EnsEMBL::Gene Function : remove duplicate exons between Transcripts but ensure translation and other exons are maintained Returntype: Exceptions: Example : =cut sub prune_Exons{ my ($gene) = @_; my @unique_exons; # keep track of all unique exons found so far to avoid making # duplicates need to be very careful about # translation->start_Exon and translation->end_Exon # my $cloned_gene = clone_Gene($gene); foreach my $tran (@{$cloned_gene->get_all_Transcripts}) { my @newexons; foreach my $exon (@{$tran->get_all_Exons}) { my $found; #always empty UNI:foreach my $uni (@unique_exons) { if ($uni->start == $exon->start && $uni->end == $exon->end && $uni->strand == $exon->strand && $uni->phase == $exon->phase && $uni->end_phase == $exon->end_phase ) { $found = $uni; last UNI; } } if (defined($found)) { push(@newexons,$found); if ($exon == $tran->translation->start_Exon){ $tran->translation->start_Exon($found); } if ($exon == $tran->translation->end_Exon){ $tran->translation->end_Exon($found); } } else { push(@newexons,$exon); push(@unique_exons, $exon); } } $tran->flush_Exons; foreach my $exon (@newexons) { $tran->add_Exon($exon); } } return $cloned_gene; } =head2 get_transcript_with_longest_CDS Arg [0] : Bio::EnsEMBL::Gene Function : returns Array containing the longest transcript-obj, the length of all translateable exons and the number of exons for this transcript Returntype: Array longest transcript, max. length, max nr of exons for this transcript Exceptions: Example : =cut sub get_transcript_with_longest_CDS { my $gene = shift; my $maxlen = 0; my $nexonmax = 0; my $longest; foreach my $trans (@{$gene->get_all_Transcripts}) { my $len = 0; if ($trans->translation) { my @trans_exons = @{$trans->get_all_translateable_Exons()}; foreach my $exon (@trans_exons) { $len+= $exon->length; } if ($len > $maxlen) { $maxlen = $len; $longest = $trans; $nexonmax = scalar(@{$trans->get_all_Exons}); } } } if (!defined($longest)) { print "Didn't find a longest transcript for gene ". $gene->stable_id . " type " . $gene->type . "\n"; } return ($longest,$maxlen,$nexonmax); } =head2 get_one2one_orth_for_gene_in_other_species Arg [0] : Bio::EnsEMBL::Gene Arg [1] : String species_name ("Homo sapiens") Description: Returns the one2one orthologue as a Bio::EnsEMBL::Gene object in the specified species or undef if there is non or more than one orthologue Returntype: Bio::EnsEMBL::Gene object Exceptions: none Example : $one2one_orth_mus = get_one2one_orth_for_gene_in_other_species($gene, "Mus musculus"); =cut sub get_one2one_orth_for_gene_in_other_species { my ($gene, $other_species) = @_ ; # this is a list of homolgy-desription terms which are used in ensembl_compara databases # versions v18 - v 47. They are stored in homology.description table. my @orthology_description_dictionary = qw ( PIP SEED BRH DWGA MBRH RHS UBRH YoungParalogues ortholog_many2many ortholog_one2many ortholog_one2one apparent_ortholog_one2one between_species_paralog within_species_paralog ) ; my %orthology_dict; @orthology_dict{@orthology_description_dictionary} = 1; my $one2one_homologue_in_other_species = undef; my @homologies_found ; foreach my $homolog_to_check ( @{ $gene->get_all_homologous_Genes()} ) { my ($check_homg, $check_homology, $check_species ) = @$homolog_to_check ; unless ( exists $orthology_dict{$check_homology->description}){ throw("The homology-description : ".$check_homology->description. " is an unknow, probably new description. This". " is probably due to changes in compara. Please add the new description to the \@orthology_description_dictionary ". " in the GeneUtils.pm module and make sure that the retrieval of one2one orthologues is not affected\n") ; } if ($check_species eq $other_species) { push @homologies_found , [$check_homology,$check_homg]; #print "Homology found : " . $check_homology->description. "\t" . $check_homg->stable_id . "\n" ; # now this is a tricky one ... depending on the string we need to make a decision if it's a one2one or not. # ortholgos can be retrieved by gene-trees as well as the homology pipeline. # compara changes this string ( stored in the compara homology-table column description if ( $check_homology->description=~m/ortholog_one2one/){ # a ortholog_one2one-relation has already been found - this is wrong, or compara changed ... if ($one2one_homologue_in_other_species ) { my $err_str = "We have more than one ortholog_one2one-relationship found for : " . $gene->stable_id ."\t" . $check_species. "\n" ; for my $hg ( @homologies_found ) { my ( $homology, $homolog ) = @$hg ; $err_str .= $homology->description ."\t" . $homolog->stable_id . "\n" ; } throw("$err_str"); } $one2one_homologue_in_other_species = $check_homg ; } elsif ( $check_homology->description=~m/(PIP|SEED|BRH|DWGA|MBRH|RHS|UBRH)/){ if ( $one2one_homologue_in_other_species ) { print "more than one homology found, so i assume that it's not a one2one homolog\n" ; return undef ; } $one2one_homologue_in_other_species = $check_homg ; } } } # if ( scalar (@homologies_found ) > 0 ) { # print "\nRelationships identified for : " . $gene->stable_id . " :\n" ; # for my $hg ( @homologies_found ) { # my ( $homology, $homolog ) = @$hg ; # print "\t" . $homology->description ."\t" . $homolog->stable_id . "\n" ; # } # print "\n" ; # } return $one2one_homologue_in_other_species; } sub get_one2one_homology_for_gene_in_other_species { my ($gene, $other_species) = @_ ; print "using method get_one2one_homology_for_gene_in_other_species analysis_2008_01_21 \n" ; my $one2one_homologue_gene_in_other_species = undef; my $homology = undef ; my $more_than_one = undef ; my @ortholog_one2one; # problem with the old routine is that it only works with old compara # versions ( like with versions where there are homology-classes like # UBRH, MBRH etc. ) - if we use the new, tree-based compara pipeline we # get other homology descriptions. foreach my $homolog_to_check ( @{ $gene->get_all_homologous_Genes()} ) { my ($check_homologue_gene, $check_homology, $check_species ) = @$homolog_to_check ; if ($check_species eq $other_species) { print "homology_key\t$other_species\t".$check_homology->description."\t" . $gene->stable_id . "\t" . $check_homologue_gene->stable_id ."\n" ; if ( $check_homology->description =~m/ortholog_one2one/ || $check_homology->description =~m/apparent_ortholog_one2one/) { push @ortholog_one2one, $check_homology ; } if ( $one2one_homologue_gene_in_other_species ) { $one2one_homologue_gene_in_other_species = undef; $more_than_one = 1 ; } $one2one_homologue_gene_in_other_species = $check_homologue_gene ; $homology = $check_homology ; } } if ( $more_than_one ) { if ( scalar(@ortholog_one2one) eq 1 && $ortholog_one2one[0]){ return $ortholog_one2one[0] ; } elsif ( scalar(@ortholog_one2one) > 1) { throw("weird more than one ortholog_one2one found\n"); } else { #print "no orthologue_one2one found\n" ; $homology = undef ; } } return $homology; } =head2 attach_Slice_to_Gene Arg [1] : Bio::EnsEMBL::Gene Arg [2] : Bio::EnsEMBL::Slice Function : attach given slice to gene and its object hierachy Returntype: n/a Exceptions: Example : =cut sub attach_Slice_to_Gene{ my ($gene, $slice) = @_; $gene->slice($slice); foreach my $transcript(@{$gene->get_all_Transcripts}){ attach_Slice_to_Transcript($transcript, $slice); } } sub attach_Analysis_to_Gene{ my ($gene, $analysis) = @_; $gene->analysis($analysis); foreach my $transcript(@{$gene->get_all_Transcripts}){ attach_Analysis_to_Transcript($transcript, $analysis); } } sub attach_Analysis_to_Gene_no_support{ my ($gene, $analysis) = @_; $gene->analysis($analysis); foreach my $transcript(@{$gene->get_all_Transcripts}){ attach_Analysis_to_Transcript_no_support($transcript, $analysis); } } =head2 fully_load_Gene Arg [1] : Bio::EnsEMBL::Gene Function : descend object hierachy to ensure is fully loaded from the database Returntype: Bio::EnsEMBL::Gene Exceptions: Example : =cut sub fully_load_Gene{ my ($gene, $keep_xrefs) = @_; $keep_xrefs = 1 if(!defined($keep_xrefs)); foreach my $t(@{$gene->get_all_Transcripts}){ fully_load_Transcript($t, $keep_xrefs); } $gene->analysis; $gene->get_all_DBEntries if($keep_xrefs); $gene->get_all_Attributes; $gene->stable_id; return $gene; } =head2 empty_Gene Arg [1] : Bio::EnsEMBL::Gene Arg [2] : Boolean, whether to remove the stable id Arg [3] : Boolean, whether to remove the xrefs Function : detached gene and its object hierachy from database Returntype: Bio::EnsEMBL::Gene Exceptions: Example : =cut sub empty_Gene{ my ($gene, $remove_stable_id, $remove_xrefs) = @_; fully_load_Gene($gene); foreach my $t(@{$gene->get_all_Transcripts}){ empty_Transcript($t, $remove_stable_id, $remove_xrefs); } $gene->display_xref(undef) if($remove_xrefs); empty_Object($gene, $remove_stable_id); return $gene; } 1;