Raw content of Bio::EnsEMBL::Compara::Homology package Bio::EnsEMBL::Compara::Homology; use strict; use Bio::EnsEMBL::Compara::BaseRelation; use Bio::EnsEMBL::Utils::Exception qw( deprecate throw warning ); use Bio::SimpleAlign; our @ISA = qw(Bio::EnsEMBL::Compara::BaseRelation); =head1 NAME Bio::EnsEMBL::Compara::Homology - Homology between two proteins =head1 SYNOPSIS use Bio::EnsEMBL::Registry; my $homology_adaptor = $reg->get_adaptor("Multi", "compara", "Homology"); ## Please, refer to Bio::EnsEMBL::Compara::DBSQL::MemberAdaptor ## to see how to get a Member from the database. Also, you can ## find alternative ways to fetch homologies in the POD for the ## Bio::EnsEMBL::Compara::DBSQL::HomologyAdaptor module. my $homologies = $homology_adaptor->fetch_all_by_Member($member); foreach my $this_homology (@$homologies) { my $homologue_genes = $this_homology->gene_list(); print join(" and ", @$homologue_genes), " are ", $this_homology->description, "\n"; } =head1 AUTHOR Ensembl Team =head1 COPYRIGHT Copyright (c) 2004. Ensembl Team You may distribute this module under the same terms as perl itself =head1 CONTACT This modules is part of the EnsEMBL project () Questions can be posted to the ensembl-dev mailing list: ensembl-dev@ebi.ac.uk =head1 INHERITANCE This class inherits all the methods and attributes from Bio::EnsEMBL::DBSQL::BaseAdaptor =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut my %TWOD_CODONS = ("TTT" => "Phe",#Phe "TTC" => "Phe", "TTA" => "Leu",#Leu "TTG" => "Leu", "TAT" => "Tyr",#Tyr "TAC" => "Tyr", "CAT" => "His",#His "CAC" => "His", "CAA" => "Gln",#Gln "CAG" => "Gln", "AAT" => "Asn",#Asn "AAC" => "Asn", "AAA" => "Lys",#Lys "AAG" => "Lys", "GAT" => "Asp",#Asp "GAC" => "Asp", "GAA" => "Glu",#Glu "GAG" => "Glu", "TGT" => "Cys",#Cys "TGC" => "Cys", "AGT" => "Ser",#Ser "AGC" => "Ser", "AGA" => "Arg",#Arg "AGG" => "Arg", "ATT" => "Ile",#Ile "ATC" => "Ile", "ATA" => "Ile"); my %FOURD_CODONS = ("CTT" => "Leu",#Leu "CTC" => "Leu", "CTA" => "Leu", "CTG" => "Leu", "GTT" => "Val",#Val "GTC" => "Val", "GTA" => "Val", "GTG" => "Val", "TCT" => "Ser",#Ser "TCC" => "Ser", "TCA" => "Ser", "TCG" => "Ser", "CCT" => "Pro",#Pro "CCC" => "Pro", "CCA" => "Pro", "CCG" => "Pro", "ACT" => "Thr",#Thr "ACC" => "Thr", "ACA" => "Thr", "ACG" => "Thr", "GCT" => "Ala",#Ala "GCC" => "Ala", "GCA" => "Ala", "GCG" => "Ala", "CGT" => "Arg",#Arg "CGC" => "Arg", "CGA" => "Arg", "CGG" => "Arg", "GGT" => "Gly",#Gly "GGC" => "Gly", "GGA" => "Gly", "GGG" => "Gly"); my %CODONS = ("ATG" => "Met", "TGG" => "Trp", "TAA" => "TER", "TAG" => "TER", "TGA" => "TER", "---" => "---"); foreach my $codon (keys %TWOD_CODONS) { $CODONS{$codon} = $TWOD_CODONS{$codon}; } foreach my $codon (keys %FOURD_CODONS) { $CODONS{$codon} = $FOURD_CODONS{$codon}; } =head2 get_SimpleAlign Arg [1] : string 'cdna' (optional) Example : $simple_align = $homology->get_SimpleAlign(); $cdna_s_align = $homology->get_SimpleAlign('cdna'); Description: get pairwise simple alignment (from the multialignment) Returntype : Bio::SimpleAlign =cut sub get_SimpleAlign { my $self = shift; my $alignment = shift; my $changeSelenos = shift; unless (defined $changeSelenos) { $changeSelenos = 0; } my $sa = Bio::SimpleAlign->new(); #Hack to try to work with both bioperl 0.7 and 1.2: #Check to see if the method is called 'addSeq' or 'add_seq' my $bio07 = 0; if(!$sa->can('add_seq')) { $bio07 = 1; } my $ma = $self->adaptor->db->get_MemberAdaptor; foreach my $member_attribute (@{$self->get_all_Member_Attribute}) { my ($member, $attribute) = @{$member_attribute}; if ($member->chr_name =~ /mt/i) { # codeml icodes # 0:universal code (default) if ($member->taxon->classification =~ /vertebrata/i) { # 1:mamalian mt $sa->{_special_codeml_icode} = 1; } else { # 4:invertebrate mt $sa->{_special_codeml_icode} = 4; } } my $peptide_member = $ma->fetch_by_dbID($attribute->peptide_member_id); my $seqstr; my $alphabet = 'protein'; if (defined $alignment && $alignment =~ /^cdna$/i) { $seqstr = $attribute->cdna_alignment_string($peptide_member,$changeSelenos); $seqstr =~ s/\s+//g; $alphabet = 'dna'; } else { $seqstr = $attribute->alignment_string($peptide_member); } next if(!$seqstr); my $cigar_start = $attribute->cigar_start; my $cigar_end = $attribute->cigar_end; $cigar_start = 1 unless (defined $cigar_start && $cigar_start != 0); unless (defined $cigar_end && $cigar_end != 0) { $cigar_end = $peptide_member->seq_length; $cigar_end = $cigar_end*3 if (defined $alignment && $alignment =~ /^cdna$/i); } #print STDERR "cigar_start $cigar_start cigar_end $cigar_end\n"; my $seq = Bio::LocatableSeq->new(-SEQ => $seqstr, -ALPHABET => $alphabet, -START => $cigar_start, -END => $cigar_end, -ID => $peptide_member->stable_id, -STRAND => 0); if($bio07) { $sa->addSeq($seq); } else { $sa->add_seq($seq); } } return $sa; } =head2 subtype Arg [1] : string $subtype (optional) Example : $subtype = $homology->subtype(); $homology->subtype($subtype); Description: getter/setter of string description of homology subtype. Examples: 'Chordata', 'Euteleostomi', 'Homo sapiens' Returntype : string Exceptions : none Caller : general =cut sub subtype { my $self = shift; # deprecate("Use taxonomy_level() instead."); return $self->taxonomy_level(@_); } =head2 taxonomy_level Arg [1] : string $taxonomy_level (optional) Example : $taxonomy_level = $homology->taxonomy_level(); $homology->taxonomy_level($taxonomy_level); Description: getter/setter of string description of homology taxonomy_level. Examples: 'Chordata', 'Euteleostomi', 'Homo sapiens' Returntype : string Exceptions : none Caller : general =cut sub taxonomy_level { my $self = shift; $self->{'_subtype'} = shift if(@_); $self->{'_subtype'} = '' unless($self->{'_subtype'}); return $self->{'_subtype'}; } =head2 taxonomy_alias Arg [1] : string $taxonomy_alias (optional) Example : $taxonomy_alias = $homology->taxonomy_alias(); $homology->taxonomy_alias($taxonomy_alias); Description: get string description of homology taxonomy_alias. Examples: 'Chordates', 'Bony vertebrates', 'Homo sapiens' Defaults to taxonomy_level if alias is not in the db Returntype : string Exceptions : none Caller : general =cut sub taxonomy_alias { my $self = shift; my $ancestor_node_id = $self->ancestor_node_id; my $tree_dba = $self->adaptor->db->get_ProteinTreeAdaptor; my $ancestor = $tree_dba->fetch_node_by_node_id($ancestor_node_id); return $ancestor->get_tagvalue('taxon_alias') || undef; } =head2 n Arg [1] : float $n (optional) Example : $n = $homology->n(); $homology->n(3); Description: getter/setter of number of nonsynonymous positions for the homology. Returntype : float Exceptions : none Caller : general =cut sub n { my $self = shift; $self->{'_n'} = shift if(@_); return $self->{'_n'}; } =head2 s Arg [1] : float $s (optional) Example : $s = $homology->s(); $homology->s(4); Description: getter/setter of number of synonymous positions for the homology. Returntype : float Exceptions : none Caller : general =cut sub s { my $self = shift; $self->{'_s'} = shift if(@_); return $self->{'_s'}; } =head2 lnl Arg [1] : float $lnl (optional) Example : $lnl = $homology->lnl(); $homology->lnl(-1234.567); Description: getter/setter of number of the negative log likelihood for the dnds homology calculation. Returntype : float Exceptions : none Caller : general =cut sub lnl { my $self = shift; $self->{'_lnl'} = shift if(@_); return $self->{'_lnl'}; } =head2 threshold_on_ds Arg [1] : float $threshold_ond_ds (optional) Example : $lnl = $homology->threshold_on_ds(); $homology->threshold_on_ds(1.01340); Description: getter/setter of the threshold on ds for which the dnds ratio still makes sense. Returntype : float Exceptions : none Caller : general =cut sub threshold_on_ds { my $self = shift; $self->{'_threshold_on_ds'} = shift if(@_); return $self->{'_threshold_on_ds'}; } =head2 dn Arg [1] : floating $dn (can be undef) Arg [2] : boolean $apply_threshold_on_ds (optional, default = 1) Can be 0 or 1. Example : $homology->dn or $homology->dn(0.1209) if you want to retrieve dn without applying threshold_on_ds, the right call is $homology->dn(undef,0). Description: set/get the non synonymous subtitution rate Returntype : floating Exceptions : Caller : =cut sub dn { my ($self, $dn, $apply_threshold_on_ds) = @_; $self->{'_dn'} = $dn if (defined $dn); $apply_threshold_on_ds = 1 unless (defined $apply_threshold_on_ds); unless (defined $self->ds(undef, $apply_threshold_on_ds)) { return undef; } return $self->{'_dn'}; } =head2 ds Arg [1] : floating $ds (can be undef) Arg [2] : boolean $apply_threshold_on_ds (optional, default = 1) Can be 0 or 1. Example : $homology->ds or $homology->ds(0.9846) if you want to retrieve ds without applying threshold_on_ds, the right call is $homology->dn(undef,0). Description: set/get the synonymous subtitution rate Returntype : floating Exceptions : Caller : =cut sub ds { my ($self, $ds, $apply_threshold_on_ds) = @_; $self->{'_ds'} = $ds if (defined $ds); $apply_threshold_on_ds = 1 unless (defined $apply_threshold_on_ds); if (defined $self->{'_ds'} && defined $self->{'_threshold_on_ds'} && $self->{'_ds'} > $self->{'_threshold_on_ds'}) { if ($apply_threshold_on_ds) { return undef; } else { warning("Threshold on ds values is switched off. Be aware that you may obtain saturated ds values that are not to be trusted, neither the dn/ds ratio\n"); } } return $self->{'_ds'}; } =head2 dnds_ratio Arg [1] : boolean $apply_threshold_on_ds (optional, default = 1) Can be 0 or 1. Example : $homology->dnds_ratio or $homology->dnds_ratio(0) if you want to obtain a result even when the dS is above the threshold on dS. Description: return the ratio of dN/dS Returntype : floating Exceptions : Caller : =cut sub dnds_ratio { my $self = shift; my $apply_threshold_on_ds = shift; $apply_threshold_on_ds = 1 unless (defined $apply_threshold_on_ds); my $ds = $self->ds(undef, $apply_threshold_on_ds); my $dn = $self->dn(undef, $apply_threshold_on_ds); unless (defined $dn && defined $ds && $ds !=0) { return undef; } unless (defined $self->{'_dnds_ratio'}) { $self->{'_dnds_ratio'} = sprintf("%.5f",$dn/$ds); } return $self->{'_dnds_ratio'}; } =head2 get_4D_SimpleAlign Example : $4d_align = $homology->get_4D_SimpleAlign(); Description: get 4 times degenerate positions pairwise simple alignment Returntype : Bio::SimpleAlign =cut sub get_4D_SimpleAlign { my $self = shift; my $sa = Bio::SimpleAlign->new(); #Hack to try to work with both bioperl 0.7 and 1.2: #Check to see if the method is called 'addSeq' or 'add_seq' my $bio07 = 0; if(!$sa->can('add_seq')) { $bio07 = 1; } my $ma = $self->adaptor->db->get_MemberAdaptor; my %member_seqstr; foreach my $member_attribute (@{$self->get_all_Member_Attribute}) { my ($member, $attribute) = @{$member_attribute}; my $peptide_member = $ma->fetch_by_dbID($attribute->peptide_member_id); my $seqstr; $seqstr = $attribute->cdna_alignment_string($peptide_member); next if(!$seqstr); # print STDERR $seqstr,"\n"; my @tmp_tab = split /\s+/, $seqstr; # print STDERR "tnp_tab 0: ", $tmp_tab[0],"\n"; $member_seqstr{$peptide_member->stable_id} = \@tmp_tab; } my $seqstr_length; foreach my $seqid (keys %member_seqstr) { unless (defined $seqstr_length) { # print STDERR $member_seqstr{$seqid}->[0],"\n"; $seqstr_length = scalar @{$member_seqstr{$seqid}}; next; } unless ($seqstr_length == scalar @{$member_seqstr{$seqid}}) { die "Length of dna alignment are not the same, $seqstr_length and " . scalar @{$member_seqstr{$seqid}} ." respectively for homology_id " . $self->dbID . "\n"; } } my %FourD_member_seqstr; for (my $i=0; $i < $seqstr_length; $i++) { my $FourD_codon = 1; my $FourD_aminoacid; foreach my $seqid (keys %member_seqstr) { if (FourD_codon($member_seqstr{$seqid}->[$i])) { if (defined $FourD_aminoacid && $FourD_aminoacid eq $FOURD_CODONS{$member_seqstr{$seqid}->[$i]}) { # print STDERR "YES ",$FOURD_CODONS{$member_seqstr{$seqid}->[$i]}," ",$member_seqstr{$seqid}->[$i],"\n"; next; } elsif (defined $FourD_aminoacid) { # print STDERR "NO ",$FOURD_CODONS{$member_seqstr{$seqid}->[$i]}," ",$member_seqstr{$seqid}->[$i],"\n"; $FourD_codon = 0; last; } else { $FourD_aminoacid = $FOURD_CODONS{$member_seqstr{$seqid}->[$i]}; # print STDERR $FOURD_CODONS{$member_seqstr{$seqid}->[$i]}," ",$member_seqstr{$seqid}->[$i]," "; } next; } else { # print STDERR "NO ",$CODONS{$member_seqstr{$seqid}->[$i]}," ",$member_seqstr{$seqid}->[$i],"\n"; $FourD_codon = 0; last; } } next unless ($FourD_codon); foreach my $seqid (keys %member_seqstr) { $FourD_member_seqstr{$seqid} .= substr($member_seqstr{$seqid}->[$i],2,1); } } foreach my $seqid (keys %FourD_member_seqstr) { my $seq = Bio::LocatableSeq->new(-SEQ => $FourD_member_seqstr{$seqid}, -START => 1, -END => length($FourD_member_seqstr{$seqid}), -ID => $seqid, -STRAND => 0); if($bio07) { $sa->addSeq($seq); } else { $sa->add_seq($seq); } } return $sa; } sub FourD_codon { my ($codon) = @_; if (defined $FOURD_CODONS{$codon}) { return 1; } return 0; } sub TwoD_codon { my ($codon) = @_; if (defined $TWOD_CODONS{$codon}) { return 1; } return 0; } =head2 print_homology Example : $homology->print_homology Description: This method prints a short descriptor of the homology USE ONLY FOR DEBUGGING not for data output since the format of this output may change as need dictates. =cut sub print_homology { my $self = shift; printf("Homology %d,%s,%s : ", $self->dbID, $self->description, $self->subtype); foreach my $member_attribute (@{$self->get_all_Member_Attribute}) { my ($member, $attribute) = @{$member_attribute}; printf("%s(%d)\t", $member->stable_id, $member->dbID); } print("\n"); } =head2 get_all_PeptideAlignFeature Example : my ($paf) = @{$homology->get_all_PeptideAlignFeature}; my ($paf, $recipPaf) = @{$homology->get_all_PeptideAlignFeature}; Description: return the peptide_align_feature object (and its reciprocal hit) used to create this homology Returntype : array ref of peptide_align_feature objects Exceptions : Caller : =cut sub get_all_PeptideAlignFeature { my $self = shift; my @pafs; throw("homology must have a valid adaptor and db in order to get peptide_align_features") unless($self->adaptor and $self->adaptor->db); my $pafDBA = $self->adaptor->db->get_PeptideAlignFeatureAdaptor; foreach my $RefArrayOfMemberAttributeArrayRef ($self->get_Member_Attribute_by_source("ENSEMBLGENE")) { foreach my $memAttributeArrayRef (@{$RefArrayOfMemberAttributeArrayRef}) { my $member = $memAttributeArrayRef->[0]; my $attribute = $memAttributeArrayRef->[1]; if($attribute->peptide_align_feature_id) { my $paf = $pafDBA->fetch_by_dbID($attribute->peptide_align_feature_id); push @pafs, $paf; } } } return \@pafs; } =head2 has_species_by_name Arg [1] : string $species_name Example : my $ret = $homology->has_species_by_name("Homo sapiens"); Description: return TRUE or FALSE whether one of the members in the homology is from the given species Returntype : 1 or 0 Exceptions : Caller : =cut sub has_species_by_name { my $self = shift; my $species_name = shift; foreach my $member_attribute (@{$self->get_all_Member_Attribute}) { my ($member, $attribute) = @{$member_attribute}; return 1 if($member->genome_db->name eq $species_name); } return 0; } =head2 gene_list Example : my $pair = $homology->gene_list Description: return the pair of members for the homology Returntype : array ref of (2) Bio::EnsEMBL::Compara::Member objects Caller : general =cut sub gene_list { my $self = shift; my @genes; foreach my $member_attribute (@{$self->get_all_Member_Attribute}) { my ($member, $attribute) = @{$member_attribute}; push @genes, $member; } return \@genes; } =head2 homology_key Example : my $key = $homology->homology_key; Description: returns a string uniquely identifying this homology in world space. uses the gene_stable_ids of the members and orders them by taxon_id and concatonates them together. Returntype : string Exceptions : Caller : =cut sub homology_key { my $self = shift; return $self->{'_homology_key'} if(defined($self->{'_homology_key'})); my @genes; foreach my $member_attribute (@{$self->get_all_Member_Attribute}) { my ($member, $attribute) = @{$member_attribute}; push @genes, $member; } @genes = sort {$a->taxon_id <=> $b->taxon_id || $a->stable_id cmp $b->stable_id} @genes; @genes = map ($_->stable_id, @genes); my $homology_key = join('_', @genes); return $homology_key; } =head2 node_id Arg [1] : int $node_id (optional) Example : $node_id = $homology->node_id(); $homology->subtype($node_id); Description: getter/setter of integer that refer to a node_id in the protein_tree data. Returntype : int Exceptions : none Caller : general =cut sub node_id { my $self = shift; $self->{'_ancestor_node_id'} = shift if(@_); $self->{'_ancestor_node_id'} = '' unless($self->{'_ancestor_node_id'}); return $self->{'_ancestor_node_id'}; } =head2 ancestor_node_id Arg [1] : int $ancestor_node_id (optional) Example : $ancestor_node_id = $homology->ancestor_node_id(); $homology->subtype($ancestor_node_id); Description: getter/setter of integer that refer to the ancestor_node_id in the protein_tree data. Returntype : int Exceptions : none Caller : general =cut sub ancestor_node_id { my $self = shift; $self->{'_ancestor_node_id'} = shift if(@_); $self->{'_ancestor_node_id'} = '' unless($self->{'_ancestor_node_id'}); return $self->{'_ancestor_node_id'}; } =head2 tree_node_id Arg [1] : int $tree_node_id (optional) Example : $tree_node_id = $homology->tree_node_id(); $homology->subtype($tree_node_id); Description: getter/setter of integer that refer to the tree node_id in the protein_tree data. Returntype : int Exceptions : none Caller : general =cut sub tree_node_id { my $self = shift; $self->{'_tree_node_id'} = shift if(@_); $self->{'_tree_node_id'} = '' unless($self->{'_tree_node_id'}); return $self->{'_tree_node_id'}; } 1;