Raw content of Bio::Taxonomy::Taxon # $Id: Taxon.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $ # # BioPerl module for Bio::Taxonomy::Taxon # # Cared for by Dan Kortschak but pilfered extensively from # the Bio::Tree::Node code of Jason Stajich # # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =head1 NAME Bio::Taxonomy::Taxon - Generic Taxonomic Entity object =head1 SYNOPSIS use Bio::Taxonomy::Taxon; my $taxonA = new Bio::Taxonomy::Taxon(); my $taxonL = new Bio::Taxonomy::Taxon(); my $taxonR = new Bio::Taxonomy::Taxon(); my $taxon = new Bio::Taxonomy::Taxon(); $taxon->add_Descendents($nodeL); $taxon->add_Descendents($nodeR); $species = $taxon->species; =head1 DESCRIPTION Makes a taxonomic unit suitable for use in a taxonomic tree =head1 CONTACT Dan Kortschak email B<kortschak@rsbs.anu.edu.au> =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # code begins... package Bio::Taxonomy::Taxon; use vars qw(@ISA $CREATIONORDER); use strict; # Object preamble - inherits from Bio::Root::Object, Bio::Tree::NodeI, Bio::Species and Bio::Taxonomy use Bio::Root::Root; use Bio::Tree::NodeI; use Bio::Taxonomy; use Bio::Species; # import rank information from Bio::Taxonomy.pm use vars qw(@RANK %RANK); @ISA = qw(Bio::Root::Root Bio::Tree::NodeI); BEGIN { $CREATIONORDER = 0; } =head2 new Title : new Usage : my $obj = new Bio::Taxonomy::Taxon(); Function: Builds a new Bio::Taxonomy::Taxon object Returns : Bio::Taxonomy::Taxon Args : -descendents => array pointer to descendents (optional) -branch_length => branch length [integer] (optional) -taxon => taxon -id => unique taxon id for node (from NCBI's list preferably) -rank => the taxonomic level of the node (also from NCBI) =cut sub new { my($class,@args) = @_; my $self = $class->SUPER::new(@args); my ($children,$branchlen,$id,$taxon,$rank,$desc) = $self->_rearrange([qw(DESCENDENTS BRANCH_LENGTH ID TAXON RANK DESC)], @args); $self->{'_desc'} = {}; defined $desc && $self->description($desc); defined $taxon && $self->taxon($taxon); defined $id && $self->id($id); defined $branchlen && $self->branch_length($branchlen); defined $rank && $self->rank($rank); if( defined $children ) { if( ref($children) !~ /ARRAY/i ) { $self->warn("Must specify a valid ARRAY reference to initialize a Taxon's Descendents"); } foreach my $c ( @$children ) { $self->add_Descendent($c); } } $self->_creation_id($CREATIONORDER++); return $self; } =head2 add_Descendent Title : add_Descendent Usage : $taxon->add_Descendant($taxon); Function: Adds a descendent to a taxon Returns : number of current descendents for this taxon Args : Bio::Taxonomy::Taxon boolean flag, true if you want to ignore the fact that you are adding a second node with the same unique id (typically memory location reference in this implementation). default is false and will throw an error if you try and overwrite an existing node. =cut sub add_Descendent{ my ($self,$node,$ignoreoverwrite) = @_; return -1 if( ! defined $node ) ; if( ! $node->isa('Bio::Taxonomy::Taxon') ) { $self->warn("Trying to add a Descendent who is not a Bio::Taxonomy::Taxon"); return -1; } # do we care about order? $node->{'_ancestor'} = $self; if( $self->{'_desc'}->{$node->internal_id} && ! $ignoreoverwrite ) { $self->throw("Going to overwrite a taxon which is $node that is already stored here, set the ignore overwrite flag (parameter 2) to true to ignore this in the future"); } $self->{'_desc'}->{$node->internal_id} = $node; # is this safely unique - we've tested before at any rate?? $self->invalidate_height(); return scalar keys %{$self->{'_desc'}}; } =head2 each_Descendent Title : each_Descendent($sortby) Usage : my @taxa = $taxon->each_Descendent; Function: all the descendents for this taxon (but not their descendents i.e. not a recursive fetchall) Returns : Array of Bio::Taxonomy::Taxon objects Args : $sortby [optional] "height", "creation" or coderef to be used to sort the order of children taxa. =cut sub each_Descendent{ my ($self, $sortby) = @_; # order can be based on branch length (and sub branchlength) $sortby ||= 'height'; if (ref $sortby eq 'CODE') { return sort $sortby values %{$self->{'_desc'}}; } else { if ($sortby eq 'height') { return map { $_->[0] } sort { $a->[1] <=> $b->[1] || $a->[2] <=> $b->[2] } map { [$_, $_->height, $_->internal_id ] } values %{$self->{'_desc'}}; } else { return map { $_->[0] } sort { $a->[1] <=> $b->[1] } map { [$_, $_->height ] } values %{$self->{'_desc'}}; } } } =head2 remove_Descendent Title : remove_Descendent Usage : $taxon->remove_Descedent($taxon_foo); Function: Removes a specific taxon from being a Descendent of this taxon Returns : nothing Args : An array of Bio::taxonomy::Taxon objects which have be previously passed to the add_Descendent call of this object. =cut sub remove_Descendent{ my ($self,@nodes) = @_; foreach my $n ( @nodes ) { if( $self->{'_desc'}->{$n->internal_id} ) { $n->{'_ancestor'} = undef; $self->{'_desc'}->{$n->internal_id}->{'_ancestor'} = undef; delete $self->{'_desc'}->{$n->internal_id}; } else { $self->debug(sprintf("no taxon %s (%s) listed as a descendent in this taxon %s (%s)\n",$n->id, $n,$self->id,$self)); $self->debug("Descendents are " . join(',', keys %{$self->{'_desc'}})."\n"); } } 1; } =head2 remove_all_Descendents Title : remove_all_Descendents Usage : $taxon->remove_All_Descendents() Function: Cleanup the taxon's reference to descendents and reset their ancestor pointers to undef, if you don't have a reference to these objects after this call they will be cleanedup - so a get_nodes from the Tree object would be a safe thing to do first Returns : nothing Args : none =cut sub remove_all_Descendents{ my ($self) = @_; # this won't cleanup the taxa themselves if you also have # a copy/pointer of them (I think)... while( my ($node,$val) = each %{ $self->{'_desc'} } ) { $val->{'_ancestor'} = undef; } $self->{'_desc'} = {}; 1; } =head2 get_Descendents Title : get_Descendents Usage : my @taxa = $taxon->get_Descendents; Function: Recursively fetch all the taxa and their descendents *NOTE* This is different from each_Descendent Returns : Array or Bio::Taxonomy::Taxon objects Args : none =cut # implemented in the interface =head2 ancestor Title : ancestor Usage : $taxon->ancestor($newval) Function: Set the Ancestor Returns : value of ancestor Args : newvalue (optional) =cut sub ancestor { my ($self, $value) = @_; if (defined $value) { $self->{'_ancestor'} = $value; } return $self->{'_ancestor'}; } =head2 branch_length Title : branch_length Usage : $obj->branch_length($newval) Function: Example : Returns : value of branch_length Args : newvalue (optional) =cut sub branch_length { my ($self,$value) = @_; if( defined $value) { $self->{'branch_length'} = $value; } return $self->{'branch_length'}; } =head2 description Title : description Usage : $obj->description($newval) Function: Example : Returns : value of description Args : newvalue (optional) =cut sub description { my ($self,$value) = @_; if( defined $value ) { $self->{'_desc'} = $value; } return $self->{'_desc'}; } =head2 rank Title : rank Usage : $obj->rank($newval) Function: Set the taxonomic rank Example : Returns : taxonomic rank of taxon Args : newvalue (optional) =cut sub rank { my ($self,$value) = @_; if (defined $value) { my $ranks=join("|",@RANK); if ($value=~/$ranks/) { $self->{'_rank'} = $value; } else { $self->throw("Attempted to set unknown taxonomic rank: $value.\n"); } } return $self->{'_rank'}; } =head2 taxon Title : taxon Usage : $obj->taxon($newtaxon) Function: Set the name of the taxon Example : Returns : name of taxon Args : newtaxon (optional) =cut # because internal taxa have names too... sub taxon { my ($self,$value) = @_; if( defined $value ) { $self->{'_taxon'} = $value; } return $self->{'_taxon'}; } =head2 id Title : id Usage : $obj->id($newval) Function: Example : Returns : value of id Args : newvalue (optional) =cut sub id { my ($self,$value) = @_; if( defined $value ) { $self->{'_id'} = $value; } return $self->{'_id'}; } sub DESTROY { my ($self) = @_; # try to insure that everything is cleaned up $self->SUPER::DESTROY(); if( defined $self->{'_desc'} && ref($self->{'_desc'}) =~ /ARRAY/i ) { while( my ($nodeid,$node) = each %{ $self->{'_desc'} } ) { $node->{'_ancestor'} = undef; # ensure no circular references $node->DESTROY(); $node = undef; } $self->{'_desc'} = {}; } } =head2 internal_id Title : internal_id Usage : my $internalid = $taxon->internal_id Function: Returns the internal unique id for this taxon (a monotonically increasing number for this in-memory implementation but could be a database determined unique id in other implementations) Returns : unique id Args : none =cut sub internal_id { return $_[0]->_creation_id; } =head2 _creation_id Title : _creation_id Usage : $obj->_creation_id($newval) Function: a private method signifying the internal creation order Returns : value of _creation_id Args : newvalue (optional) =cut sub _creation_id { my ($self,$value) = @_; if( defined $value) { $self->{'_creation_id'} = $value; } return $self->{'_creation_id'} || 0; } # The following methods are implemented by NodeI decorated interface =head2 is_Leaf Title : is_Leaf Usage : if( $node->is_Leaf ) Function: Get Leaf status Returns : boolean Args : none =cut sub is_Leaf { my ($self) = @_; my $rc = 0; $rc = 1 if( ! defined $self->{'_desc'} || keys %{$self->{'_desc'}} == 0); return $rc; } =head2 to_string Title : to_string Usage : my $str = $taxon->to_string() Function: For debugging, provide a taxon as a string Returns : string Args : none =cut =head2 height Title : height Usage : my $len = $taxon->height Function: Returns the height of the tree starting at this taxon. Height is the maximum branchlength. Returns : The longest length (weighting branches with branch_length) to a leaf Args : none =cut sub height { my ($self) = @_; return $self->{'_height'} if( defined $self->{'_height'} ); if( $self->is_Leaf ) { if( !defined $self->branch_length ) { $self->debug(sprintf("Trying to calculate height of a taxon when a taxon (%s) has an undefined branch_length",$self->id || '?' )); return 0; } return $self->branch_length; } my $max = 0; foreach my $subnode ( $self->each_Descendent ) { my $s = $subnode->height; if( $s > $max ) { $max = $s; } } return ($self->{'_height'} = $max + ($self->branch_length || 1)); } =head2 invalidate_height Title : invalidate_height Usage : private helper method Function: Invalidate our cached value of the taxon's height in the tree Returns : nothing Args : none =cut sub invalidate_height { my ($self) = @_; $self->{'_height'} = undef; if( $self->ancestor ) { $self->ancestor->invalidate_height; } } =head2 classify Title : classify Usage : @obj->classify() Function: a method to return the classification of a species Returns : name of taxon and ancestor's taxon recursively Args : boolean to specify whether we want all taxa not just ranked levels =cut sub classify { my ($self,$allnodes) = @_; my @classification=($self->taxon); my $node=$self; while (defined $node->ancestor) { push @classification, $node->ancestor->taxon if $allnodes==1; $node=$node->ancestor; } return (@classification); } =head2 has_rank Title : has_rank Usage : $obj->has_rank($rank) Function: a method to query ancestors' rank Returns : boolean Args : $rank =cut sub has_rank { my ($self,$rank) = @_; return $self if $self->rank eq $rank; while (defined $self->ancestor) { return $self if $self->ancestor->rank eq $rank; $self=$self->ancestor; } return undef; } =head2 has_taxon Title : has_taxon Usage : $obj->has_taxon($taxon) Function: a method to query ancestors' taxa Returns : boolean Args : Bio::Taxonomy::Taxon object =cut sub has_taxon { my ($self,$taxon) = @_; return $self if ((defined $self->id && $self->id == $taxon->id) || ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank)); while (defined $self->ancestor) { return $self if ((defined $self->id && $self->id == $taxon->id) || ($self->taxon eq $taxon->taxon && $self->rank eq $taxon->rank) && ($self->taxon ne 'no rank')); $self=$self->ancestor; } return undef; } =head2 distance_to_root Title : distance_to_root Usage : $obj->distance_to_root Function: a method to query ancestors' taxa Returns : number of links to root Args : =cut sub distance_to_root { my ($self,$taxon) = @_; my $count=0; while (defined $self->ancestor) { $count++; $self=$self->ancestor; } return $count; } =head2 recent_common_ancestor Title : recent_common_ancestor Usage : $obj->recent_common_ancestor($taxon) Function: a method to query find common ancestors Returns : Bio::Taxonomy::Taxon of query or undef if no ancestor of rank Args : Bio::Taxonomy::Taxon =cut sub recent_common_ancestor { my ($self,$node) = @_; while (defined $node->ancestor) { my $common=$self->has_taxon($node); return $common if defined $common; $node=$node->ancestor; } return undef; } =head2 species Title : species Usage : $obj=$taxon->species; Function: Returns a Bio::Species object reflecting the taxon's tree position Returns : a Bio::Species object Args : none =cut sub species { my ($self) = @_; my $species; if ($self->has_rank('subspecies') && $self->ancestor->rank eq 'species') { $species = Bio::Species->new(-classification => $self->ancestor->classify); $species->genus($self->ancestor->ancestor->taxon); $species->species($self->ancestor->taxon); $species->sub_species($self->taxon); } elsif ($self->has_rank('species')) { $species = Bio::Species->new(-classification => $self->classify); $species->genus($self->ancestor->taxon); $species->species($self->taxon); } else { $self->throw("Trying to create a species from a taxonomic entity without species rank. Use classify instead of species.\n"); } return $species; } 1;