# Raw content of Bio::Taxonomy::Tree
# $Id: Tree.pm,v 1.1 2002/11/18 22:08:33 kortsch Exp $
#
# BioPerl module for Bio::Taxonomy::Tree
#
# Cared for by Dan Kortschak but pilfered extensively from Bio::Tree::Tree by Jason Stajich
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
Bio::Taxonomy::Tree - An Organism Level Implementation of TreeI interface.
=head1 SYNOPSIS
# like from a TreeIO
my $treeio = new Bio::TreeIO(-format => 'newick', -file => 'treefile.dnd');
my $tree = $treeio->next_tree;
my @nodes = $tree->get_nodes;
my $root = $tree->get_root_node;
my @leaves = $tree->get_leaves;
=head1 DESCRIPTION
This object holds handles to Taxonomic Nodes which make up a tree.
=head1 EXAMPLES
use Bio::Species;
use Bio::Taxonomy::Tree;
my $human=new Bio::Species;
my $chimp=new Bio::Species;
my $bonobo=new Bio::Species;
$human->classification(qw( sapiens Homo Hominidae
Catarrhini Primates Eutheria
Mammalia Euteleostomi Vertebrata
Craniata Chordata
Metazoa Eukaryota ));
$chimp->classification(qw( troglodytes Pan Hominidae
Catarrhini Primates Eutheria
Mammalia Euteleostomi Vertebrata
Craniata Chordata
Metazoa Eukaryota ));
$bonobo->classification(qw( paniscus Pan Hominidae
Catarrhini Primates Eutheria
Mammalia Euteleostomi Vertebrata
Craniata Chordata
Metazoa Eukaryota ));
# ranks passed to $taxonomy match ranks of species
my @ranks = ('superkingdom','kingdom','phylum','subphylum',
'no rank 1','no rank 2','class','no rank 3','order',
'suborder','family','genus','species');
my $taxonomy=new Bio::Taxonomy(-ranks => \@ranks,
-method => 'trust',
-order => -1);
my @nodes;
my $tree1=new Bio::Taxonomy::Tree;
my $tree2=new Bio::Taxonomy::Tree;
push @nodes,$tree1->make_species_branch($human,$taxonomy);
push @nodes,$tree2->make_species_branch($chimp,$taxonomy);
my ($homo_sapiens)=$tree1->get_leaves;
$tree1->splice($tree2);
push @nodes,$tree1->add_species($bonobo,$taxonomy);
my @taxa;
foreach my $leaf ($tree1->get_leaves) {
push @taxa,$leaf->taxon;
}
print join(", ",@taxa)."\n";
@taxa=();
$tree1->remove_branch($homo_sapiens);
foreach my $leaf ($tree1->get_leaves) {
push @taxa,$leaf->taxon;
}
print join(", ",@taxa)."\n";
=head1 FEEDBACK
See AUTHOR
=head1 AUTHOR - Dan Kortschak
Email kortschak@rsbs.anu.edu.au
=head1 CONTRIBUTORS
Mainly Jason Stajich
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
# Code begins...
package Bio::Taxonomy::Tree;
use vars qw(@ISA);
use strict;
# Object preamble - inherits from Bio::Root::Root
use Bio::Root::Root;
use Bio::Tree::TreeFunctionsI;
use Bio::Tree::TreeI;
use Bio::Taxonomy::Taxon;
# Import rank information from Bio::Taxonomy.pm
use vars qw(@RANK %RANK);
@ISA = qw(Bio::Root::Root Bio::Tree::TreeI Bio::Tree::TreeFunctionsI);
=head2 new
Title : new
Usage : my $obj = new Bio::Taxonomy::Tree();
Function: Builds a new Bio::Taxonomy::Tree object
Returns : Bio::Taxonomy::Tree
Args :
=cut
# Constructor. Delegates object creation to the parent class, resets the
# tree's bookkeeping fields, and installs the ROOT named argument as the
# root node when a true value was supplied for it.
sub new {
    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    # Start from a blank tree before looking at the caller's arguments.
    $self->{'_rootnode'}     = undef;
    $self->{'_maxbranchlen'} = 0;

    my ($root) = $self->_rearrange( [qw(ROOT)], @args );
    $self->set_root_node($root) if $root;

    return $self;
}
=head2 get_nodes
Title : get_nodes
Usage : my @nodes = $tree->get_nodes()
Function: Return list of Bio::Taxonomy::Taxon objects
Returns : array of Bio::Taxonomy::Taxon objects
Args : (named values) hash with up to two values
order => 'b|breadth' first order or 'd|depth' first order
sortby => ordering passed through to each_Descendent/get_Descendents
(defaults to 'height')
=cut
# Return every node of the tree as a flat list, root first.
#
# Named arguments (parsed with _rearrange):
#   order  - 'b' or 'breadth' for a breadth-first listing,
#            'd' or 'depth' for a depth-first listing (the default)
#   sortby - handed unchanged to each_Descendent / get_Descendents
#            (defaults to 'height')
#
# Returns an empty list when the tree has no root node, or when the
# requested order is not recognised (a warning is issued in that case).
sub get_nodes {
    my ($self, @args) = @_;

    my ($order, $sortby) = $self->_rearrange([qw(ORDER SORTBY)], @args);
    $order  ||= 'depth';
    $sortby ||= 'height';

    # Nothing to walk on an empty tree; avoids calling methods on undef.
    my $root = $self->get_root_node;
    return unless defined $root;

    # The alternation is grouped so both anchors apply to both spellings;
    # the ungrouped form /^b|(breadth)$/ accepted any string that merely
    # started with "b".
    if ($order =~ m/^(?:b|breadth)$/i) {
        # Explicit work queue instead of growing an array while a
        # foreach loop is iterating over it.
        my @queue = ($root);
        my @visited;
        while (@queue) {
            my $current = shift @queue;
            push @visited, $current;
            push @queue, $current->each_Descendent($sortby);
        }
        return @visited;
    }

    if ($order =~ m/^(?:d|depth)$/i) {
        return ($root, $root->get_Descendents($sortby));
    }

    $self->warn("Unknown traversal order '$order', expected 'breadth' or 'depth'");
    return;
}
=head2 get_root_node
Title : get_root_node
Usage : my $node = $tree->get_root_node();
Function: Get the Top Node in the tree, in this implementation
Trees only have one top node.
Returns : Bio::Taxonomy::Taxon object
Args : none
=cut
# Accessor for the tree's single top node: returns whatever is stored
# under the '_rootnode' key (undef when nothing is stored there).
sub get_root_node {
    my $self = shift;
    return $self->{'_rootnode'};
}
=head2 set_root_node
Title : set_root_node
Usage : $tree->set_root_node($node)
Function: Set the Root Node for the Tree
Returns : Bio::Taxonomy::Taxon
Args : Bio::Taxonomy::Taxon
=cut
# Set the root node of the tree and return the current root.
#
# Args    : a Bio::Taxonomy::Taxon object; when omitted or undef the
#           root is left untouched and the call acts as a getter.
# Returns : the root node as reported by get_root_node.
#
# Anything that is not a blessed Bio::Taxonomy::Taxon (including plain
# strings and unblessed references) is rejected with a warning and the
# existing root is kept.
sub set_root_node {
    my ($self, $value) = @_;

    if (defined $value) {
        # Calling ->isa directly on an unblessed reference is a fatal
        # error, so the check runs inside eval and any failure is
        # treated as "not a Taxon". $@ is localised so the caller's
        # error state is not clobbered.
        my $is_taxon = ref($value)
            && do { local $@; eval { $value->isa('Bio::Taxonomy::Taxon') } };

        if (!$is_taxon) {
            $self->warn("Trying to set the root node to $value which is not a Bio::Taxonomy::Taxon");
            return $self->get_root_node;
        }
        $self->{'_rootnode'} = $value;
    }
    return $self->get_root_node;
}
=head2 get_leaves
Title : get_leaves
Usage : my @nodes = $tree->get_leaves()
Function: Return list of Bio::Taxonomy::Taxon objects
Returns : array of Bio::Taxonomy::Taxon objects
Args :
=cut
# Return the leaf nodes of the tree, i.e. every node for which is_Leaf
# is true, in breadth-first order starting from the root.
#
# Returns an empty list when the tree has no root node yet, instead of
# dying on a method call against undef.
sub get_leaves {
    my ($self) = @_;

    my $root = $self->get_root_node;
    return unless defined $root;

    # Explicit work queue: the original appended to an array while a
    # foreach loop was iterating over it, which perlsyn warns against.
    my @pending = ($root);
    my @leaves;
    while (@pending) {
        my $current = shift @pending;
        push @pending, $current->each_Descendent();
        push @leaves, $current if $current->is_Leaf;
    }
    return @leaves;
}
=head2 make_species_branch
Title : make_species_branch
Usage : @nodes = $tree->make_species_branch($species,$taxonomy)
Function: Return list of Bio::Taxonomy::Taxon objects based on a Bio::Species object
Returns : array of Bio::Taxonomy::Taxon objects
Args : Bio::Species and Bio::Taxonomy objects
=cut
# I'm not happy that make_species_branch and make_branch are separate routines;
# one should be able to just call make_branch and have it sort things out
# Build a linear chain of Bio::Taxonomy::Taxon nodes for one species and
# make the first node of that chain the root of this tree.
#
# Args    : a Bio::Species object and a Bio::Taxonomy object
# Returns : the list of Taxon nodes created, in the order produced by
#           $taxonomy->classify
# Throws  : if either argument is not of the expected class
sub make_species_branch {
    my ($self, $species, $taxonomy) = @_;

    $self->throw("Trying to classify $species which is not a Bio::Species object")
        unless $species->isa('Bio::Species');
    $self->throw("Trying to classify with $taxonomy which is not a Bio::Taxonomy object")
        unless $taxonomy->isa('Bio::Taxonomy');

    # Strip whatever hangs off the current root so the new lineage cannot
    # duplicate an existing path.
    my $old_root = $self->get_root_node;
    $old_root->remove_all_Descendents if defined $old_root;

    # Each classify() entry carries the taxon in slot 0 and its rank in
    # slot 1.
    my @lineage = $taxonomy->classify($species);

    my @nodes;
    foreach my $level (@lineage) {
        push @nodes,
            scalar Bio::Taxonomy::Taxon->new(-taxon => $level->[0],
                                             -rank  => $level->[1]);
    }

    # Chain the nodes together: every node becomes the parent of the
    # one that follows it.
    foreach my $i (1 .. $#nodes) {
        $nodes[$i - 1]->add_Descendent($nodes[$i]);
    }

    $self->set_root_node($nodes[0]);
    return @nodes;
}
=head2 make_branch
Title : make_branch
Usage : $tree->make_branch($node)
Function: Make a linear Bio::Taxonomy::Tree object from a leafish node
Returns :
Args : Bio::Taxonomy::Taxon object
=cut
# Make this tree a branch defined by a leaf-ish node: the tree's root
# becomes the topmost ancestor reachable from $node.
#
# Args    : a node providing an ancestor method (Bio::Taxonomy::Taxon)
# Returns : nothing
sub make_branch {
    my ($self, $node) = @_;

    # Strip the current root's descendants so an existing path is not
    # duplicated. Note that if $node belongs to a linked set of nodes
    # that already includes nodes of this tree, this will break.
    # A tree that has no root yet has nothing to strip.
    my $old_root = $self->get_root_node;
    $old_root->remove_all_Descendents if defined $old_root;

    # Climb until a node without an ancestor is reached. The previous
    # loop set the root before stepping upwards and stopped as soon as
    # the next node had no ancestor, which left the root one level below
    # the top (and left it unset for a node with no ancestor at all).
    while (defined(my $parent = $node->ancestor)) {
        $node = $parent;
    }
    $self->set_root_node($node);

    return;
}
=head2 splice
Title : splice
Usage : @nodes = $tree->splice($tree)
Function: Fuse another Bio::Taxonomy::Tree into this one by adding each of its leaves as a branch
Returns : array of Bio::Taxonomy::Taxon added to tree
Args : Bio::Taxonomy::Tree object
=cut
# Merge another tree into this one: every leaf of $tree is handed to
# add_branch, and whatever those calls return is collected into one list.
sub splice {
    my ($self, $tree) = @_;
    return map { $self->add_branch($_) } $tree->get_leaves;
}
=head2 add_species
Title : add_species
Usage : @nodes = $tree->add_species($species,$taxonomy)
Function: Add a new species to this Bio::Taxonomy::Tree
Returns : array of Bio::Taxonomy::Taxon added to tree
Args : Bio::Species and Bio::Taxonomy objects
=cut
# Add one species to this tree: its lineage is first built in a
# throw-away Bio::Taxonomy::Tree, then grafted on via add_branch.
#
# Args    : a Bio::Species object and a Bio::Taxonomy object
# Returns : whatever add_branch returns for the new lineage
sub add_species {
    my ($self, $species, $taxonomy) = @_;

    # Scratch tree holding nothing but the new species' lineage.
    my $scratch = Bio::Taxonomy::Tree->new;
    my @lineage = $scratch->make_species_branch($species, $taxonomy);

    # Graft starting from the first leaf the scratch tree reports.
    my @tips = $scratch->get_leaves;
    return $self->add_branch($tips[0]);
}
=head2 add_branch
Title : add_branch
Usage : $tree->add_branch($node,boolean)
Function: Add a new branch to this Bio::Taxonomy::Tree object
Returns : array of Bio::Taxonomy::Taxon objects of the resulting tree
Args : Bio::Taxonomy::Taxon object
boolean flag to force overwrite of descendent
(see Bio::Node->add_Descendent)
=cut
# Graft the lineage that $node belongs to onto this tree.
#
# For every leaf of the tree, $node->recent_common_ancestor($leaf) is
# asked for the shared ancestor (the root of the part to add); the one
# with the greatest distance_to_root wins. The tree node matching that
# ancestor (same id, or same rank and taxon for ranked nodes) then
# receives the ancestor's descendants.
#
# Args    : $node  - Bio::Taxonomy::Taxon object
#           $force - boolean, passed through to add_Descendent
# Returns : all descendants of the selected common ancestor, or an empty
#           list when no usable common ancestor was found
sub add_branch {
    my ($self, $node, $force) = @_;

    # NOTE(review): with a starting level of 0 and a strict ">", a common
    # ancestor whose distance_to_root is 0 is never selected - confirm
    # that lineages sharing only their top node are meant to be refused.
    my $best_node_level = 0;
    my $best_node;
    foreach my $leaf ($self->get_leaves) {
        my $common = $node->recent_common_ancestor($leaf);
        next unless defined $common;

        my $level = $common->distance_to_root;
        if ($level > $best_node_level) {
            $best_node_level = $level;
            $best_node       = $common;
        }
    }
    return unless defined $best_node;

    my $root       = $self->get_root_node;
    my @tree_nodes = ($root, $root->get_Descendents);

    foreach my $candidate (@tree_nodes) {
        # Ids are only compared when both sides actually have one.
        my $same_id = defined $best_node->id
            && defined $candidate->id
            && $best_node->id == $candidate->id;

        # Unranked ('no rank') nodes are never matched by rank and taxon.
        my $same_taxon = $best_node->rank ne 'no rank'
            && $best_node->rank eq $candidate->rank
            && $best_node->taxon eq $candidate->taxon;

        if ($same_id || $same_taxon) {
            # Graft from the best common ancestor. The previous code used
            # the loop variable left over from the leaf scan, i.e. the
            # ancestor computed for the last leaf, which could be
            # shallower than the best one or undefined.
            foreach my $descendent ($best_node->each_Descendent) {
                $candidate->add_Descendent($descendent, $force);
            }
        }
        $self->set_root_node($candidate) if $candidate->distance_to_root == 0;
    }

    return ($best_node->get_Descendents);
}
=head2 remove_branch
Title : remove_branch
Usage : $tree->remove_branch($node)
Function: remove a branch up to the next multifurcation
Returns :
Args : Bio::Taxonomy::Taxon object
=cut
# Remove the branch that $node sits on, up to the next multifurcation.
#
# Starting from $node (which may be any node along the branch), climb
# while the parent has only a single descendant, then drop the topmost
# node of that run from its parent.
#
# Args    : Bio::Taxonomy::Taxon object
# Returns : the result of remove_Descendent, or nothing when the branch
#           reaches a node without an ancestor (that node is kept, with
#           all of its descendants removed)
sub remove_branch {
    my ($self, $node) = @_;

    # We can define a branch at any point along it.
    while (defined(my $parent = $node->ancestor)) {
        # Count in list context rather than relying on what
        # each_Descendent happens to return in scalar context.
        my @siblings = $parent->each_Descendent;
        last if @siblings > 1;
        $node = $parent;
    }

    # It is not clear that remove_Descendent also discards the
    # descendants of the removed node, so clear them explicitly.
    $node->remove_all_Descendents;

    # When the whole tree is a single unbranched lineage the climb ends
    # on a node with no ancestor; there is no parent to detach it from,
    # so stop here instead of calling a method on undef.
    my $parent = $node->ancestor;
    return unless defined $parent;

    return $parent->remove_Descendent($node);
}
1;