Raw content of Bio::EnsEMBL::Compara::Graph::Node
=head1 NAME
Bio::EnsEMBL::Compara::Graph::Node - a node in the Compara Node/Link graph system
=head1 SYNOPSIS
=head1 DESCRIPTION
Object oriented graph system which is based on Node and Link objects. There is
no 'graph' object, the graph is constructed out of Nodes and Links, and the
graph is 'walked' from Node to Link to Node. Can be used to represent any graph
structure from DAGs (directed acyclic graph) to Trees to undirected cyclic Graphs.
The system is fully connected so from any object in the graph one can 'walk' to
any other. Links contain pointers to the nodes on either side (called neighbors),
and each Node contains a list of the links it is connected to.
Nodes also keep hashes of their neighbors for fast 'set theory' operations.
This graph system is used as the foundation for the Nested-set
(Compara::NestedSet) system for storing trees in the compara database.
System has a simple API based on creating Nodes and then linking them together:
my $node1 = new Bio::EnsEMBL::Compara::Graph::Node;
my $node2 = new Bio::EnsEMBL::Compara::Graph::Node;
new Bio::EnsEMBL::Compara::Graph::Link($node1, $node2, $distance_between);
And to 'disconnect' nodes, one just breaks a link;
my $link = $node1->link_for_neighbor($node2);
$link->dealloc;
Convenience methods to simplify this process
$node1->create_link_to_node($node2, $distance_between);
$node2->unlink_neighbor($node1);
=head1 CONTACT
Contact Jessica Severin on implementation/design detail: jessica@ebi.ac.uk
Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
=head1 APPENDIX
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Compara::Graph::Node;
use strict;
use Bio::EnsEMBL::Utils::Exception;
use Bio::EnsEMBL::Utils::Argument;
use Bio::EnsEMBL::Compara::Graph::Link;
use Bio::EnsEMBL::Compara::Graph::CGObject;
use warnings;
our @ISA = qw(Bio::EnsEMBL::Compara::Graph::CGObject);
#################################################
# creation methods
#################################################
#new and alloc method in superclass
# Initialise this node by delegating to the superclass initialiser.
# Returns the node itself so that calls can be chained.
sub init {
    my ($self) = @_;

    $self->SUPER::init();
    return $self;
}
# Release this node by delegating to the superclass dealloc and hand back
# whatever that call returns.  Links are not torn down here: the unlink
# call below is commented out.
sub dealloc {
    my ($self) = @_;

    #$self->unlink_all_neighbors;
    return $self->SUPER::dealloc();
}
# Copy this node without any of its links.
# The superclass builds the copy, which is then blessed as a plain
# Bio::EnsEMBL::Compara::Graph::Node (the class name is fixed, it does not
# follow ref($self)).
# Returns the new node.
sub copy {
    my ($self) = @_;

    my $clone = $self->SUPER::copy();
    return bless($clone, "Bio::EnsEMBL::Compara::Graph::Node");
}
# Copy this node and link the copy to each of this node's direct neighbors,
# re-using the distance stored on every original link.  The neighbors
# themselves are not copied and the walk does not go beyond them.
# Returns the new node.
sub copy_shallow_links {
    my ($self) = @_;

    my $clone = $self->copy;

    for my $edge (@{ $self->links }) {
        my @link_args = ($edge->get_neighbor($self), $edge->distance_between);
        $clone->create_link_to_node(@link_args);
    }

    return $clone;
}
# Recursively copy the graph reachable from this node.
#   Arg [1] : (opt.) the link the recursion arrived through; it is skipped
#             so the walk does not immediately go back the way it came.
# Returns the copy of this node, linked to copies of its neighbors.
# NOTE(review): the only guard against revisiting is the incoming link, so
# this looks suited to tree-shaped graphs only -- confirm before using it on
# a graph that contains cycles.
sub copy_graph {
    my ($self, $incoming_link) = @_;

    my $clone = $self->copy;

    EDGE:
    for my $edge (@{ $self->links }) {
        if ($incoming_link and $edge->equals($incoming_link)) {
            next EDGE;
        }

        my $far_node = $edge->get_neighbor($self);
        my $far_copy = $far_node->copy_graph($edge);
        $clone->create_link_to_node($far_copy, $edge->distance_between);
    }

    return $clone;
}
#################################################
#
# get/set variable methods
#
#################################################
=head2 node_id
Arg [1] : (opt.) integer node_id
Example : my $nsetID = $object->node_id();
Example : $object->node_id(12);
Description: Getter/Setter for the node_id of this object in the database
Returntype : integer node_id
Exceptions : none
Caller : general
=cut
# Getter/setter for the node_id.
#   Arg [1] : (opt.) new node_id to store
# Returns the stored node_id, or the object's obj_id while no node_id has
# been defined.
sub node_id {
    my ($self, @new_value) = @_;

    if (@new_value) {
        $self->{'_node_id'} = $new_value[0];
    }

    my $stored = $self->{'_node_id'};
    return defined($stored) ? $stored : $self->obj_id;
}
#######################################
# Set manipulation methods
#######################################
=head2 create_link_to_node
Overview : attaches a neighbor Graph::Node to this node
Arg [1] : Bio::EnsEMBL::Compara::Graph::Node $node
Arg [2] : (opt.) distance to node
Example : $self->create_link_to_node($node, $distance);
Returntype : Compara::Graph::Link object
Exceptions : if neighbor is undef or not a Bio::EnsEMBL::Compara::Graph::Node
Caller : general
=cut
# Link this node to $node, re-using an existing link when there is one.
#   Arg [1] : Bio::EnsEMBL::Compara::Graph::Node to connect to
#   Arg [2] : (opt.) distance to store on a newly created link
# Returns the Bio::EnsEMBL::Compara::Graph::Link between the two nodes.
# Throws if $node is undef or is not a Graph::Node.
sub create_link_to_node {
    my ($self, $node, $distance) = @_;

    if (!defined($node)) {
        throw("neighbor not defined");
    }
    if (!$node->isa('Bio::EnsEMBL::Compara::Graph::Node')) {
        throw("arg must be a [Bio::EnsEMBL::Compara::Graph::Node] not a [$node]");
    }

    # An existing link is returned untouched: $distance is ignored in that case
    my $existing = $self->link_for_neighbor($node);
    return $existing if ($existing);

    #results in calls to _add_neighbor_link_to_hash on each node
    my $link = Bio::EnsEMBL::Compara::Graph::Link->new($self, $node);
    $link->distance_between($distance) if (defined($distance));

    return $link;
}
# Same as create_link_to_node, except that a newly created link is removed
# again from the lookup hash of the link's second node, so only the first
# node can reach the link through its hash.
#   Arg [1] : Bio::EnsEMBL::Compara::Graph::Node to connect to
#   Arg [2] : (opt.) distance to store on a newly created link
# Returns the Bio::EnsEMBL::Compara::Graph::Link between the two nodes.  An
# already existing link is returned as it is, without the one-sided unlink.
# Throws if $node is undef or is not a Graph::Node.
sub create_directed_link_to_node {
    my ($self, $node, $distance) = @_;

    if (!defined($node)) {
        throw("neighbor not defined");
    }
    if (!$node->isa('Bio::EnsEMBL::Compara::Graph::Node')) {
        throw("arg must be a [Bio::EnsEMBL::Compara::Graph::Node] not a [$node]");
    }

    my $existing = $self->link_for_neighbor($node);
    return $existing if ($existing);

    #results in calls to _add_neighbor_link_to_hash on each node
    my $link = Bio::EnsEMBL::Compara::Graph::Link->new($self, $node);
    $link->distance_between($distance) if (defined($distance));

    # Make the link one-way: node2 forgets node1
    # (presumably node1 is $self and node2 is $node -- verify against Link)
    my $tail = $link->{'_link_node2'};
    $tail->_unlink_node_in_hash($link->{'_link_node1'});

    return $link;
}
#
# internal method called by Compara::Graph::Link
# Register $link in this node's lookup hash under the neighbor's obj_id,
# creating the hash on first use.
#   Arg [1] : neighbor node (must provide obj_id)
#   Arg [2] : link connecting this node to the neighbor
# Returns the link that was stored (the value of the final assignment).
sub _add_neighbor_link_to_hash {
    my ($self, $neighbor, $link) = @_;

    $self->{'_obj_id_to_link'} ||= {};
    $self->{'_obj_id_to_link'}{ $neighbor->obj_id } = $link;
}
# Remove the entry for $neighbor (keyed by its obj_id) from this node's
# lookup hash.  The link object itself is not touched.
#   Arg [1] : neighbor node (must provide obj_id)
# Returns whatever delete returns, i.e. the removed link or undef.
sub _unlink_node_in_hash {
    my ($self, $neighbor) = @_;

    my $neighbor_key = $neighbor->obj_id;
    delete $self->{'_obj_id_to_link'}{$neighbor_key};
}
=head2 unlink_neighbor
Overview : unlink and release neighbor from self if it's mine (throws if it is not);
might cause neighbor to delete if refcount reaches Zero.
Arg [1] : $node Bio::EnsEMBL::Compara::Graph::Node instance
Example : $self->unlink_neighbor($node);
Returntype : undef
Caller : general
=cut
# Break the link between this node and $node by dealloc'ing the link.
#   Arg [1] : Bio::EnsEMBL::Compara::Graph::Node currently linked to self
# Returns undef.
# Throws if $node is undef, is not a Graph::Node, or is not a neighbor.
sub unlink_neighbor {
    my $self = shift;
    my $node = shift;

    defined($node)
        or throw("neighbor not defined");
    $node->isa('Bio::EnsEMBL::Compara::Graph::Node')
        or throw("arg must be a [Bio::EnsEMBL::Compara::Graph::Node] not a [$node]");

    my $link = $self->link_for_neighbor($node);
    $link
        or throw($self->obj_id. " not my neighbor ". $node->obj_id);

    $link->dealloc;
    return undef;
}
# Dealloc every link attached to this node.
# Returns undef.
sub unlink_all {
    my ($self) = @_;

    # links() hands back a fresh array, so the links can be dealloc'ed
    # while walking over it
    my $edges = $self->links;
    for my $index (0 .. $#{$edges}) {
        $edges->[$index]->dealloc;
    }

    return undef;
}
=head2 cascade_unlink
Overview : release all neighbors and clear arrays and hashes
will cause potential deletion of neighbors if refcount reaches Zero.
Example : $self->cascade_unlink
Returntype : $self
Exceptions : none
Caller : general
=cut
# Dealloc all links of this node, then recurse into every neighbor that was
# just released so that the whole reachable graph gets taken apart.
#   Arg [1] : (opt.) the node that triggered this call; a link leading back
#             to it is left alone and it is not recursed into
# Returns $self.
sub cascade_unlink {
    my ($self, $caller) = @_;

    no warnings qw/recursion/;

    #printf("cascade_unlink : "); $self->print_node;
    # if($self->refcount > $self->link_count) {
    # printf("!!!! node is being retained - can't cascade_unlink\n");
    # return undef;
    # }

    # First pass: break every link; second pass: recurse.  The neighbors are
    # remembered because they can no longer be reached through links().
    my @released;
    for my $edge (@{ $self->links }) {
        my $far_node = $edge->get_neighbor($self);
        unless ($caller and $far_node->equals($caller)) {
            $edge->dealloc;
            push @released, $far_node;
        }
    }

    for my $far_node (@released) {
        $far_node->cascade_unlink($self);
    }

    return $self;
}
# Splice this node out of the graph when it has exactly two links: the two
# neighbors get linked directly with the summed distance and both old links
# are dealloc'ed.
# Returns $self (unchanged) when the node does not have exactly two links,
# undef after the node has been spliced out.
sub minimize_node {
    my ($self) = @_;

    if ($self->link_count() != 2) {
        return $self;
    }

    #printf("Node::minimize_node "); $self->print_node;
    my ($first_link, $second_link) = @{ $self->links };

    my $dist  = $first_link->distance_between + $second_link->distance_between;
    my $node1 = $first_link->get_neighbor($self);
    my $node2 = $second_link->get_neighbor($self);

    # Bridge the two neighbors before dropping the links that run through self
    Bio::EnsEMBL::Compara::Graph::Link->new($node1, $node2, $dist);

    $first_link->dealloc;
    $second_link->dealloc;

    return undef;
}
=head2 links
Overview : returns a list of Compara::Graph::Link connected to this node
Example : my @links = @{$self->links()};
Returntype : array reference of Bio::EnsEMBL::Compara::Graph::Link objects (could be empty)
Exceptions : none
Caller : general
=cut
# Returns a reference to a new array holding every link stored in this
# node's lookup hash; the array is empty when no hash exists yet.
sub links {
    my ($self) = @_;

    my $link_for = $self->{'_obj_id_to_link'};
    return $link_for ? [ values %{$link_for} ] : [];
}
# Look up the link that connects this node to $node.
#   Arg [1] : Bio::EnsEMBL::Compara::Graph::Node
# Returns the stored link, or undef when $node is not in the lookup hash.
# Throws if $node is false or is not a Graph::Node.
sub link_for_neighbor {
    my ($self, $node) = @_;

    if (!($node and $node->isa('Bio::EnsEMBL::Compara::Graph::Node'))) {
        throw("arg must be a [Bio::EnsEMBL::Compara::Graph::Node] not a [$node]");
    }

    my $neighbor_key = $node->obj_id;
    return $self->{'_obj_id_to_link'}->{$neighbor_key};
}
# Print a one-line description of this node, "Node(<obj_id>)<name>", to the
# currently selected output handle.
sub print_node {
    my ($self) = @_;

    my @fields = ($self->obj_id, $self->name);
    printf("Node(%s)%s\n", @fields);
}
# Ask every link attached to this node to print itself (via print_link).
sub print_links {
    my ($self) = @_;

    my $edges = $self->links;
    for my $edge (@{$edges}) {
        $edge->print_link;
    }
}
# Returns the number of links attached to this node.
sub link_count {
    my ($self) = @_;

    my $edges = $self->links;
    return scalar @{$edges};
}
# Returns 1 when this node has at most one link, 0 otherwise.
sub is_leaf {
    my ($self) = @_;

    return ($self->link_count <= 1) ? 1 : 0;
}
##################################
#
# simple search methods
#
##################################
# Identity test: two nodes are equal when their obj_id strings match.
#   Arg [1] : the object to compare with (must provide obj_id; the argument
#             is not type-checked, the check is commented out)
# Returns 1 or 0.
sub equals {
    my ($self, $other) = @_;

    #throw("arg must be a [Bio::EnsEMBL::Compara::Graph::Node] not a [$other]")
    # unless($other and $other->isa('Bio::EnsEMBL::Compara::Graph::Node'));

    # BEWARE speed up change below
    # return 1 if($self->{'_cgobject_id'} eq $other->{'_cgobject_id'});
    return ($self->obj_id eq $other->obj_id) ? 1 : 0;
}
# Structural comparison: a node is "like" another when it is the same object
# (same obj_id), or when both have the same number of links and every
# neighbor of self is also a neighbor of $other.
#   Arg [1] : Bio::EnsEMBL::Compara::Graph::Node to compare with
# Returns 1 or 0.
# Throws if $other is false or is not a Graph::Node.
sub like {
    my ($self, $other) = @_;

    unless ($other and $other->isa('Bio::EnsEMBL::Compara::Graph::Node')) {
        throw("arg must be a [Bio::EnsEMBL::Compara::Graph::Node] not a [$other]");
    }

    if ($self->obj_id eq $other->obj_id) {
        return 1;
    }
    if ($self->link_count != $other->link_count) {
        return 0;
    }

    for my $edge (@{ $self->links }) {
        my $far_node = $edge->get_neighbor($self);
        if (!$other->has_neighbor($far_node)) {
            return 0;
        }
    }

    return 1;
}
# Tell whether $node is directly linked to this node, judged by a defined
# entry for its obj_id in the lookup hash.
#   Arg [1] : Bio::EnsEMBL::Compara::Graph::Node
# Returns 1 or 0.
# Throws if $node is false or is not a Graph::Node.
sub has_neighbor {
    my ($self, $node) = @_;

    unless ($node and $node->isa("Bio::EnsEMBL::Compara::Graph::Node")) {
        throw("[$node] must be a Bio::EnsEMBL::Compara::Graph::Node object");
    }

    my $neighbor_key = $node->obj_id;
    return defined($self->{'_obj_id_to_link'}->{$neighbor_key}) ? 1 : 0;
}
# Returns a reference to a new array with the node found on the far side of
# each of this node's links (empty when there are no links).
sub neighbors {
    my ($self) = @_;

    # scalar() keeps get_neighbor in scalar context, one entry per link
    my @adjacent = map { scalar $_->get_neighbor($self) } @{ $self->links };

    return \@adjacent;
}
# Search the graph reachable from this node for a node with the given name.
#   Arg [1] : name to look for (compared with eq)
# Returns self when its own name matches, otherwise the first matching node
# in the list handed back by _walk_graph_until, or undef when none matches.
# Throws if no name is given.
sub find_node_by_name {
    my ($self, $name) = @_;

    if (!defined $name) {
        throw("a name needs to be given as argument. The argument is currently undef\n");
    }

    return $self if ($name eq $self->name);

    my $visited = $self->_walk_graph_until(-name => $name);
    for my $candidate (@{$visited}) {
        if ($name eq $candidate->name) {
            return $candidate;
        }
    }

    return undef;
}
# Search the graph reachable from this node for a node with the given
# node_id.
#   Arg [1] : node_id to look for (compared as a string, with eq)
# Returns self when its own node_id matches, otherwise the first matching
# node in the list handed back by _walk_graph_until, or undef when none
# matches.
# Throws if no node_id is given.
sub find_node_by_node_id {
    my ($self, $node_id) = @_;

    if (!defined $node_id) {
        throw("a node_id needs to be given as argument. The argument is currently undef\n");
    }

    return $self if ($node_id eq $self->node_id);

    my $visited = $self->_walk_graph_until(-node_id => $node_id);
    for my $candidate (@{$visited}) {
        if ($node_id eq $candidate->node_id) {
            return $candidate;
        }
    }

    return undef;
}
# Returns what an unrestricted _walk_graph_until call (no name, no node_id)
# hands back: a reference to an array of the nodes reached from this node,
# this node included.
sub all_nodes_in_graph {
    my ($self) = @_;

    return $self->_walk_graph_until();
}
# Collect every link of the graph reachable from this node.
#   Arg [-CACHE_LINKS] : (opt.) hash reference of links that are already
#                        known, keyed by the stringified link.  Links in it
#                        are not followed again and newly found links are
#                        added to it.
# Returns a reference to an array holding every link in the cache once the
# walk has finished (empty for a node without links).
#
# The walk uses an explicit work stack instead of recursion, so the depth of
# the graph does not matter and the result list is built a single time
# rather than once per visited node.
sub all_links_in_graph {
    my ($self, @args) = @_;

    my $cache_links;
    if (scalar @args) {
        ($cache_links) = rearrange([qw(CACHE_LINKS)], @args);
    }
    $cache_links = {} unless (defined $cache_links);

    my @pending = ($self);
    while (@pending) {
        my $node = pop @pending;

        foreach my $link (@{ $node->links }) {
            # A link that is already cached has been followed (or is about
            # to be); this is what stops the walk going round a cycle forever
            next if ($cache_links->{$link});
            $cache_links->{$link} = $link;

            push @pending, $link->get_neighbor($node);
        }
    }

    return [ values %{$cache_links} ];
}
# Depth-first walk over the graph reachable from this node, optionally
# stopping as soon as a node with a given name or node_id has been reached.
#   Arg [-NAME]        : (opt.) stop once a node with this name is reached
#   Arg [-NODE_ID]     : (opt.) stop once a node with this node_id is reached
#   Arg [-CACHE_NODES] : (opt.) hash reference of nodes already visited,
#                        keyed by the stringified node.  Nodes in it are not
#                        visited again and newly visited nodes are added.
#                        When it is not given, a new cache seeded with this
#                        node is used.
# Returns a reference to an array holding every node in the cache when the
# walk ended; a matching node, if one was reached, is among them.
#
# Nodes are visited in the same order as a recursive depth-first walk, but
# with an explicit stack of [neighbor list, next index] frames.  A match
# therefore ends the whole walk (not just the innermost level) and the
# result list is built a single time.
sub _walk_graph_until {
    my ($self, @args) = @_;

    my ($name, $node_id, $cache_nodes);
    if (scalar @args) {
        ($name, $node_id, $cache_nodes) =
            rearrange([qw(NAME NODE_ID CACHE_NODES)], @args);
    }

    unless (defined $cache_nodes) {
        $cache_nodes = {};
        $cache_nodes->{$self} = $self;
    }

    my @frames = ([ scalar($self->neighbors), 0 ]);

    WALK:
    while (@frames) {
        my $frame = $frames[-1];
        my ($siblings, $next_index) = @{$frame};

        # This level is exhausted: go back up one level
        if ($next_index > $#{$siblings}) {
            pop @frames;
            next WALK;
        }
        $frame->[1]++;

        my $neighbor = $siblings->[$next_index];
        next WALK if ($cache_nodes->{$neighbor});
        $cache_nodes->{$neighbor} = $neighbor;

        # Found what was asked for: stop the whole walk
        last WALK if (defined $name && $name eq $neighbor->name);
        last WALK if (defined $node_id && $node_id eq $neighbor->node_id);

        # Descend into the neighbor before looking at its siblings
        push @frames, [ scalar($neighbor->neighbors), 0 ];
    }

    return [ values %{$cache_nodes} ];
}
1;