Raw content of Bio::EnsEMBL::Compara::Graph::ConnectedComponentGraphs
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Compara::Graph::ConnectedComponentGraphs
=cut
=head1 SYNOPSIS
my $ccgEngine = new Bio::EnsEMBL::Compara::Graph::ConnectedComponentGraphs;
my $link = $ccgEngine->add_connection($node_id1, $node_id2);
$link->add_tag("my_tag","my_value");
my $holding_node = $ccgEngine->holding_node;
foreach my $link (@{$holding_node->links}) {
my $graph = $link->get_neighbor($holding_node);
}
=cut
=head1 DESCRIPTION
This is a general purpose tool for building connected component graphs
from pairs of scalars. The scalars can be any perl scalar (number, string,
object reference, hash reference, list reference) The scalars are treated as
distinct IDs so that equal scalars point to the same node/component.
As new scalar IDs are encountered new nodes are created and graphs are grown
and merged as the connections are added. It uses the Node data structure.
typical use would be
my $ccgEngine = new Bio::EnsEMBL::Compara::Graph::ConnectedComponentGraphs;
foreach my($node_id1, $node_id2) (@some_list_of_pairs) {
$ccgEngine->add_connection($node_id1, $node_id2);
}
printf("built %d graphs\n", $ccEngine->get_graph_count);
printf("has %d distinct components\n", $ccEngine->get_component_count);
$graph_holding_node = $ccEngine->holding_node;
The holding node has a hard coded name 'ccg_holding_node' that can only be
retrieved by $ccEngine->holding_node.
The holding node has DIRECTED links to graph nodes, each of which is a single entry
point to an independant graph. This entry node (and there is only one for each graph)
have a special name 'connected_to_holding_node', and can be found from any node in a
graph using $current_node->find_node_by_name('connected_to_holding_node');
The DIRECTED link holding node to graph nodes means that it is not bi-directional. You
can only go from holding node to graph nodes, NOT fromgraph nodes to holding node. This
is done to clearly isolate independant subgraph and facilitate the walking in each of them,
without mixing them up.
=cut
=head1 CONTACT
Contact Abel Ureta-Vidal on module implemetation/design detail: abel@ebi.ac.uk
or more generally ensembl-dev@ebi.ac.uk
=cut
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Compara::Graph::ConnectedComponentGraphs;
use strict;
use Bio::EnsEMBL::Compara::Graph::Node;
use Time::HiRes qw(time gettimeofday tv_interval);
sub new {
my $class = shift;
my $self = {};
bless $self,$class;
$self->{'holding_node'} = new Bio::EnsEMBL::Compara::Graph::Node;
$self->{'holding_node'}->name("ccg_holding_node");
$self->{'graphs'} = undef;
$self->{'cache_nodes'} = {};
return $self;
}
sub DESTROY {
my $self = shift;
$self->{'holding_node'}->cascade_unlink;
$self->{'holding_node'} = undef;
}
=head2 add_connection
Arg [1] : node1 identifier (some unique number, name or object/data reference)
Arg [2] : node2 identifier
Example : $ccgEngine->add_connection($id1, $id2);
$ccgEngine->add_connection($member1, $member2);
$ccgEngine->add_connection("ENG00000016598", "ENG00000076598");
Description: Takes a pair of unique scalars and uses the Node objects to build graphs in memory.
There is a single graph holding node for the entire build process, and each independant
graph has a single node connected to this "holding" node.
Returntype : Bio::EnsEMBL::Compara::Graph::Link
Exceptions : none
Caller : general
=cut
sub add_connection {
my $self = shift;
my $node1_id = shift;
my $node2_id = shift;
return undef if ($node1_id eq $node2_id);
my ($node1, $node2);
$node1 = $self->{'cache_nodes'}->{$node1_id};
$node2 = $self->{'cache_nodes'}->{$node2_id};
if (defined $node1 && defined $node2) {
if ($node1->has_neighbor($node2)) {
#link exist already return it
return $node1->link_for_neighbor($node2);
} else {
#link does not exit, creates it
my $link = $node1->create_link_to_node($node2);
$self->{'graphs'} = undef;
return $link
}
}
if(!defined($node1)) {
$node1 = new Bio::EnsEMBL::Compara::Graph::Node;
$node1->node_id($node1_id);
$self->{'cache_nodes'}->{$node1_id} = $node1;
if (defined $node2) {
my $link = $node1->create_link_to_node($node2);
$self->{'graphs'} = undef;
return $link;
}
}
if(!defined($node2)) {
$node2 = new Bio::EnsEMBL::Compara::Graph::Node;
$node2->node_id($node2_id);
$self->{'cache_nodes'}->{$node2_id} = $node2;
my $link = $node1->create_link_to_node($node2);
$self->{'graphs'} = undef;
return $link;
}
}
=head2 get_graph_count
Arg [1] : none
Example : $ccgEngine->get_graph_count;
Description: return the number of independant graphs currently in memory
Returntype : integer
Exceptions : none
Caller : general
=cut
sub get_graph_count {
my $self = shift;
return scalar @{$self->holding_node->links};
}
=head2 get_component_count
Arg [1] : none
Example : $ccgEngine->get_component_count;
Description: return the number of nodes involved in the graphs currently in memory
Returntype : integer
Exceptions : none
Caller : general
=cut
sub get_component_count {
my $self = shift;
return scalar(keys(%{$self->{'cache_nodes'}}));
}
=head2 holding_node
Arg [1] : none
Example : $ccgEngine->holding_node;
Description: return the node that hold links to each graph currently in memory
Returntype : Bio::EnsEMBL::Compara::Graph::Node
Exceptions : none
Caller : general
=cut
sub holding_node {
my $self = shift;
$self->graphs;
return $self->{'holding_node'};
}
=head2 graphs
Arg [1] : none
Example : $ccgEngine->graphs;
Description: return the array reference of nodes, each of them is the entry point
to an individual independant graph. IMPORTANT: be aware that this requires
walk through all the nodes to find the independant graphs. If you are still
adding connections and calling this method can get longer time as the graphs
are growing. We suggest to use it when your graphs are stable, so the graphs
walking is only done once, and the result cached.
Returntype : array reference of Bio::EnsEMBL::Compara::Graph::Node
Exceptions : none
Caller : general
=cut
sub graphs {
my $self = shift;
unless (defined $self->{'graphs'}) {
$self->{'holding_node'}->unlink_all;
my @graphs;
my %already_seen_nodes;
foreach my $node (values %{$self->{'cache_nodes'}}) {
next if ($already_seen_nodes{$node});
my $nodes = $node->all_nodes_in_graph;
for my $n (@{$nodes}) {
$already_seen_nodes{$n} = 1;
$n->name("");
}
$node->name("connected_to_holding_node");
$self->{'holding_node'}->create_directed_link_to_node($node);
push @graphs, $node;
}
$self->{'graphs'} = \@graphs;
}
return $self->{'graphs'};
}
1;