Raw content of Bio::EnsEMBL::Compara::Graph::ConnectedComponents
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Compara::Graph::ConnectedComponents
=cut
=head1 SYNOPSIS
my $aa = $sdba->get_AnalysisAdaptor;
my $analysis = $aa->fetch_by_logic_name('PAFCluster');
my $rdb = new Bio::EnsEMBL::Compara::Graph::ConnectedComponents(
-input_id => "{'species_set'=>[1,2,3,14]}",
-analysis => $analysis);
$rdb->fetch_input
$rdb->run;
=cut
=head1 DESCRIPTION
This is a general purpose tool for building connected component clusters
from pairs of scalars. The scalars can be any perl scalar (number, string,
object reference, hash reference, list reference) The scalars are treated as
distinct IDs so that equal scalars point to the same node/component.
As new scalar IDs are encountered new nodes are created and clusters are grown
and merged as the connections are added. It uses the NestedSet data structure.
typical use would be
my $ccEngine = new Bio::EnsEMBL::Compara::Graph::ConnectedComponents;
foreach my($node_id1, $node_id2) (@some_list_of_pairs) {
$ccEngine->add_connection($node_id1, $node_id2);
}
printf("built %d clusters\n", $ccEngine->get_cluster_count);
printf("has %d distinct components\n", $ccEngine->get_component_count);
$cluster_root = $ccEngine->clusterset;
=cut
=head1 CONTACT
Contact Jessica Severin on module implemetation/design detail: jessica@ebi.ac.uk
Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
=cut
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Compara::Graph::ConnectedComponents;
use strict;
use Bio::EnsEMBL::Compara::NestedSet;
use Time::HiRes qw(time gettimeofday tv_interval);
#new method inherited from CGObject which calls init
sub new {
my $class = shift;
my $self = {};
bless $self,$class;
$self->{'tree_root'} = new Bio::EnsEMBL::Compara::NestedSet;
$self->{'tree_root'}->name("CC_clusterset");
$self->{'member_leaves'} = {};
return $self;
}
sub DESTROY {
my $self = shift;
$self->{'tree_root'}->cascade_unlink;
$self->{'tree_root'} = undef;
}
=head2 add_connection
Description: Takes a pair of unique scalars and uses the NestedSet objects
to build a 3 layer tree in memory. There is a single root for the entire build
process, and each cluster is a child of this root. The are children of
the clusters.
Arg [1] : node1 identifier (some unique number, name or object/data reference)
Arg [2] : node2 identifier
Example : $ccEngine->add_connection(1234567, $member);
$ccEngine->add_connection(1234567, "ENG00000076598");
Returntype : undef
Exceptions : none
Caller : general
=cut
sub add_connection {
my $self = shift;
my $node1_id = shift;
my $node2_id = shift;
my ($node1, $node2);
$node1 = $self->{'member_leaves'}->{$node1_id};
$node2 = $self->{'member_leaves'}->{$node2_id};
if(!defined($node1)) {
$node1 = new Bio::EnsEMBL::Compara::NestedSet;
$node1->node_id($node1_id);
$self->{'member_leaves'}->{$node1_id} = $node1;
}
if(!defined($node2)) {
$node2 = new Bio::EnsEMBL::Compara::NestedSet;
$node2->node_id($node2_id);
$self->{'member_leaves'}->{$node2_id} = $node2;
}
my $parent1 = $node1->parent;
my $parent2 = $node2->parent;
if(!defined($parent1) and !defined($parent2)) {
#neither member is in a cluster so create new cluster with just these 2 members
# printf("create new cluster\n");
my $cluster = new Bio::EnsEMBL::Compara::NestedSet;
$self->{'tree_root'}->add_child($cluster);
$cluster->add_child($node1);
$cluster->add_child($node2);
}
elsif(defined($parent1) and !defined($parent2)) {
# printf("add member to cluster %d\n", $parent1->node_id);
# $node2->print_member;
$parent1->add_child($node2);
}
elsif(!defined($parent1) and defined($parent2)) {
# printf("add member to cluster %d\n", $parent2->node_id);
# $node1->print_member;
$parent2->add_child($node1);
}
elsif(defined($parent1) and defined($parent2)) {
if($parent1->equals($parent2)) {
# printf("both members already in same cluster %d\n", $parent1->node_id);
} else {
#these member already belong to a different clusters -> need to merge clusters
# print("MERGE clusters\n");
$parent1->merge_children($parent2);
$parent2->disavow_parent; #releases from root
}
}
my $link = undef;
#$link = $node1->create_link_to_node($node2); #builds a cyclic connected graph of the connections
return $link;
}
sub get_cluster_count {
my $self = shift;
return $self->{'tree_root'}->get_child_count;
}
sub get_component_count {
my $self = shift;
return scalar(keys(%{$self->{'member_leaves'}}));
}
sub clusterset {
my $self = shift;
return $self->{'tree_root'};
}
1;