Bio::EnsEMBL::Compara::Graph
ConnectedComponents
Toolbar
Summary
Bio::EnsEMBL::Compara::Graph::ConnectedComponents
Package variables
No package variables defined.
Included modules
Time::HiRes qw ( time gettimeofday tv_interval )
Synopsis
my $aa = $sdba->get_AnalysisAdaptor;
my $analysis = $aa->fetch_by_logic_name('PAFCluster');
my $rdb = new Bio::EnsEMBL::Compara::Graph::ConnectedComponents(
-input_id => "{'species_set'=>[1,2,3,14]}",
-analysis => $analysis);
$rdb->fetch_input;
$rdb->run;
Description
This is a general purpose tool for building connected component clusters
from pairs of scalars. The scalars can be any perl scalar (number, string,
object reference, hash reference, list reference). The scalars are treated as
distinct IDs so that equal scalars point to the same node/component.
As new scalar IDs are encountered new nodes are created and clusters are grown
and merged as the connections are added. It uses the NestedSet data structure.
Typical use would be:
my $ccEngine = new Bio::EnsEMBL::Compara::Graph::ConnectedComponents;
foreach my($node_id1, $node_id2) (@some_list_of_pairs) {
$ccEngine->add_connection($node_id1, $node_id2);
}
printf("built %d clusters\n", $ccEngine->get_cluster_count);
printf("has %d distinct components\n", $ccEngine->get_component_count);
$cluster_root = $ccEngine->clusterset;
Methods
DESTROY | No description | Code |
add_connection | Description | Code |
clusterset | No description | Code |
get_cluster_count | No description | Code |
get_component_count | No description | Code |
new | No description | Code |
Methods description
add_connection
Description: Takes a pair of unique scalars and uses the NestedSet objects to
build a 3 layer tree in memory. There is a single root for the entire build
process, and each cluster is a child of this root. The <scalars> are children
of the clusters.
Arg [1] : <scalar> node1 identifier (some unique number, name or object/data reference)
Arg [2] : <scalar> node2 identifier
Example : $ccEngine->add_connection(1234567, $member);
          $ccEngine->add_connection(1234567, "ENG00000076598");
Returntype : undef
Exceptions : none
Caller : general
Methods code
sub DESTROY
{ my $self = shift;
$self->{'tree_root'}->cascade_unlink;
$self->{'tree_root'} = undef; } |
sub add_connection
{ my $self = shift;
my $node1_id = shift;
my $node2_id = shift;
my ($node1, $node2);
$node1 = $self->{'member_leaves'}->{$node1_id};
$node2 = $self->{'member_leaves'}->{$node2_id};
if(!defined($node1)) {
$node1 = new Bio::EnsEMBL::Compara::NestedSet;
$node1->node_id($node1_id);
$self->{'member_leaves'}->{$node1_id} = $node1;
}
if(!defined($node2)) {
$node2 = new Bio::EnsEMBL::Compara::NestedSet;
$node2->node_id($node2_id);
$self->{'member_leaves'}->{$node2_id} = $node2;
}
my $parent1 = $node1->parent;
my $parent2 = $node2->parent;
if(!defined($parent1) and !defined($parent2)) {
my $cluster = new Bio::EnsEMBL::Compara::NestedSet;
$self->{'tree_root'}->add_child($cluster);
$cluster->add_child($node1);
$cluster->add_child($node2);
}
elsif(defined($parent1) and !defined($parent2)) {
$parent1->add_child($node2);
}
elsif(!defined($parent1) and defined($parent2)) {
$parent2->add_child($node1);
}
elsif(defined($parent1) and defined($parent2)) {
if($parent1->equals($parent2)) {
} else {
$parent1->merge_children($parent2);
$parent2->disavow_parent; }
}
my $link = undef;
return $link; } |
sub clusterset
{ my $self = shift;
return $self->{'tree_root'};
}
1; } |
sub get_cluster_count
{ my $self = shift;
return $self->{'tree_root'}->get_child_count; } |
sub get_component_count
{ my $self = shift;
return scalar(keys(%{$self->{'member_leaves'}})); } |
sub new
{ my $class = shift;
my $self = {};
bless $self,$class;
$self->{'tree_root'} = new Bio::EnsEMBL::Compara::NestedSet;
$self->{'tree_root'}->name("CC_clusterset");
$self->{'member_leaves'} = {};
return $self; } |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _