Bio::EnsEMBL::Compara::Graph
Algorithms
Toolbar
Summary
Algorithms - DESCRIPTION of Object
Package variables
No package variables defined.
Included modules
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Synopsis
Description
Collection of graph traversal algorithms
Methods
_internal_find_balanced_link | No description | Code |
_internal_get_all_links | No description | Code |
balance_tree | No description | Code |
calc_leaf_count | No description | Code |
calc_link_sum | Description | Code |
calc_node_count | No description | Code |
calc_tree_weight | No description | Code |
chop_tree | No description | Code |
find_balanced_link | No description | Code |
get_all_links | No description | Code |
parent_graph | No description | Code |
root_tree_on_link | No description | Code |
Methods description
Arg [1] : <Bio::EnsEMBL::Compara::Graph::Link> $link Arg [2] : <Bio::EnsEMBL::Compara::Graph::Node> $node Example : $sum = Bio::EnsEMBL::Compara::Graph::Algorithms::calc_link_sum($link, $next_node); Description: returns the sum of all the lengths of the links starting from node Returntype : Bio::EnsEMBL::Compara::Graph::Link Exceptions : none |
Methods code
_internal_find_balanced_link | description | prev | next | Top |
sub _internal_find_balanced_link
{
my $link = shift;
my $visited_links = shift;
my $stats = shift;
my $debug = shift;
return if(defined($visited_links->{$link->obj_id}));
$visited_links->{$link->obj_id} = 1;
my ($node1, $node2) = $link->get_nodes;
my $weight1 = calc_link_sum($link, $node1);
my $weight2 = calc_link_sum($link, $node2);
my $balance = abs($weight1 - $weight2);
if($debug) {
printf("balance (%f - %f = %f) : ", $weight1, $weight2, $balance);
$link->print_link;
}
if(!defined($stats->{'best_balance'}) or ($balance < $stats->{'best_balance'})) {
$stats->{'best_balance'} = $balance;
$stats->{'weight1'} = $weight1;
$stats->{'weight2'} = $weight2;
$stats->{'leaves1'} = calc_leaf_count($link, $node1);
$stats->{'leaves2'} = calc_leaf_count($link, $node2);
$stats->{'best_link'} = $link;
}
my $heavier_node = $node1;
if($weight2 > $weight1) { $heavier_node = $node2;}
foreach my $next_link (@{$heavier_node->links}) {
throw("failure: node has an undefined link") unless(defined($next_link));
next if($link->equals($next_link));
_internal_find_balanced_link($next_link, $visited_links, $stats, $debug);
}
return undef;
}
} |
sub _internal_get_all_links
{
my $link = shift;
my $visited_links = shift;
return if(defined($visited_links->{$link->obj_id}));
$visited_links->{$link->obj_id} = $link;
my ($node1, $node2) = $link->get_nodes;
foreach my $next_link (@{$node1->links}) {
next if($next_link->equals($link));
_internal_get_all_links($next_link, $visited_links);
}
foreach my $next_link (@{$node2->links}) {
next if($next_link->equals($link));
_internal_get_all_links($next_link, $visited_links);
}
return undef;
}
1; } |
sub balance_tree
{
my $tree = shift;
my $starttime = time();
my $last_root = Bio::EnsEMBL::Compara::NestedSet->new;
$last_root->merge_children($tree);
my $best_root = $last_root;
my $best_weight = calc_tree_weight($last_root);
my @all_nodes = $last_root->get_all_subnodes;
foreach my $node (@all_nodes) {
$node->re_root;
$last_root = $node;
my $new_weight = calc_tree_weight($node);
if($new_weight < $best_weight) {
$best_weight = $new_weight;
$best_root = $node;
}
}
printf("%1.3f secs to run balance_tree\n", (time()-$starttime));
$best_root->re_root;
$tree->merge_children($best_root); } |
sub calc_leaf_count
{
my $link = shift;
my $to_node = shift;
throw("node not part of link") unless($link and $to_node and $link->get_neighbor($to_node));
return 1 if($to_node->is_leaf);
my $node_count = 0;
foreach my $next_link (@{$to_node->links}) {
throw("failure: node has an undefined link") unless(defined($next_link));
next if($link->equals($next_link));
my $next_node = $next_link->get_neighbor($to_node);
$node_count += calc_leaf_count($next_link, $next_node);
}
return $node_count;
}
} |
sub calc_link_sum
{
my $link = shift;
my $to_node = shift;
throw("node not part of link") unless($link and $to_node and $link->get_neighbor($to_node));
return 0.0 if($to_node->is_leaf);
my $link_sum = $link->distance_between;
foreach my $next_link (@{$to_node->links}) {
throw("failure: node has an undefined link") unless(defined($next_link));
next if($link->equals($next_link));
my $next_node = $next_link->get_neighbor($to_node);
$link_sum += calc_link_sum($next_link, $next_node);
}
return $link_sum; } |
sub calc_node_count
{
my $link = shift;
my $to_node = shift;
throw("node not part of link") unless($link and $to_node and $link->get_neighbor($to_node));
return 1 if($to_node->is_leaf);
my $node_count = 1;
foreach my $next_link (@{$to_node->links}) {
throw("failure: node has an undefined link") unless(defined($next_link));
next if($link->equals($next_link));
my $next_node = $next_link->get_neighbor($to_node);
$node_count += calc_node_count($next_link, $next_node);
}
return $node_count; } |
sub calc_tree_weight
{
my $tree = shift;
my $weight=0.0;
foreach my $node (@{$tree->get_all_leaves}) {
my $dist = $node->distance_to_root;
$weight += $dist * $dist;
}
return $weight;
}
} |
sub chop_tree
{
my $tree = shift;
printf("chop_tree\n");
my $all_links = get_all_links($tree);
printf("%d links in graph\n", scalar(@$all_links));
my @sortedlinks =
sort { $b->distance_between <=> $a->distance_between
} @{$all_links;};
my $count = 0;
foreach my $link (@sortedlinks) {
$count++;
$link->print_link;
my ($node1, $node2) = $link->get_nodes;
my $weight1 = calc_link_sum($link, $node1);
my $weight2 = calc_link_sum($link, $node2);
my $leafcount1 = calc_leaf_count($link, $node1);
my $leafcount2 = calc_leaf_count($link, $node2);
printf(" link dist balance (%7.5f --- %7.5f)\n", $weight1, $weight2);
printf(" leaf counts (%8d --- %8d)\n", $leafcount1, $leafcount2);
if($leafcount1 >75 and $leafcount2>75) {
printf("%d link is ok to break\n", $count);
last;
}
} } |
sub find_balanced_link
{
my $link = shift;
my $debug = shift;
my $stats = {};
my $visited_links = {};
my $starttime = time();
_internal_find_balanced_link($link, $visited_links, $stats, $debug);
if($debug) {
printf("%1.3f secs to run find_balanced_link\n", (time()-$starttime));
printf("best balanced (%f - %f = %f) : ",
$stats->{'weight1'}, $stats->{'weight2'}, $stats->{'best_balance'});
$stats->{'best_link'}->print_link;
printf(" %d --- %d leaves\n", $stats->{'leaves1'}, $stats->{'leaves2'});
}
return $stats->{'best_link'}; } |
sub get_all_links
{
my $obj = shift;
my $starttime = time();
my $visited_links = {};
return undef unless($obj);
my $link=undef;
if($obj->isa("Bio::EnsEMBL::Compara::Graph::Link")) {
$link = $obj;
}
if($obj->isa("Bio::EnsEMBL::Compara::Graph::Node")) {
($link) = @{$obj->links};
}
return undef unless($link);
_internal_get_all_links($link, $visited_links);
printf("%1.3f secs to run get_all_links\n", (time()-$starttime));
my @links = values(%{$visited_links});
return\@ links; } |
sub parent_graph
{ my $node = shift;
my $parent_link = shift;
return undef unless($node);
unless($node->isa('Bio::EnsEMBL::Compara::NestedSet')) {
bless $node, "Bio::EnsEMBL::Compara::NestedSet";
}
$node->_set_parent_link($parent_link);
foreach my $next_link (@{$node->links}) {
throw("failure: node has an undefined link") unless(defined($next_link));
next if($parent_link and $parent_link->equals($next_link));
my $next_node = $next_link->get_neighbor($node);
parent_graph($next_node, $next_link);
}
}
} |
root_tree_on_link | description | prev | next | Top |
sub root_tree_on_link
{
my $link = shift;
my ($node1, $node2) = $link->get_nodes;
parent_graph($node1);
parent_graph($node2);
my $dist = $link->distance_between;
my $root = new Bio::EnsEMBL::Compara::NestedSet;
$root->add_child($node1, $dist / 2.0); $root->add_child($node2, $dist / 2.0);
parent_graph($root);
return $root; } |
General documentation
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _