Bio::EnsEMBL::Compara::RunnableDB
RAP
Toolbar
Summary
Bio::EnsEMBL::Compara::RunnableDB::RAP
Package variables
No package variables defined.
Included modules
File::Basename
Getopt::Long
IO::File
Switch
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Synopsis
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $rap = Bio::EnsEMBL::Compara::RunnableDB::RAP->new (
-db => $db,
-input_id => $input_id
-analysis => $analysis );
$rap->fetch_input(); #reads from DB
$rap->run();
$rap->output();
$rap->write_output(); #writes to DB
Description
This Analysis/RunnableDB is designed to take ProteinTree as input
This must already have a tree built on it. It dumps that tree for import
into the RAP program which will do species tree to gene tree reconciliation.
input_id/parameters format eg: "{'protein_tree_id'=>1234}"
protein_tree_id : use 'id' to fetch a cluster from the ProteinTree
Methods
DESTROY | No description | Code |
check_job_fail_options | No description | Code |
dumpTreeToWorkdir | No description | Code |
fetch_input | No description | Code |
get_params | No description | Code |
next_token | No description | Code |
parse_RAP_output | No description | Code |
parse_rap_newick_into_tree | No description | Code |
pre_root_tree | No description | Code |
print_params | No description | Code |
rap_newick_format | No description | Code |
run | No description | Code |
run_rap | No description | Code |
store_duplication_tags | No description | Code |
store_proteintree | No description | Code |
write_output | No description | Code |
Methods description
None available.
Methods code
sub DESTROY
{ my $self = shift;
if($self->{'protein_tree'}) {
printf("RAP::DESTROY releasing tree\n") if($self->debug);
$self->{'protein_tree'}->release_tree;
$self->{'protein_tree'} = undef;
}
$self->SUPER::DESTROY if $self->can("SUPER::DESTROY");
}
} |
sub check_job_fail_options
{
my $self = shift;
printf("RAP failed : ");
$self->input_job->print_job;
$self->dataflow_output_id($self->input_id, 2);
$self->input_job->update_status('FAILED');
if($self->{'protein_tree'}) {
$self->{'protein_tree'}->release_tree;
$self->{'protein_tree'} = undef;
}
}
} |
sub dumpTreeToWorkdir
{
my $self = shift;
my $tree = shift;
my @leaves = @{$tree->get_all_leaves};
my $leafcount = scalar(@leaves);
if($leafcount<3) {
printf(STDERR "tree cluster %d has <3 proteins - can not build a tree\n", $tree->node_id);
return undef;
}
printf("dumpTreeToWorkdir : %d members\n", $leafcount) if($self->debug);
my $treeName = "proteintree_". $tree->node_id;
$self->{'file_root'} = $self->worker_temp_directory. $treeName;
my $rap_infile = $self->{'file_root'} . ".rap_in";
$self->{'rap_infile'} = $rap_infile;
$self->{'rap_outfile'} = $self->{'file_root'} . ".rap_out";
return $rap_infile if(-e $rap_infile);
print("rap_infile = '$rap_infile'\n") if($self->debug);
open(OUTFILE, ">$rap_infile")
or $self->throw("Error opening $rap_infile for write");
printf(OUTFILE "$treeName\n[\n");
foreach my $member (@leaves) {
printf(OUTFILE "%s\"%s\"\n", $member->member_id, $member->genome_db->name);
}
print OUTFILE "]\n";
print OUTFILE $self->rap_newick_format($tree);
print OUTFILE ";\n";
close OUTFILE;
return $rap_infile; } |
sub fetch_input
{ my( $self) = @_;
$self->{'tree_scale'} = 20;
$self->throw("No input_id") unless defined($self->input_id);
$self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc);
$self->get_params($self->parameters);
$self->get_params($self->input_id);
$self->print_params if($self->debug);
unless($self->{'protein_tree'}) {
throw("undefined ProteinTree as input\n");
}
return 1; } |
sub get_params
{ my $self = shift;
my $param_string = shift;
return unless($param_string);
print("parsing parameter string : ",$param_string,"\n") if($self->debug);
my $params = eval($param_string);
return unless($params);
if($self->debug) {
foreach my $key (keys %$params) {
print(" $key : ", $params->{$key}, "\n");
}
}
if(defined($params->{'protein_tree_id'})) {
$self->{'protein_tree'} =
$self->{'comparaDBA'}->get_ProteinTreeAdaptor->
fetch_node_by_node_id($params->{'protein_tree_id'});
}
if(defined($params->{'species_tree_file'})) {
$self->{'species_tree_file'} = $params->{'species_tree_file'};
}
return; } |
sub next_token
{ my $string = shift;
my $delim = shift;
$$string =~ s/^(\s)+//;
return undef unless(length($$string));
my $index=undef;
my @delims = split(/ */, $delim);
foreach my $dl (@delims) {
my $pos = index($$string, $dl);
if($pos>=0) {
$index = $pos unless(defined($index));
$index = $pos if($pos<$index);
}
}
unless(defined($index)) {
throw("couldn't find delimiter $delim\n");
}
my $token ='';
if($index==0) {
$token = substr($$string,0,1);
$$string = substr($$string, 1);
} else {
$token = substr($$string, 0, $index);
$$string = substr($$string, $index);
}
return $token;
}
} |
sub parse_RAP_output
{
my $self = shift;
my $rap_outfile = $self->{'rap_outfile'};
my $tree = $self->{'protein_tree'};
$tree->flatten_tree;
$tree->print_tree($self->{'tree_scale'}) if($self->debug>2);
foreach my $node (@{$tree->get_all_leaves}) {
$node->add_tag("Duplication", 0);
unless($node->isa('Bio::EnsEMBL::Compara::AlignedMember')) {
$node->disavow_parent;
}
}
$tree->add_tag("Duplication", 0);
print("load from file $rap_outfile\n") if($self->debug);
open (FH, $rap_outfile) or throw("Could not open newick file [$rap_outfile]");
my $chew_rap = 1;
while($chew_rap>0) {
my $line = <FH>;
chomp($line);
printf("rap line %d : %s\n", $chew_rap, $line) if($self->debug>2);
if($line =~ "^]") { $chew_rap=0;}
else { $chew_rap++; };
}
my $newick = <FH>;
chomp($newick);
close(FH);
printf("rap_newick_like_string: '%s'\n", $newick) if($self->debug>1);
my $newtree = $self->parse_rap_newick_into_tree($newick);
$newtree->print_tree($self->{'tree_scale'}) if($self->debug > 1);
foreach my $member (@{$tree->get_all_leaves}) {
my $tmpnode = $newtree->find_node_by_name($member->member_id);
if($tmpnode) {
$tmpnode->add_child($member, 0.0);
$tmpnode->minimize_node; } else {
print("unable to find node in newick for member");
$member->print_member;
}
}
$tree->merge_children($newtree);
$tree->add_tag("Duplication", $newtree->get_tagvalue('Duplication'));
$newtree->release_tree;
$tree->print_tree($self->{'tree_scale'}) if($self->debug);
return undef; } |
sub parse_rap_newick_into_tree
{
my $self = shift;
my $newick = shift;
my $count=1;
my $debug = 1 if($self->debug > 2);
print("$newick\n") if($debug);
my $token = next_token(\$newick, "(;");
my $lastset = undef;
my $node = undef;
my $root = undef;
my $state=1;
my $bracket_level = 0;
while($token) {
if($debug) { printf("state %d : '%s'\n", $state, $token); };
switch ($state) {
case 1 { $node = new Bio::EnsEMBL::Compara::NestedSet;
$node->node_id($count++);
$lastset->add_child($node) if($lastset);
$root=$node unless($root);
if($token eq '#') {
if($debug) { printf(" Duplication node\n"); };
$node->add_tag("Duplication", 1);
$token = next_token(\$newick, "(");
if($debug) { printf("state %d : '%s'\n", $state, $token); };
if($token ne "(") { throw("parse error: expected ( after #\n"); }
}
$node->print_node if($debug);
if($token eq '(') { printf(" create set\n") if($debug);
$token = next_token(\$newick, "\"/(:,)");
$state = 1;
$bracket_level++;
$lastset = $node;
} else {
$state = 2;
}
}
case 2 { if($token eq '/') {
printf("eat the /\n") if($debug);
$token = next_token(\$newick, "\"/(:,)"); }
elsif($token eq '"') { $token = next_token(\$newick, '"');
printf("got quoted name : %s\n", $token) if($debug);
$node->name($token);
$node->add_tag($token, "");
if($debug) { print(" naming leaf"); $node->print_node; }
$token = next_token(\$newick, "\""); unless($token eq '"') {
throw("parse error: expected matching\" ");
}
$token = next_token(\$newick, "/(:,)"); }
elsif(!($token =~ /[:,);]/)) { $node->name($token);
if($debug) { print(" naming leaf"); $node->print_node; }
$token = next_token(\$newick, "/:,);");
}
else { $state = 3; }
}
case 3 { if($token eq ':') {
$token = next_token(\$newick, ",);");
$node->distance_to_parent($token);
if($debug) { print("set distance: $token\n "); $node->print_node; }
$token = next_token(\$newick, ",);"); }
$state = 4;
}
case 4 { if($token eq ')') {
if($debug) { print("end set : "); $lastset->print_node; }
$node = $lastset;
$lastset = $lastset->parent;
$token = next_token(\$newick, "\"/:,);");
$state=2;
$bracket_level--;
} elsif($token eq ',') {
$token = next_token(\$newick, "\"/(:,)");
$state=1;
} elsif($token eq ';') {
throw("parse error: unbalanced ()\n") if($bracket_level ne 0);
$state=13;
$token = next_token(\$newick, "(");
} else {
throw("parse error: expected ; or ) or ,\n");
}
}
case 13 {
throw("parse error: nothing expected after ;");
}
}
}
return $root; } |
sub pre_root_tree
{
my $self = shift;
my $tree = shift;
$tree->print_tree($self->{'tree_scale'}) if($self->debug);
my $node = new Bio::EnsEMBL::Compara::NestedSet;
$node->merge_children($tree);
my ($link) = @{$node->links};
$link = Bio::EnsEMBL::Compara::Graph::Algorithms::find_balanced_link($link, $self->debug);
if($self->debug) { print("balanced link is\n "); $link->print_link; }
my $root = Bio::EnsEMBL::Compara::Graph::Algorithms::root_tree_on_link($link);
$node->minimize_node;
$tree->merge_children($root);
$tree->print_tree($self->{'tree_scale'}) if($self->debug);
return $tree; } |
sub print_params
{ my $self = shift;
print("params:\n");
print(" tree_id : ", $self->{'protein_tree'}->node_id,"\n") if($self->{'protein_tree'});
print(" species_tree_file : ", $self->{'species_tree_file'},"\n") if($self->{'species_tree_file'}); } |
sub rap_newick_format
{ my $self = shift;
my $tree_node = shift;
my $newick = "";
if($tree_node->get_child_count() > 0) {
$newick .= "(";
my $first_child=1;
foreach my $child (@{$tree_node->sorted_children}) {
$newick .= "," unless($first_child);
$newick .= $self->rap_newick_format($child);
$first_child = 0;
}
$newick .= ")";
}
if(!($tree_node->equals($self->{'protein_tree'}))) {
if($tree_node->isa('Bio::EnsEMBL::Compara::AlignedMember')) {
$newick .= sprintf("%s", $tree_node->member_id,);
}
$newick .= sprintf(":%1.4f", $tree_node->distance_to_parent);
}
return $newick;
}
} |
sub run
{
my $self = shift;
$self->run_rap; } |
sub run_rap
{
my $self = shift;
my $starttime = time()*1000;
$self->pre_root_tree($self->{'protein_tree'});
$self->{'rap_infile'} = $self->dumpTreeToWorkdir($self->{'protein_tree'});
return unless($self->{'rap_infile'});
$self->{'newick_file'} = $self->{'rap_infile'} . "_rap_tree.txt ";
my $rap_executable = $self->analysis->program_file;
my $cmd = "java -jar /usr/local/ensembl/bin/rap.jar";
if ($rap_executable) {
$cmd = $rap_executable;
}
$cmd .= " 80"; $cmd .= " 50.0"; $cmd .= " 30"; $cmd .= " 0.15"; $cmd .= " 0.03"; $cmd .= " 0.15"; $cmd .= " ". $self->{'species_tree_file'};
$cmd .= " ". $self->{'rap_infile'};
$cmd .= " ". $self->{'rap_outfile'};
$cmd .= " 2>&1 > /dev/null" unless($self->debug);
$self->{'comparaDBA'}->dbc->disconnect_when_inactive(1);
print("$cmd\n") if($self->debug);
unless(system($cmd) == 0) {
print("$cmd\n");
throw("error running rap, $!\n");
}
$self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);
$self->parse_RAP_output;
my $runtime = time()*1000-$starttime;
$self->{'protein_tree'}->store_tag('RAP_runtime_msec', $runtime);
}
} |
sub store_duplication_tags
{
my $self = shift;
my $node = shift;
if($node->get_tagvalue("Duplication") eq '1') {
if($self->debug) { printf("store duplication : "); $node->print_node; }
$node->store_tag('Duplication', 1);
} else {
$node->store_tag('Duplication', 0);
}
foreach my $child (@{$node->children}) {
$self->store_duplication_tags($child);
}
return undef;
}
1; } |
sub store_proteintree
{
my $self = shift;
return unless($self->{'protein_tree'});
printf("RAP::store_proteintree\n") if($self->debug);
my $treeDBA = $self->{'comparaDBA'}->get_ProteinTreeAdaptor;
$treeDBA->sync_tree_leftright_index($self->{'protein_tree'});
$treeDBA->store($self->{'protein_tree'});
$treeDBA->delete_nodes_not_in_tree($self->{'protein_tree'});
if($self->debug >1) {
print("done storing - now print\n");
$self->{'protein_tree'}->print_tree($self->{'tree_scale'});
}
$self->{'protein_tree'}->store_tag('reconciliation_method', 'RAP');
$self->store_duplication_tags($self->{'protein_tree'});
return undef; } |
sub write_output
{ my $self = shift;
$self->store_proteintree; } |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _