Bio::EnsEMBL::Compara::RunnableDB
OrthoTree
Toolbar
Summary
Bio::EnsEMBL::Compara::RunnableDB::OrthoTree
Package variables
No package variables defined.
Included modules
File::Basename
Getopt::Long
IO::File
Scalar::Util qw ( looks_like_number )
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Synopsis
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $otree = Bio::EnsEMBL::Compara::RunnableDB::OrthoTree->new (
-db => $db,
-input_id => $input_id,
-analysis => $analysis );
$otree->fetch_input(); #reads from DB
$otree->run();
$otree->output();
$otree->write_output(); #writes to DB
Description
This Analysis/RunnableDB is designed to take ProteinTree as input
This must already have a rooted tree with duplication/sepeciation tags
on the nodes.
It analyzes that tree structure to pick Orthologues and Paralogs for
each genepair.
input_id/parameters format eg: "{'protein_tree_id'=>1234}"
protein_tree_id : use 'id' to fetch a cluster from the ProteinTree
Methods
DESTROY | No description | Code |
_slurp | No description | Code |
_treefam_genepairlink_stats | No description | Code |
analyze_genepairlink | No description | Code |
ancient_residual_test | No description | Code |
check_homology_consistency | No description | Code |
check_job_fail_options | No description | Code |
delete_old_homologies | No description | Code |
delete_old_homologies_old | No description | Code |
delete_old_orthotree_tags | No description | Code |
direct_ortholog_test | No description | Code |
display_link_analysis | No description | Code |
duplication_confidence_score | No description | Code |
fetch_input | Description | Code |
genepairlink_check_dups | No description | Code |
genepairlink_fetch_homology | No description | Code |
generate_attribute_arguments | No description | Code |
get_ancestor_species_hash | No description | Code |
get_ancestor_taxon_level | No description | Code |
get_matrix_hash | No description | Code |
get_params | No description | Code |
inspecies_paralog_test | No description | Code |
load_species_tree | No description | Code |
load_species_tree_from_file | No description | Code |
load_species_tree_from_tax | No description | Code |
one2many_ortholog_test | No description | Code |
outspecies_test | No description | Code |
print_params | No description | Code |
run | Description | Code |
run_analysis | No description | Code |
store_gene_link_as_homology | No description | Code |
store_homologies | No description | Code |
write_output | Description | Code |
Methods description
Title : fetch_input Usage : $self->fetch_input Function: Fetches input data for repeatmasker from the database Returns : none Args : none |
Title : run Usage : $self->run Function: runs OrthoTree Returns : none Args : none |
Title : write_output Usage : $self->write_output
Function: parse clustalw output and update homology and
homology_member tables
Returns : none
Args : none |
Methods code
sub DESTROY
{ my $self = shift;
if($self->{'protein_tree'}) {
printf("OrthoTree::DESTROY releasing protein_tree\n") if($self->debug);
$self->{'protein_tree'}->release_tree;
$self->{'protein_tree'} = undef;
}
if($self->{'taxon_tree'}) {
printf("OrthoTree::DESTROY releasing taxon_tree\n") if($self->debug);
$self->{'taxon_tree'}->release_tree;
$self->{'taxon_tree'} = undef;
}
$self->SUPER::DESTROY if $self->can("SUPER::DESTROY");
}
} |
sub _slurp
{ my ($self, $file_name) = @_;
my $slurped;
{
local $/ = undef;
open(my $fh, '<', $file_name);
$slurped = <$fh>;
close($fh);
}
return $slurped; } |
sub _treefam_genepairlink_stats
{
my $self = shift;
my $starttime = time()*1000;
my $tmp_time = time();
my $tree = $self->{'protein_tree'};
my @all_protein_leaves = @{$tree->get_all_leaves};
$self->get_ancestor_species_hash($tree);
$tmp_time = time();
printf("build fully linked graph\n") if($self->debug);
my @genepairlinks;
while (my $protein1 = shift @all_protein_leaves) {
foreach my $protein2 (@all_protein_leaves) {
my $ancestor = $protein1->find_first_shared_ancestor($protein2);
my $taxon_level = $self->get_ancestor_taxon_level($ancestor);
my $distance = $protein1->distance_to_ancestor($ancestor) +
$protein2->distance_to_ancestor($ancestor);
my $genepairlink = new Bio::EnsEMBL::Compara::Graph::Link
($protein1, $protein2, $distance);
$genepairlink->add_tag("hops", 0);
$genepairlink->add_tag("ancestor", $ancestor);
$genepairlink->add_tag("taxon_name", $taxon_level->name);
push @genepairlinks, $genepairlink;
}
}
printf("%1.3f secs build links and features\n", time()-$tmp_time)
if($self->debug>1);
$self->{'protein_tree'}->print_tree($self->{'tree_scale'})
if($self->debug);
$tmp_time = time();
printf("sort links\n") if($self->debug);
my @sorted_genepairlinks =
sort {$a->distance_between <=> $b->distance_between} @genepairlinks;
printf("%1.3f secs to sort links\n", time()-$tmp_time) if($self->debug > 1);
printf("analyze links\n") if($self->debug);
$tmp_time = time();
my $count = 0;
foreach my $genepairlink (@sorted_genepairlinks) {
my ($protein1, $protein2) = $genepairlink->get_nodes;
$self->genepairlink_check_dups($genepairlink);
$self->genepairlink_fetch_homology($genepairlink) if($self->debug);
if($self->inspecies_paralog_test($genepairlink)) { }
elsif($self->direct_ortholog_test($genepairlink)) { }
elsif($self->ancient_residual_test($genepairlink)) { }
elsif($self->one2many_ortholog_test($genepairlink)) { }
elsif($self->outspecies_test($genepairlink)) { }
else {
printf
("OOPS!!!! %s - %s\n",
$protein1->gene_member->stable_id,
$protein2->gene_member->stable_id
);
}
my $stid1;
if (defined($protein1->gene_member)) {
$stid1 = $protein1->gene_member->stable_id;
} elsif (defined($protein1->get_tagvalue("G"))) {
$stid1 = $protein1->get_tagvalue("G");
} else {
$stid1 = "unknown";
}
my $stid2;
if (defined($protein2->gene_member)) {
$stid2 = $protein2->gene_member->stable_id;
} elsif (defined($protein2->get_tagvalue("G"))) {
$stid2 = $protein2->get_tagvalue("G");
} else {
$stid2 = "unknown";
}
my @stids = sort ($stid1,$stid2);
my $type = $genepairlink->get_tagvalue('orthotree_type');
my $subtype = $genepairlink->get_tagvalue('orthotree_subtype') || 'NA';
my $tree_type = "GT";
$tree_type = "TF" if $self->{'_treefam'};
my $tree_id = $self->{'protein_tree'}->node_id unless $self->{'_treefam'};
$tree_id = $self->{'_treefam'} if $self->{'_treefam'};
my $dup =
$genepairlink->get_tagvalue('ancestor')->get_tagvalue('Duplication');
my $dup_alg =
$genepairlink->get_tagvalue('ancestor')->get_tagvalue('Duplication_alg') || 0;
$self->{_gpresults} .= "$tree_type,$tree_id,$stids[0]"."_"."$stids[1]".","."$type,$subtype,$dup,$dup_alg\n";
$self->{_homologytable} .= "$stids[0]".","."$stids[1]".","."$type,$subtype\n";
print STDERR "analyze links $count\n" if ($count % 500 == 0);
}
printf("%1.3f secs to analyze genepair links\n", time()-$tmp_time)
if($self->debug > 1);
$self->store_homologies if ($self->{debug}); } |
sub analyze_genepairlink
{
my $self = shift;
my $genepairlink = shift;
my ($protein1, $protein2) = $genepairlink->get_nodes;
$self->genepairlink_check_dups($genepairlink);
$self->genepairlink_fetch_homology($genepairlink) if($self->debug);
if($self->inspecies_paralog_test($genepairlink)) { }
elsif($self->direct_ortholog_test($genepairlink)) { }
elsif($self->ancient_residual_test($genepairlink)) { }
elsif($self->one2many_ortholog_test($genepairlink)) { }
elsif($self->outspecies_test($genepairlink)) { }
else {
printf
(
"OOPS!!!! %s - %s\n",
$protein1->gene_member->stable_id,
$protein2->gene_member->stable_id
);
}
$self->{'old_homology_count'}++
if($genepairlink->get_tagvalue('old_homology'));
my $type = $genepairlink->get_tagvalue('orthotree_type');
if($type) {
if(!defined($self->{'orthotree_homology_counts'}->{$type})) {
$self->{'orthotree_homology_counts'}->{$type} = 1;
} else {
$self->{'orthotree_homology_counts'}->{$type}++;
}
}
$self->display_link_analysis($genepairlink) if($self->debug >1);
return undef; } |
sub ancient_residual_test
{
my $self = shift;
my $genepairlink = shift;
my ($pep1, $pep2) = $genepairlink->get_nodes;
return undef if($pep1->genome_db_id == $pep2->genome_db_id);
my $ancestor = $genepairlink->get_tagvalue('ancestor');
my $species_hash = $self->get_ancestor_species_hash($ancestor);
my $count1 = $species_hash->{$pep1->genome_db_id};
my $count2 = $species_hash->{$pep2->genome_db_id};
return undef if($count1>1);
return undef if($count2>1);
if ($self->{'_treefam'}) {
my $dup_value = $ancestor->get_tagvalue("Duplication");
if ($dup_value eq '') {
$dup_value = 0;
$ancestor->add_tag("Duplication",0) ;
}
}
my $sis_value = $ancestor->get_tagvalue("duplication_confidence_score");
if($ancestor->get_tagvalue("Duplication") > 0 && $sis_value ne '0') {
$genepairlink->add_tag("orthotree_type", 'apparent_ortholog_one2one');
my $taxon = $self->get_ancestor_taxon_level($ancestor);
$genepairlink->add_tag("orthotree_subtype", $taxon->name);
if ('' eq $ancestor->get_tagvalue("duplication_confidence_score")) {
$self->duplication_confidence_score($ancestor);
$genepairlink->{duplication_confidence_score} = $ancestor->get_tagvalue("duplication_confidence_score");
}
} else {
$genepairlink->add_tag("orthotree_type", 'ortholog_one2one');
my $taxon = $self->get_ancestor_taxon_level($ancestor);
$genepairlink->add_tag("orthotree_subtype", $taxon->name);
}
return 1; } |
sub check_homology_consistency
{ my $self = shift;
if ($self->{debug}) {
print "checking homology consistency\n";
foreach my $mlss_member_id ( keys %{$self->{_homology_consistency}} ) {
my $count = scalar(keys %{$self->{_homology_consistency}{$mlss_member_id}});
if ($count > 1) {
my ($mlss, $member_id) = split("_",$mlss_member_id);
print "mlss member_id : $mlss $member_id\n";
}
}
}
foreach my $mlss_member_id ( keys %{$self->{_homology_consistency}} ) {
my $count = scalar(keys %{$self->{_homology_consistency}{$mlss_member_id}});
if ($count > 1) {
my ($mlss, $member_id) = split("_",$mlss_member_id);
$self->throw("Inconsistent homologies in mlss $mlss and member_id $member_id");
}
} } |
sub check_job_fail_options
{
my $self = shift;
if ( $self->{'protein_tree'}->get_tagvalue('gene_count') > $self->{'max_gene_count'} ) {
$self->dataflow_output_id($self->input_id, 2);
$self->input_job->update_status('FAILED');
$self->{'protein_tree'}->release_tree;
$self->{'protein_tree'} = undef;
throw("OrthoTree : cluster size over threshold and FAIL it");
}
if($self->input_job->retry_count >= 2) {
$self->dataflow_output_id($self->input_id, 2);
$self->input_job->update_status('FAILED');
$self->DESTROY;
throw("OrthoTree job failed >=3 times: try something else and FAIL it");
}
if ($self->input_job->retry_count >= 1) {
if ($self->{'protein_tree'}->get_tagvalue('gene_count') > 400 && !defined($self->worker->{HIGHMEM})) {
$self->input_job->adaptor->reset_highmem_job_by_dbID($self->input_job->dbID);
$self->DESTROY;
throw("OrthoTree job too big: try something else and FAIL it");
}
} } |
sub delete_old_homologies
{
my $self = shift;
return undef unless ($self->input_job->retry_count > 0);
print "deleting old homologies\n" if ($self->debug);
my $delete_time = time();
my $tree_node_id = $self->{protein_tree}->node_id;
my $sql1 = "delete h.*, hm.* from homology h, homology_member hm where h.homology_id=hm.homology_id and h.tree_node_id=$tree_node_id";
my $sth1 = $self->dbc->prepare($sql1);
$sth1->execute;
$sth1->finish;
printf("%1.3f secs build links and features\n", time()-$delete_time);
$self->{old_homologies_deleted} = 1;
return undef; } |
sub delete_old_homologies_old
{
my $self = shift;
my $genepairlink = shift;
return undef unless ($self->input_job->retry_count > 0);
my ($member1, $member2) = $genepairlink->get_nodes;
my @homologies = @{$self->{'comparaDBA'}->get_HomologyAdaptor->fetch_by_Member_Member_method_link_type
($member1->gene_member, $member2->gene_member, 'ENSEMBL_ORTHOLOGUES')};
push @homologies, @{$self->{'comparaDBA'}->get_HomologyAdaptor->fetch_by_Member_Member_method_link_type
($member1->gene_member, $member2->gene_member, 'ENSEMBL_PARALOGUES')};
my $sql1 = "DELETE FROM homology WHERE homology_id=?";
my $sth1 = $self->dbc->prepare($sql1);
my $sql2 = "DELETE FROM homology_member WHERE homology_id=?";
my $sth2 = $self->dbc->prepare($sql2);
foreach my $homology (@homologies) {
$sth1->execute($homology->dbID);
$sth2->execute($homology->dbID);
}
$sth1->finish;
$sth2->finish;
return undef; } |
sub delete_old_orthotree_tags
{
my $self = shift;
return undef unless ($self->input_job->retry_count > 0);
print "deleting old orthotree tags\n" if ($self->debug);
my @node_ids;
foreach my $node ($self->{'protein_tree'}->get_all_subnodes) {
push @node_ids, $node->node_id;
}
my @list_ids;
foreach my $id (@node_ids) {
push @list_ids, $id;
if (scalar @list_ids == 2000) {
my $sql = "delete from protein_tree_tag where node_id in (".join(",",@list_ids).") and tag in ('duplication_confidence_score','taxon_id','taxon_name','OrthoTree_runtime_msec','OrthoTree_types_hashstr')";
my $sth = $self->dbc->prepare($sql);
$sth->execute;
$sth->finish;
@list_ids = ();
}
}
if (scalar @list_ids) {
my $sql = "delete from protein_tree_tag where node_id in (".join(",",@list_ids).") and tag in ('duplication_confidence_score','taxon_id','taxon_name','OrthoTree_runtime_msec','OrthoTree_types_hashstr')";
my $sth = $self->dbc->prepare($sql);
$sth->execute;
$sth->finish;
@list_ids = ();
}
return undef; } |
sub direct_ortholog_test
{
my $self = shift;
my $genepairlink = shift;
return undef if($genepairlink->get_tagvalue('has_dups'));
my ($pep1, $pep2) = $genepairlink->get_nodes;
return undef if($pep1->genome_db_id == $pep2->genome_db_id);
my $ancestor = $genepairlink->get_tagvalue('ancestor');
my $species_hash = $self->get_ancestor_species_hash($ancestor);
my $count1 = $species_hash->{$pep1->genome_db_id};
my $count2 = $species_hash->{$pep2->genome_db_id};
return undef if($count1>1);
return undef if($count2>1);
$genepairlink->add_tag("orthotree_type", 'ortholog_one2one');
my $taxon = $self->get_ancestor_taxon_level($ancestor);
$genepairlink->add_tag("orthotree_subtype", $taxon->name);
return 1; } |
sub display_link_analysis
{
my $self = shift;
my $genepairlink = shift;
my ($protein1, $protein2) = $genepairlink->get_nodes;
my $ancestor = $genepairlink->get_tagvalue('ancestor');
printf("%21s(%7d) - %21s(%7d) : %10.3f dist : %3d hops : ",
$protein1->gene_member->stable_id, $protein1->gene_member->member_id,
$protein2->gene_member->stable_id, $protein2->gene_member->member_id,
$genepairlink->distance_between, $genepairlink->get_tagvalue('hops'));
if($genepairlink->get_tagvalue('has_dups')) { printf("%5s ", 'DUP');
} else { printf("%5s ", ""); }
my $homology = $genepairlink->get_tagvalue('old_homology');
if($homology) { printf("%5s ", $homology->description);
} else { printf("%5s ", ""); }
print("ancestor:(");
my $dup_value = $ancestor->get_tagvalue("Duplication");
my $sis_value = $ancestor->get_tagvalue("duplication_confidence_score");
if($dup_value eq '1' || $dup_value eq '2'){
if ($sis_value eq '0') {
print("DD ");
} else {
print("DUP ");
}
}
else{print" ";}
printf("%9s)", $ancestor->node_id);
my $taxon_level = $ancestor->get_tagvalue('taxon_level');
printf(" %s %s %s\n",
$genepairlink->get_tagvalue('orthotree_type'),
$genepairlink->get_tagvalue('orthotree_subtype'),
$taxon_level->name
);
return undef; } |
sub duplication_confidence_score
{ my $self = shift;
my $ancestor = shift;
my ($child_a, $child_b, $dummy) = @{$ancestor->children};
throw("tree is multifurcated in duplication_confidence_score\n") if (defined($dummy));
my @child_a_gdbs = keys %{$self->get_ancestor_species_hash($child_a)};
my @child_b_gdbs = keys %{$self->get_ancestor_species_hash($child_b)};
my %seen = (); my @gdb_a = grep { ! $seen{$_} ++ } @child_a_gdbs;
%seen = (); my @gdb_b = grep { ! $seen{$_} ++ } @child_b_gdbs;
my @isect = my @diff = my @union = (); my %count;
foreach my $e (@gdb_a, @gdb_b) { $count{$e}++ }
foreach my $e (keys %count) {
push(@union, $e); push @{ $count{$e} == 2 ?\@ isect :\@ diff }, $e;
}
my $duplication_confidence_score = 0;
my $scalar_isect = scalar(@isect);
my $scalar_union = scalar(@union);
$duplication_confidence_score =
(($scalar_isect)/$scalar_union) unless (0 == $scalar_isect);
$ancestor->store_tag
(
"duplication_confidence_score",
$duplication_confidence_score
) unless ($self->{'_readonly'});
my $rounded_duplication_confidence_score = (int((100.0 * $scalar_isect / $scalar_union + 0.5))); my $species_intersection_score = $ancestor->get_tagvalue("species_intersection_score");
unless (defined($species_intersection_score) && $species_intersection_score ne '') {
my $ancestor_node_id = $ancestor->node_id;
warn("Difference in the ProteinTree: duplication_confidence_score [$duplication_confidence_score] whereas species_intersection_score [$species_intersection_score] is undefined in njtree - ancestor $ancestor_node_id\n");
return;
}
if ($species_intersection_score ne $rounded_duplication_confidence_score && !defined($self->{_readonly})) {
my $ancestor_node_id = $ancestor->node_id;
$self->throw("Inconsistency in the ProteinTree: duplication_confidence_score [$duplication_confidence_score] != species_intersection_score [$species_intersection_score] - $ancestor_node_id\n");
} } |
sub fetch_input
{ my( $self) = @_;
$self->{'tree_scale'} = 1;
$self->{'store_homologies'} = 1;
$self->{'max_gene_count'} = 200;
$self->{all_between} = 0;
$self->{no_between} = 0.25;
$self->throw("No input_id") unless defined($self->input_id);
$self->{'comparaDBA'} = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->new
(
-DBCONN=>$self->db->dbc
);
$self->get_params($self->parameters);
$self->get_params($self->input_id);
if($self->{analysis_data_id}) {
my $analysis_data_id = $self->{analysis_data_id};
my $analysis_data_params = $self->db->get_AnalysisDataAdaptor->fetch_by_dbID($analysis_data_id);
$self->get_params($analysis_data_params);
}
$self->print_params if($self->debug);
my $starttime = time();
$self->{'treeDBA'} = $self->{'comparaDBA'}->get_ProteinTreeAdaptor;
$self->{homologyDBA} = $self->{'comparaDBA'}->get_HomologyAdaptor;
$self->{'protein_tree'} = $self->{'treeDBA'}->
fetch_tree_at_node_id($self->{'protein_tree_id'});
$self->check_job_fail_options;
if($self->debug) {
$self->{'protein_tree'}->print_tree($self->{'tree_scale'});
printf("time to fetch tree : %1.3f secs\n" , time()-$starttime);
}
unless($self->{'protein_tree'}) {
throw("undefined ProteinTree as input\n");
}
$self->delete_old_homologies;
$self->delete_old_orthotree_tags;
$self->load_species_tree;
return 1; } |
sub genepairlink_check_dups
{
my $self = shift;
my $genepairlink = shift;
my ($pep1, $pep2) = $genepairlink->get_nodes;
my $ancestor = $genepairlink->get_tagvalue('ancestor');
my $has_dup=0;
my %nodes_between;
my $tnode = $pep1;
do {
$tnode = $tnode->parent;
my $dup_value = $tnode->get_tagvalue("Duplication");
my $sis_value = $tnode->get_tagvalue("duplication_confidence_score");
$dup_value = 0 unless (defined($dup_value) && $dup_value ne '');
$sis_value = 0 unless (defined($sis_value) && $sis_value ne '');
unless ($sis_value eq '0') {
if($dup_value > 0) {
$has_dup = 1;
}
}
$nodes_between{$tnode->node_id} = $tnode;
} while(!($tnode->equals($ancestor)));
$tnode = $pep2;
do {
$tnode = $tnode->parent;
my $dup_value = $tnode->get_tagvalue("Duplication");
my $sis_value = $tnode->get_tagvalue("duplication_confidence_score");
$dup_value = 0 unless (defined($dup_value) && $dup_value ne '');
$sis_value = 0 unless (defined($sis_value) && $sis_value ne '');
$genepairlink->{duplication_confidence_score} = $sis_value;
unless ($sis_value eq '0') {
if($dup_value > 0) {
$has_dup = 1;
}
}
$nodes_between{$tnode->node_id} = $tnode;
} while(!($tnode->equals($ancestor)));
$genepairlink->add_tag("hops", scalar(keys(%nodes_between)));
$genepairlink->add_tag("has_dups", $has_dup);
return undef;
}
} |
sub genepairlink_fetch_homology
{
my $self = shift;
my $genepairlink = shift;
my ($member1, $member2) = $genepairlink->get_nodes;
my $sql = "select homology.homology_id from homology ".
"join homology_member hm1 using(homology_id) ".
"join homology_member hm2 using (homology_id) " .
"where hm1.peptide_member_id=? and hm2.peptide_member_id=?";
my $sth = $self->dbc->prepare($sql);
$sth->execute($member1->member_id, $member2->member_id);
my $ref = $sth->fetchrow_arrayref();
return undef unless($ref);
$sth->finish;
my ($homology_id) = @$ref;
return undef unless($homology_id);
my $homology =
$self->{'comparaDBA'}->get_HomologyAdaptor->fetch_by_dbID($homology_id);
$genepairlink->add_tag("old_homology", $homology);
return $homology; } |
sub generate_attribute_arguments
{ my ($self, $protein1, $protein2, $type) = @_;
my $new_aln1_cigarline = "";
my $new_aln2_cigarline = "";
my $perc_id1 = 0;
my $perc_pos1 = 0;
my $perc_id2 = 0;
my $perc_pos2 = 0;
if ($type eq 'between_species_paralog') {
return ($new_aln1_cigarline, $perc_id1, $perc_pos1, $new_aln2_cigarline, $perc_id2, $perc_pos2);
}
my $identical_matches = 0;
my $positive_matches = 0;
my $m_hash = $self->get_matrix_hash;
my ($aln1state, $aln2state);
my ($aln1count, $aln2count);
my $alignment_string = $protein1->alignment_string;
my @aln1 = unpack("A1" x length($alignment_string), $alignment_string);
$alignment_string = $protein2->alignment_string;
my @aln2 = unpack("A1" x length($alignment_string), $alignment_string);
for (my $i=0; $i <= $#aln1; $i++) {
next if ($aln1[$i] eq "-" && $aln2[$i] eq "-");
my ($cur_aln1state, $cur_aln2state) = qw(M M);
if ($aln1[$i] eq "-") {
$cur_aln1state = "D";
}
if ($aln2[$i] eq "-") {
$cur_aln2state = "D";
}
if ($cur_aln1state eq "M" && $cur_aln2state eq "M" && $aln1[$i] eq $aln2[$i]) {
$identical_matches++;
$positive_matches++;
} elsif ($cur_aln1state eq "M" && $cur_aln2state eq "M" && $m_hash->{uc $aln1[$i]}{uc $aln2[$i]} > 0) {
$positive_matches++;
}
unless (defined $aln1state) {
$aln1count = 1;
$aln2count = 1;
$aln1state = $cur_aln1state;
$aln2state = $cur_aln2state;
next;
}
if ($cur_aln1state eq $aln1state) {
$aln1count++;
} else {
if ($aln1count == 1) {
$new_aln1_cigarline .= $aln1state;
} else {
$new_aln1_cigarline .= $aln1count.$aln1state;
}
$aln1count = 1;
$aln1state = $cur_aln1state;
}
if ($cur_aln2state eq $aln2state) {
$aln2count++;
} else {
if ($aln2count == 1) {
$new_aln2_cigarline .= $aln2state;
} else {
$new_aln2_cigarline .= $aln2count.$aln2state;
}
$aln2count = 1;
$aln2state = $cur_aln2state;
}
}
if ($aln1count == 1) {
$new_aln1_cigarline .= $aln1state;
} else {
$new_aln1_cigarline .= $aln1count.$aln1state;
}
if ($aln2count == 1) {
$new_aln2_cigarline .= $aln2state;
} else {
$new_aln2_cigarline .= $aln2count.$aln2state;
}
my $seq_length1 = $protein1->seq_length;
unless (0 == $seq_length1) {
$perc_id1 = $identical_matches*100.0/$seq_length1; $perc_pos1 = $positive_matches*100.0/$seq_length1; }
my $seq_length2 = $protein2->seq_length;
unless (0 == $seq_length2) {
$perc_id2 = $identical_matches*100.0/$seq_length2; $perc_pos2 = $positive_matches*100.0/$seq_length2; }
return ($new_aln1_cigarline, $perc_id1, $perc_pos1, $new_aln2_cigarline, $perc_id2, $perc_pos2); } |
sub get_ancestor_species_hash
{
my $self = shift;
my $node = shift;
my $species_hash = $node->get_tagvalue('species_hash');
return $species_hash if($species_hash);
$species_hash = {};
my $duplication_hash = {};
my $is_dup=0;
if($node->isa('Bio::EnsEMBL::Compara::AlignedMember')) {
my $node_genome_db_id = $node->genome_db_id;
$species_hash->{$node_genome_db_id} = 1;
$node->add_tag('species_hash', $species_hash);
return $species_hash;
}
foreach my $child (@{$node->children}) {
my $t_species_hash = $self->get_ancestor_species_hash($child);
next unless(defined($t_species_hash)); foreach my $genome_db_id (keys(%$t_species_hash)) {
unless(defined($species_hash->{$genome_db_id})) {
$species_hash->{$genome_db_id} = $t_species_hash->{$genome_db_id};
} else {
$is_dup=1;
$duplication_hash->{$genome_db_id} = 1;
$species_hash->{$genome_db_id} += $t_species_hash->{$genome_db_id};
}
}
}
$node->add_tag("species_hash", $species_hash);
if($is_dup && !($self->{'_treefam'})) {
my $original_duplication_value = $node->get_tagvalue("Duplication");
$original_duplication_value = 0
unless (defined $original_duplication_value && $original_duplication_value ne '');
if ($original_duplication_value == 0) {
$node->add_tag("duplication_hash", $duplication_hash);
$node->store_tag("Duplication", 1) unless ($self->{'_readonly'});
$node->store_tag("Duplication_alg", 'species_count')
unless ($self->{'_readonly'});
} elsif ($original_duplication_value == 1) {
my $dup_alg = $node->get_tagvalue("Duplication_alg");
if (defined $dup_alg and $dup_alg ne 'species_count') {
$node->add_tag("duplication_hash", $duplication_hash);
$node->store_tag("Duplication", 2) unless ($self->{'_readonly'});
$node->store_tag("Duplication_alg", 'species_count')
unless ($self->{'_readonly'});
}
}
}
return $species_hash; } |
sub get_ancestor_taxon_level
{
my $self = shift;
my $ancestor = shift;
my $taxon_level = $ancestor->get_tagvalue('taxon_level');
return $taxon_level if($taxon_level);
my $taxon_tree = $self->{'taxon_tree'};
my $species_hash = $self->get_ancestor_species_hash($ancestor);
foreach my $gdbID (keys(%$species_hash)) {
my $gdb =
$self->{'comparaDBA'}->get_GenomeDBAdaptor->fetch_by_dbID($gdbID);
my $taxon = $taxon_tree->find_node_by_node_id($gdb->taxon_id);
unless($taxon) {
throw("oops missing taxon " . $gdb->taxon_id ."\n");
}
if($taxon_level) {
$taxon_level = $taxon_level->find_first_shared_ancestor($taxon);
} else {
$taxon_level = $taxon;
}
}
my $taxon_id = $taxon_level->get_tagvalue("taxon_id");
$ancestor->add_tag("taxon_level", $taxon_level);
$ancestor->store_tag("taxon_id", $taxon_id)
unless ($self->{'_readonly'});
$ancestor->store_tag("taxon_name", $taxon_level->name)
unless ($self->{'_readonly'});
return $taxon_level; } |
sub get_matrix_hash
{ my $self = shift;
return $self->{'matrix_hash'} if (defined $self->{'matrix_hash'});
my $BLOSUM62 = "# Matrix made by matblas from blosum62.iij
# * column uses minimum score
# BLOSUM Clustered Scoring Matrix in 1/2 Bit Units
# Blocks Database = /data/blocks_5.0/blocks.dat
# Cluster Percentage: >= 62
# Entropy = 0.6979, Expected = -0.5209
A R N D C Q E G H I L K M F P S T W Y V B Z X *
A 4 -1 -2 -2 0 -1 -1 0 -2 -1 -1 -1 -1 -2 -1 1 0 -3 -2 0 -2 -1 0 -4
R -1 5 0 -2 -3 1 0 -2 0 -3 -2 2 -1 -3 -2 -1 -1 -3 -2 -3 -1 0 -1 -4
N -2 0 6 1 -3 0 0 0 1 -3 -3 0 -2 -3 -2 1 0 -4 -2 -3 3 0 -1 -4
D -2 -2 1 6 -3 0 2 -1 -1 -3 -4 -1 -3 -3 -1 0 -1 -4 -3 -3 4 1 -1 -4
C 0 -3 -3 -3 9 -3 -4 -3 -3 -1 -1 -3 -1 -2 -3 -1 -1 -2 -2 -1 -3 -3 -2 -4
Q -1 1 0 0 -3 5 2 -2 0 -3 -2 1 0 -3 -1 0 -1 -2 -1 -2 0 3 -1 -4
E -1 0 0 2 -4 2 5 -2 0 -3 -3 1 -2 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
G 0 -2 0 -1 -3 -2 -2 6 -2 -4 -4 -2 -3 -3 -2 0 -2 -2 -3 -3 -1 -2 -1 -4
H -2 0 1 -1 -3 0 0 -2 8 -3 -3 -1 -2 -1 -2 -1 -2 -2 2 -3 0 0 -1 -4
I -1 -3 -3 -3 -1 -3 -3 -4 -3 4 2 -3 1 0 -3 -2 -1 -3 -1 3 -3 -3 -1 -4
L -1 -2 -3 -4 -1 -2 -3 -4 -3 2 4 -2 2 0 -3 -2 -1 -2 -1 1 -4 -3 -1 -4
K -1 2 0 -1 -3 1 1 -2 -1 -3 -2 5 -1 -3 -1 0 -1 -3 -2 -2 0 1 -1 -4
M -1 -1 -2 -3 -1 0 -2 -3 -2 1 2 -1 5 0 -2 -1 -1 -1 -1 1 -3 -1 -1 -4
F -2 -3 -3 -3 -2 -3 -3 -3 -1 0 0 -3 0 6 -4 -2 -2 1 3 -1 -3 -3 -1 -4
P -1 -2 -2 -1 -3 -1 -1 -2 -2 -3 -3 -1 -2 -4 7 -1 -1 -4 -3 -2 -2 -1 -2 -4
S 1 -1 1 0 -1 0 0 0 -1 -2 -2 0 -1 -2 -1 4 1 -3 -2 -2 0 0 0 -4
T 0 -1 0 -1 -1 -1 -1 -2 -2 -1 -1 -1 -1 -2 -1 1 5 -2 -2 0 -1 -1 0 -4
W -3 -3 -4 -4 -2 -2 -3 -2 -2 -3 -2 -3 -1 1 -4 -3 -2 11 2 -3 -4 -3 -2 -4
Y -2 -2 -2 -3 -2 -1 -2 -3 2 -1 -1 -2 -1 3 -3 -2 -2 2 7 -1 -3 -2 -1 -4
V 0 -3 -3 -3 -1 -2 -2 -3 -3 3 1 -2 1 -1 -2 -2 0 -3 -1 4 -3 -2 -1 -4
B -2 -1 3 4 -3 0 1 -1 0 -3 -4 0 -3 -3 -2 0 -1 -4 -3 -3 4 1 -1 -4
Z -1 0 0 1 -3 3 4 -2 0 -3 -3 1 -1 -3 -1 0 -1 -3 -2 -2 1 4 -1 -4
X 0 -1 -1 -1 -2 -1 -1 -1 -1 -1 -1 -1 -1 -1 -2 0 0 -2 -1 -1 -1 -1 -1 -4
* -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 -4 1
";
my $matrix_string;
my @lines = split(/\n/,$BLOSUM62);
foreach my $line (@lines) {
next if ($line =~ /^\#/);
if ($line =~ /^[A-Z\*\s]+$/) {
$matrix_string .= sprintf "$line\n";
} else {
my @t = split(/\s+/,$line);
shift @t;
$matrix_string .= sprintf(join(" ",@t)."\n");
}
}
my %matrix_hash;
@lines = ();
@lines = split /\n/, $matrix_string;
my $lts = shift @lines;
$lts =~ s/^\s+//;
$lts =~ s/\s+$//;
my @letters = split /\s+/, $lts;
foreach my $letter (@letters) {
my $line = shift @lines;
$line =~ s/^\s+//;
$line =~ s/\s+$//;
my @penalties = split /\s+/, $line;
die "Size of letters array and penalties array are different\n"
unless (scalar @letters == scalar @penalties);
for (my $i=0; $i < scalar @letters; $i++) {
$matrix_hash{uc $letter}{uc $letters[$i]} = $penalties[$i];
}
}
$self->{'matrix_hash'} =\% matrix_hash;
return $self->{'matrix_hash'};
}
1; } |
sub get_params
{ my $self = shift;
my $param_string = shift;
return unless($param_string);
print("parsing parameter string : ",$param_string,"\n") if($self->debug);
my $params = eval($param_string);
return unless($params);
if($self->debug) {
foreach my $key (keys %$params) {
print(" $key : ", $params->{$key}, "\n");
}
}
foreach my $key (qw[cdna species_tree_file protein_tree_id use_genomedb_id max_gene_count analysis_data_id]) {
my $value = $params->{$key};
$self->{$key} = $value if defined $value;
}
return; } |
sub inspecies_paralog_test
{
my $self = shift;
my $genepairlink = shift;
my ($pep1, $pep2) = $genepairlink->get_nodes;
return undef unless($pep1->genome_db_id == $pep2->genome_db_id);
my $ancestor = $genepairlink->get_tagvalue('ancestor');
my $taxon = $self->get_ancestor_taxon_level($ancestor);
$genepairlink->add_tag("orthotree_type", 'within_species_paralog');
$genepairlink->add_tag("orthotree_subtype", $taxon->name);
if ('' eq $ancestor->get_tagvalue("duplication_confidence_score")) {
$self->duplication_confidence_score($ancestor);
$genepairlink->{duplication_confidence_score} = $ancestor->get_tagvalue("duplication_confidence_score");
}
return 1; } |
sub load_species_tree
{ my $self = shift @_;
my $starttime = time();
my $tree = (exists $self->{'species_tree_file'})
? $self->load_species_tree_from_file
: $self->load_species_tree_from_tax
;
$self->{'taxon_tree'} = $tree;
if($self->debug) {
$tree->print_tree(1);
printf("%1.3f secs for load species tree\n", time()-$starttime);
} } |
sub load_species_tree_from_file
{ my $self = shift @_;
my $taxonDBA = $self->{'comparaDBA'}->get_NCBITaxonAdaptor;
print "load_species_tree_from_file\n" if $self->debug;
my $newick = $self->_slurp($self->{species_tree_file});
my $tree = Bio::EnsEMBL::Compara::Graph::NewickParser::parse_newick_into_tree($newick);
foreach my $node (@{$tree->all_nodes_in_graph}) {
my ($id) = split('-',$node->name);
$id =~ s/\*//; if (looks_like_number($id)) {
$node->node_id($id);
my $ncbi_node = $taxonDBA->fetch_node_by_taxon_id($id);
$node->name($ncbi_node->name) if (defined $ncbi_node);
} else { $node->name($id);
}
$node->add_tag('taxon_id', $id);
}
return $tree; } |
sub load_species_tree_from_tax
{
my $self = shift;
printf("load_species_tree_from_tax\n") if($self->debug);
my $starttime = time();
my $taxonDBA = $self->{'comparaDBA'}->get_NCBITaxonAdaptor;
my $gdb_list = $self->{'comparaDBA'}->get_GenomeDBAdaptor->fetch_all;
my $root=undef;
foreach my $gdb (@$gdb_list) {
next if ($gdb->name =~ /Ancestral/);
my $taxon = $taxonDBA->fetch_node_by_taxon_id($gdb->taxon_id);
$taxon->no_autoload_children;
$taxon->{_tags}{taxon_id} = $taxon->taxon_id; $root = $taxon->root unless($root);
$root->merge_node_via_shared_ancestor($taxon);
}
$root = $root->minimize_tree;
return $root; } |
sub one2many_ortholog_test
{
my $self = shift;
my $genepairlink = shift;
my ($pep1, $pep2) = $genepairlink->get_nodes;
return undef if($pep1->genome_db_id == $pep2->genome_db_id);
my $ancestor = $genepairlink->get_tagvalue('ancestor');
my $species_hash = $self->get_ancestor_species_hash($ancestor);
my $count1 = $species_hash->{$pep1->genome_db_id};
my $count2 = $species_hash->{$pep2->genome_db_id};
return undef unless
(
($count1==1 and $count2>1) or ($count1>1 and $count2==1)
);
my $dup_value = $ancestor->get_tagvalue("Duplication");
my $sis_value = $ancestor->get_tagvalue("duplication_confidence_score");
if($dup_value > 0 && $sis_value ne '0') {
return undef;
}
$genepairlink->add_tag("orthotree_type", 'ortholog_one2many');
my $taxon = $self->get_ancestor_taxon_level($ancestor);
$genepairlink->add_tag("orthotree_subtype", $taxon->name);
return 1; } |
sub outspecies_test
{
my $self = shift;
my $genepairlink = shift;
my ($pep1, $pep2) = $genepairlink->get_nodes;
return undef if($pep1->genome_db_id == $pep2->genome_db_id);
my $ancestor = $genepairlink->get_tagvalue('ancestor');
my $taxon = $self->get_ancestor_taxon_level($ancestor);
my $dup_value = $ancestor->get_tagvalue("Duplication");
my $sis_value = $ancestor->get_tagvalue("duplication_confidence_score");
unless ($dup_value eq '') {
if($dup_value > 0 && $sis_value ne '0') {
$genepairlink->add_tag("orthotree_type", 'between_species_paralog');
$genepairlink->add_tag("orthotree_subtype", $taxon->name);
if ('' eq $ancestor->get_tagvalue("duplication_confidence_score")) {
$self->duplication_confidence_score($ancestor);
$genepairlink->{duplication_confidence_score} = $ancestor->get_tagvalue("duplication_confidence_score");
}
} else {
$genepairlink->add_tag("orthotree_type", 'ortholog_many2many');
$genepairlink->add_tag("orthotree_subtype", $taxon->name);
}
}
return 1;
}
} |
sub print_params
{ my $self = shift;
print("params:\n");
printf(" tree_id : %d\n", $self->{'protein_tree_id'}); } |
sub run
{
my $self = shift;
$self->run_analysis; } |
sub run_analysis
{ my $self = shift;
my $starttime = time()*1000;
my $tmp_time = time();
my $tree = $self->{'protein_tree'};
print "Getting all leaves\n";
my @all_protein_leaves = @{$tree->get_all_leaves};
my $tree_node_id = $tree->node_id;
printf("Calculating ancestor species hash\n") if ($self->debug);
$self->get_ancestor_species_hash($tree);
if($self->debug) {
$self->{'protein_tree'}->print_tree($self->{'tree_scale'});
printf("%d proteins in tree\n", scalar(@all_protein_leaves));
}
$tmp_time = time();
printf("build fully linked graph\n") if($self->debug);
my @genepairlinks;
my $graphcount = 0;
while (my $protein1 = shift @all_protein_leaves) {
foreach my $protein2 (@all_protein_leaves) {
my $ancestor = $protein1->find_first_shared_ancestor($protein2);
my $taxon_level = $self->get_ancestor_taxon_level($ancestor);
my $distance = $protein1->distance_to_ancestor($ancestor) +
$protein2->distance_to_ancestor($ancestor);
my $genepairlink = new Bio::EnsEMBL::Compara::Graph::Link
(
$protein1, $protein2, $distance
);
$genepairlink->add_tag("hops", 0);
$genepairlink->add_tag("ancestor", $ancestor);
$genepairlink->add_tag("taxon_name", $taxon_level->name);
$genepairlink->add_tag("tree_node_id", $tree_node_id);
push @genepairlinks, $genepairlink;
}
print STDERR "build graph $graphcount\n" if ($graphcount++ % 10 == 0);
}
printf("%1.3f secs build links and features\n", time()-$tmp_time)
if($self->debug>1);
$self->{'protein_tree'}->print_tree($self->{'tree_scale'})
if($self->debug);
$tmp_time = time();
printf("sort links\n") if($self->debug);
my @sorted_genepairlinks =
sort {$a->distance_between <=> $b->distance_between} @genepairlinks;
printf("%1.3f secs to sort links\n", time()-$tmp_time) if($self->debug > 1);
printf("analyze links\n") if($self->debug);
printf("%d links\n", scalar(@genepairlinks)) if ($self->debug);
$tmp_time = time();
$self->{'old_homology_count'} = 0;
$self->{'orthotree_homology_counts'} = {};
foreach my $genepairlink (@sorted_genepairlinks) {
$self->analyze_genepairlink($genepairlink);
}
printf("%1.3f secs to analyze genepair links\n", time()-$tmp_time)
if($self->debug > 1);
my $runtime = time()*1000-$starttime;
$self->{'protein_tree'}->store_tag('OrthoTree_runtime_msec', $runtime)
unless ($self->{'_readonly'});
if($self->debug) {
printf("%d proteins in tree\n", scalar(@{$tree->get_all_leaves}));
printf("%d pairings\n", scalar(@genepairlinks));
printf("%d old homologies\n", $self->{'old_homology_count'});
printf("orthotree homologies\n");
foreach my $type (keys(%{$self->{'orthotree_homology_counts'}})) {
printf
(
" %13s : %d\n", $type, $self->{'orthotree_homology_counts'}->{$type}
);
}
}
$self->{'homology_links'} =\@ sorted_genepairlinks;
$DB::single=1;1;
return undef; } |
sub store_gene_link_as_homology
{
my $self = shift;
my $genepairlink = shift;
my $type = $genepairlink->get_tagvalue('orthotree_type');
return unless($type);
my $subtype = $genepairlink->get_tagvalue('taxon_name');
my $ancestor = $genepairlink->get_tagvalue('ancestor');
my $tree_node_id = $genepairlink->get_tagvalue('tree_node_id');
warn("Tag tree_node_id undefined\n") unless(defined($tree_node_id) && $tree_node_id ne '');
my ($protein1, $protein2) = $genepairlink->get_nodes;
my $mlss = new Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;
$mlss->method_link_type("ENSEMBL_ORTHOLOGUES")
unless ($type eq 'between_species_paralog' || $type eq 'within_species_paralog');
$mlss->method_link_type("ENSEMBL_PARALOGUES") if ($type eq 'between_species_paralog' || $type eq 'within_species_paralog');
if ($protein1->genome_db->dbID == $protein2->genome_db->dbID) {
$mlss->species_set([$protein1->genome_db]);
} else {
$mlss->species_set([$protein1->genome_db, $protein2->genome_db]);
}
$self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->store($mlss) unless ($self->{'_readonly'});
my $homology = new Bio::EnsEMBL::Compara::Homology;
$homology->description($type);
$homology->subtype($subtype);
$homology->ancestor_node_id($ancestor->node_id);
$homology->tree_node_id($tree_node_id);
$homology->method_link_type($mlss->method_link_type);
$homology->method_link_species_set($mlss);
my $key = $mlss->dbID . "_" . $protein1->dbID;
$self->{_homology_consistency}{$key}{$type} = 1;
my ($cigar_line1, $perc_id1, $perc_pos1,
$cigar_line2, $perc_id2, $perc_pos2) =
$self->generate_attribute_arguments($protein1, $protein2,$type);
my $attribute;
$attribute = new Bio::EnsEMBL::Compara::Attribute;
$attribute->peptide_member_id($protein1->dbID);
$attribute->cigar_line($cigar_line1);
$attribute->perc_cov(100);
$attribute->perc_id(int($perc_id1));
$attribute->perc_pos(int($perc_pos1));
$homology->add_Member_Attribute([$protein1->gene_member, $attribute]);
$attribute = new Bio::EnsEMBL::Compara::Attribute;
$attribute->peptide_member_id($protein2->dbID);
$attribute->cigar_line($cigar_line2);
$attribute->perc_cov(100);
$attribute->perc_id(int($perc_id2));
$attribute->perc_pos(int($perc_pos2));
$homology->add_Member_Attribute([$protein2->gene_member, $attribute]);
my $matching_homology = 0;
if ($self->input_job->retry_count > 0 && !defined($self->{old_homologies_deleted})) {
my $member_id1 = $protein1->gene_member->member_id;
my $member_id2 = $protein2->gene_member->member_id;
if ($member_id1 == $member_id2) {
my $tree_id = $self->{protein_tree}->node_id;
my $pmember_id1 = $protein1->member_id; my $pstable_id1 = $protein1->stable_id;
my $pmember_id2 = $protein2->member_id; my $pstable_id2 = $protein2->stable_id;
$self->throw("$member_id1 ($pmember_id1 - $pstable_id1) and $member_id2 ($pmember_id2 - $pstable_id2) shouldn't be the same");
}
my $stored_homology = @{$self->{homologyDBA}->fetch_by_Member_id_Member_id($member_id1,$member_id2)}[0];
if (defined($stored_homology)) {
$matching_homology = 1;
$matching_homology = 0 if ($stored_homology->description ne $homology->description);
$matching_homology = 0 if ($stored_homology->subtype ne $homology->subtype);
$matching_homology = 0 if ($stored_homology->ancestor_node_id ne $homology->ancestor_node_id);
$matching_homology = 0 if ($stored_homology->tree_node_id ne $homology->tree_node_id);
$matching_homology = 0 if ($stored_homology->method_link_type ne $homology->method_link_type);
$matching_homology = 0 if ($stored_homology->method_link_species_set->dbID ne $homology->method_link_species_set->dbID);
}
if (defined($stored_homology) && (0 == $matching_homology)) {
my $homology_id = $stored_homology->dbID;
my $sql1 = "delete from homology where homology_id=$homology_id";
my $sql2 = "delete from homology_member where homology_id=$homology_id";
my $sth1 = $self->dbc->prepare($sql1);
my $sth2 = $self->dbc->prepare($sql2);
$sth1->execute;
$sth2->execute;
$sth1->finish;
$sth2->finish;
}
}
if($self->{'store_homologies'} && 0 == $matching_homology) {
$self->{'homologyDBA'}->store($homology);
}
my $stable_id;
if($protein1->taxon_id < $protein2->taxon_id) {
$stable_id = $protein1->taxon_id . "_" . $protein2->taxon_id . "_";
} else {
$stable_id = $protein2->taxon_id . "_" . $protein1->taxon_id . "_";
}
$stable_id .= sprintf ("%011.0d",$homology->dbID) if($homology->dbID);
$homology->stable_id($stable_id);
return undef; } |
sub store_homologies
{
my $self = shift;
my $a=1;
my $hlinkscount = 0;
foreach my $genepairlink (@{$self->{'homology_links'}}) {
$self->display_link_analysis($genepairlink) if($self->debug>2);
my $type = $genepairlink->get_tagvalue("orthotree_type");
my $dcs = $genepairlink->{duplication_confidence_score};
$DB::single=$a;1;
next if ($type eq 'between_species_paralog' && $dcs > $self->{no_between});
$self->store_gene_link_as_homology($genepairlink);
print STDERR "homology links $hlinkscount\n" if ($hlinkscount++ % 500 == 0);
}
my $counts_str = $self->encode_hash($self->{'orthotree_homology_counts'});
printf("$counts_str\n");
$self->check_homology_consistency;
$self->{'protein_tree'}->store_tag(
'OrthoTree_types_hashstr',
$self->encode_hash($self->{'orthotree_homology_counts'})) unless ($self->{'_readonly'});
$DB::single=1;1;
return undef; } |
sub write_output
{ my $self = shift;
$self->store_homologies; } |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _