Raw content of Bio::EnsEMBL::Compara::DBSQL::GenomicAlignTreeAdaptor
=head1 NAME
GenomicAlignTreeAdaptor - Object used to store and retrieve GenomicAlignTrees to/from the databases
=head1 SYNOPSIS
=head1 DESCRIPTION
This version of the module is still very experimental.
=head1 CONTACT
=head1 APPENDIX
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Compara::DBSQL::GenomicAlignTreeAdaptor;
use strict;
use Bio::EnsEMBL::Compara::GenomicAlignTree;
use Bio::EnsEMBL::Compara::GenomicAlignGroup;
use Bio::EnsEMBL::Compara::GenomicAlign;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Compara::DBSQL::NestedSetAdaptor;
use Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor;
our @ISA = qw(Bio::EnsEMBL::Compara::DBSQL::NestedSetAdaptor);
###########################
# FETCH methods
###########################
=head2 fetch_all_by_MethodLinkSpeciesSet

  Arg  1     : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set
  Arg  2     : integer $limit_number [optional]
  Arg  3     : integer $limit_index_start [optional, default = 0]
  Example    : my $genomic_align_trees =
                   $genomic_align_tree_adaptor->
                       fetch_all_by_MethodLinkSpeciesSet($mlss);
  Description: Retrieve the Bio::EnsEMBL::Compara::GenomicAlignTree objects
               for this MethodLinkSpeciesSet. The query is restricted to
               entries with gat.parent_id = 0 and is delegated to
               _generic_fetch(). When $limit_number is set, a
               "LIMIT $limit_index_start, $limit_number" clause is appended.
  Returntype : ref. to an array of Bio::EnsEMBL::Compara::GenomicAlignTree
               objects (whatever _generic_fetch() returns).
  Exceptions : throws if $method_link_species_set is not a
               Bio::EnsEMBL::Compara::MethodLinkSpeciesSet or has no dbID;
               throws if a limit value is not a non-negative integer.
  Caller     : none
  Status     : At risk

=cut

sub fetch_all_by_MethodLinkSpeciesSet {
  my ($self, $method_link_species_set, $limit_number, $limit_index_start) = @_;

  # UNIVERSAL::isa copes with undef and unblessed references as well.
  throw("[$method_link_species_set] is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object")
      unless (UNIVERSAL::isa($method_link_species_set,
          "Bio::EnsEMBL::Compara::MethodLinkSpeciesSet"));

  my $method_link_species_set_id = $method_link_species_set->dbID;
  # Report the offending object: the dbID is, by definition, empty here.
  throw("[$method_link_species_set] has no dbID") if (!$method_link_species_set_id);

  my $constraint = "WHERE ga.method_link_species_set_id = $method_link_species_set_id"
      . " AND gat.parent_id = 0";

  my $final_clause = "";
  if ($limit_number) {
    $limit_index_start = 0 if (!$limit_index_start);
    # Both values are interpolated into the SQL: make sure they are plain integers.
    foreach my $this_limit ($limit_number, $limit_index_start) {
      throw("[$this_limit] is not a valid LIMIT value (non-negative integer expected)")
          if ($this_limit !~ /\A\d+\z/);
    }
    $final_clause = "LIMIT $limit_index_start, $limit_number";
  }

  return $self->_generic_fetch($constraint, undef, $final_clause);
}
=head2 fetch_all_by_MethodLinkSpeciesSet_DnaFrag

  Arg  1     : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set
  Arg  2     : Bio::EnsEMBL::Compara::DnaFrag $dnafrag
  Arg  3     : integer $start [optional]
  Arg  4     : integer $end [optional]
  Arg  5     : integer $limit_number [optional]
  Arg  6     : integer $limit_index_start [optional]
  Arg  7     : boolean $restrict_resulting_blocks [optional]
  Example    : my $genomic_align_trees =
                   $genomic_align_tree_adaptor->fetch_all_by_MethodLinkSpeciesSet_DnaFrag(
                       $mlss, $dnafrag, 50000000, 50250000);
  Description: Retrieve the Bio::EnsEMBL::Compara::GenomicAlignTree objects
               having a GenomicAlign on this DnaFrag. The region is only
               restricted when both $start and $end are defined. One tree
               is returned per matching (reference) GenomicAlign; the
               reference_genomic_align and reference_genomic_align_node of
               each tree are set accordingly. If the reference leaf holds
               more than one GenomicAlign, the tree is restricted with
               restrict_between_alignment_positions().
               NOTE: $limit_number, $limit_index_start and
               $restrict_resulting_blocks are accepted for interface
               compatibility but are not used by this implementation.
  Returntype : ref. to an array of Bio::EnsEMBL::Compara::GenomicAlignTree
               objects, ordered by reference genomic_align_id.
  Exceptions : throws if the arguments are not of the expected type or
               have no dbID. Returns ref. to an empty array if no matching
               entry can be found.
  Caller     : none
  Status     : At risk

=cut

sub fetch_all_by_MethodLinkSpeciesSet_DnaFrag {
  my ($self, $method_link_species_set, $dnafrag, $start, $end, $limit_number, $limit_index_start, $restrict) = @_;

  throw("[$method_link_species_set] is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object")
      unless (UNIVERSAL::isa($method_link_species_set,
          "Bio::EnsEMBL::Compara::MethodLinkSpeciesSet"));
  throw("[$dnafrag] is not a Bio::EnsEMBL::Compara::DnaFrag object")
      unless (UNIVERSAL::isa($dnafrag, "Bio::EnsEMBL::Compara::DnaFrag"));

  ## Get internal IDs from the objects
  my $method_link_species_set_id = $method_link_species_set->dbID;
  throw("[$method_link_species_set] has no dbID") if (!$method_link_species_set_id);
  my $dnafrag_id = $dnafrag->dbID;
  throw("[$dnafrag] has no dbID") if (!$dnafrag_id);

  ###########################################################################
  ## FIRST STEP:
  ## The query looks for GenomicAlign entries in the genomic region of interest
  ## and links to the GenomicAlignGroup and GenomicAlignTree entries. We extract
  ## the list of node IDs for the root of the GenomicAlignTrees
  ###########################################################################
  my $constraint = "WHERE ga.method_link_species_set_id = $method_link_species_set_id
      AND ga.dnafrag_id = $dnafrag_id";
  if (defined($start) and defined($end)) {
    # dnafrag_start is also bounded from below using the max_alignment_length
    # of the MethodLinkSpeciesSet.
    my $max_alignment_length = $method_link_species_set->max_alignment_length;
    my $lower_bound = $start - $max_alignment_length;
    $constraint .= qq{
        AND ga.dnafrag_start <= $end
        AND ga.dnafrag_start >= $lower_bound
        AND ga.dnafrag_end >= $start
        };
  }
  my $sql = $self->_construct_sql_query($constraint);
  my $sth = $self->prepare($sql);
  $sth->execute();

  # Maps each reference genomic_align_id to the root_id of its tree
  my $ref_to_root_hash = {};
  while (my $rowhash = $sth->fetchrow_hashref) {
    $ref_to_root_hash->{$rowhash->{genomic_align_id}} = $rowhash->{root_id};
  }
  $sth->finish();

  return [] if (!%$ref_to_root_hash);

  ###########################################################################
  ## SECOND STEP:
  ## Get all the nodes for the root IDs we got in step 1
  ###########################################################################
  my $genomic_align_trees = [];
  # Sort the IDs so the order of the result does not depend on the hash order
  foreach my $reference_genomic_align_id (sort {$a <=> $b} keys %$ref_to_root_hash) {
    my $root_node_id = $ref_to_root_hash->{$reference_genomic_align_id};
    my $genomic_align_nodes = $self->_generic_fetch("WHERE gat.root_id = $root_node_id");
    my $root = $self->_build_tree_from_nodes($genomic_align_nodes);

    LEAF: foreach my $this_leaf (@{$root->get_all_leaves}) {
      my $all_genomic_aligns = $this_leaf->get_all_GenomicAligns;
      foreach my $this_genomic_align (@$all_genomic_aligns) {
        next if ($this_genomic_align->dbID != $reference_genomic_align_id);

        $root->reference_genomic_align($this_genomic_align);
        $root->reference_genomic_align_node($this_leaf);
        if (@$all_genomic_aligns > 1) {
          ## Reference hits a composite GenomicAlign. We have to restrict the tree
          ## NOTE(review): leading/trailing "X" runs in the cigar_line are
          ## presumably the padding of a composite segment -- confirm.
          my $cigar_line = $this_genomic_align->cigar_line;
          my ($aln_start, $aln_end) = (1, $this_genomic_align->length);
          if ($cigar_line =~ /^(\d*)X/) {
            # A missing count stands for 1
            $aln_start += ($1 eq "") ? 1 : $1;
          }
          if ($cigar_line =~ /(\d*)X$/) {
            $aln_end -= ($1 eq "") ? 1 : $1;
          }
          $root = $root->restrict_between_alignment_positions($aln_start, $aln_end, "skip");
        }
        # The reference has been found: no need to look at the other leaves
        last LEAF;
      }
    }
    push(@$genomic_align_trees, $root);
  }

  return $genomic_align_trees;
}
=head2 fetch_all_by_MethodLinkSpeciesSet_Slice

  Arg  1     : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set
  Arg  2     : Bio::EnsEMBL::Slice $original_slice
  Arg  3     : integer $limit_number [optional]
  Arg  4     : integer $limit_index_start [optional]
  Arg  5     : boolean $restrict_resulting_blocks [optional]
  Example    : my $genomic_align_trees =
                   $genomic_align_tree_adaptor->fetch_all_by_MethodLinkSpeciesSet_Slice(
                       $method_link_species_set, $original_slice);
  Description: Retrieve the corresponding
               Bio::EnsEMBL::Compara::GenomicAlignTree objects. The slice
               is projected to the toplevel coordinate system and the
               trees are fetched, segment by segment, with
               fetch_all_by_MethodLinkSpeciesSet_DnaFrag(). The
               reference_slice, reference_slice_start, reference_slice_end
               and reference_slice_strand of each tree are set and the tree
               is reverse-complemented when the strand of its reference
               GenomicAlign differs from the strand of the original slice.
               Trees whose reference GenomicAlign cannot be transferred to
               the original slice are skipped.
  Returntype : ref. to an array of Bio::EnsEMBL::Compara::GenomicAlignTree objects.
  Exceptions : throws if $original_slice is not a Bio::EnsEMBL::Slice.
               Returns ref. to an empty array if no matching
               Bio::EnsEMBL::Compara::GenomicAlignTree object can be retrieved
  Caller     : $object->method_name
  Status     : At risk

=cut

sub fetch_all_by_MethodLinkSpeciesSet_Slice {
  my ($self, $method_link_species_set, $reference_slice, $limit_number, $limit_index_start, $restrict) = @_;
  my $all_genomic_align_trees = []; # Returned value

  throw("[$reference_slice] is not a Bio::EnsEMBL::Slice object")
      unless (UNIVERSAL::isa($reference_slice, "Bio::EnsEMBL::Slice"));

  ###########################################################################
  ## The strategy here is very much the same as in the corresponding method
  ## of the Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlockAdaptor
  ###########################################################################
  my $genome_db = $self->db->get_GenomeDBAdaptor->fetch_by_Slice($reference_slice);
  my $dnafrag_adaptor = $self->db->get_DnaFragAdaptor($reference_slice);

  my $projection_segments = $reference_slice->project('toplevel');
  return [] if (!@$projection_segments);

  foreach my $this_projection_segment (@$projection_segments) {
    my $this_slice = $this_projection_segment->to_Slice;
    my $this_dnafrag = $dnafrag_adaptor->fetch_by_GenomeDB_and_name(
        $genome_db, $this_slice->seq_region_name
      );
    next if (!$this_dnafrag);

    my $these_genomic_align_trees = $self->fetch_all_by_MethodLinkSpeciesSet_DnaFrag(
        $method_link_species_set,
        $this_dnafrag,
        $this_slice->start,
        $this_slice->end,
        $limit_number,
        $limit_index_start,
        $restrict
      );

    # The trees are fetched in top-level coordinates. They need to be
    # converted to the requested coordinate system if it is a different one.
    my $top_slice = $this_slice->seq_region_Slice;
    my $needs_transfer = ($top_slice->name ne $reference_slice->name);

    foreach my $this_genomic_align_tree (@$these_genomic_align_trees) {
      my $reference_genomic_align = $this_genomic_align_tree->reference_genomic_align;
      my ($slice, $slice_start, $slice_end);

      if ($needs_transfer) {
        # This file does not load the Feature class itself
        require Bio::EnsEMBL::Feature;
        my $feature = Bio::EnsEMBL::Feature->new(
            -slice => $top_slice,
            -start => $reference_genomic_align->dnafrag_start,
            -end => $reference_genomic_align->dnafrag_end,
            -strand => $reference_genomic_align->dnafrag_strand
          );
        $feature = $feature->transfer($reference_slice);
        next if (!$feature);
        ($slice, $slice_start, $slice_end) =
            ($reference_slice, $feature->start, $feature->end);
      } else {
        # Same coordinate system: the dnafrag coordinates can be used as they are
        ($slice, $slice_start, $slice_end) = ($top_slice,
            $reference_genomic_align->dnafrag_start,
            $reference_genomic_align->dnafrag_end);
      }

      $this_genomic_align_tree->reference_slice($slice);
      $this_genomic_align_tree->reference_slice_start($slice_start);
      $this_genomic_align_tree->reference_slice_end($slice_end);
      $this_genomic_align_tree->reference_slice_strand($reference_slice->strand);
      $this_genomic_align_tree->reverse_complement()
          if ($reference_slice->strand != $reference_genomic_align->dnafrag_strand);
      push(@$all_genomic_align_trees, $this_genomic_align_tree);
    }
  }

  return $all_genomic_align_trees;
}
=head2 fetch_by_GenomicAlignBlock

  Arg  1     : Bio::EnsEMBL::Compara::GenomicAlignBlock $genomic_align_block
  Example    : my $genomic_align_tree =
                   $genomic_align_tree_adaptor->fetch_by_GenomicAlignBlock($gab);
  Description: Retrieve the Bio::EnsEMBL::Compara::GenomicAlignTree object
               that contains the GenomicAligns of this block. The whole tree
               (all the nodes sharing the same root_id) is fetched. The
               reference GenomicAlign and the reference slice attributes
               of the block, when set, are copied to the tree. The tree is
               reverse-complemented if get_original_strand() returns 0 for
               the block.
  Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object (the root), or
               undef if no tree can be found for this block.
  Exceptions : throws if $genomic_align_block is not a
               Bio::EnsEMBL::Compara::GenomicAlignBlock or has no dbID.
  Caller     : $object->method_name
  Status     : At risk

=cut

sub fetch_by_GenomicAlignBlock {
  my ($self, $genomic_align_block) = @_;

  throw("[$genomic_align_block] is not a Bio::EnsEMBL::Compara::GenomicAlignBlock object")
      unless (UNIVERSAL::isa($genomic_align_block,
          "Bio::EnsEMBL::Compara::GenomicAlignBlock"));
  my $genomic_align_block_id = $genomic_align_block->dbID;
  throw("[$genomic_align_block] has no dbID") if (!$genomic_align_block_id);

  ## Get the root_id of the tree from any of the GenomicAligns of the block.
  ## The LEFT JOINs return a NULL root_id for GenomicAligns which are not in
  ## a tree: these rows are ignored.
  my $sql = "SELECT root_id FROM genomic_align
      LEFT JOIN genomic_align_group USING (genomic_align_id)
      LEFT JOIN genomic_align_tree ON (group_id = node_id)
      WHERE genomic_align_block_id = ? AND root_id IS NOT NULL";
  my $sth = $self->prepare($sql);
  $sth->execute($genomic_align_block_id);
  my ($root_id) = $sth->fetchrow_array();
  $sth->finish();

  # No tree for this block: the next query would be meaningless
  return if (!defined($root_id));

  ## Fetch all the nodes of the tree and build it
  $sql = "SELECT " . join(",", @{$self->columns}) .
      " FROM genomic_align_tree gat" .
      " LEFT JOIN genomic_align_group gag ON (gat.node_id = gag.group_id)" .
      " LEFT JOIN genomic_align ga ON (gag.genomic_align_id = ga.genomic_align_id)" .
      " WHERE gat.root_id = ?";
  $sth = $self->prepare($sql);
  $sth->execute($root_id);
  my $genomic_align_nodes = $self->_objs_from_sth($sth);
  $sth->finish;

  my $genomic_align_tree = $self->_build_tree_from_nodes($genomic_align_nodes);
  return if (!$genomic_align_tree);

  ## Look for the leaf corresponding to the reference GenomicAlign of the block
  if ($genomic_align_block->reference_genomic_align) {
    my $ref_genomic_align = $genomic_align_block->reference_genomic_align;
    LEAF: foreach my $this_leaf (@{$genomic_align_tree->get_all_leaves}) {
      foreach my $this_genomic_align (@{$this_leaf->get_all_GenomicAligns}) {
        if ($this_genomic_align->genome_db->name eq $ref_genomic_align->genome_db->name and
            $this_genomic_align->dnafrag->name eq $ref_genomic_align->dnafrag->name and
            $this_genomic_align->dnafrag_start == $ref_genomic_align->dnafrag_start and
            $this_genomic_align->dnafrag_end == $ref_genomic_align->dnafrag_end) {
          $genomic_align_tree->reference_genomic_align_node($this_leaf);
          $genomic_align_tree->reference_genomic_align($this_genomic_align);
          last LEAF;
        }
      }
    }
  }

  if ($genomic_align_block->reference_slice) {
    $genomic_align_tree->reference_slice($genomic_align_block->reference_slice);
    $genomic_align_tree->reference_slice_start($genomic_align_block->reference_slice_start);
    $genomic_align_tree->reference_slice_end($genomic_align_block->reference_slice_end);
    $genomic_align_tree->reference_slice_strand($genomic_align_block->reference_slice_strand);
  }

  # If the genomic_align_block has been complemented, then complement the tree
  if ($genomic_align_block->get_original_strand == 0) {
    $genomic_align_tree->reverse_complement;
  }

  return $genomic_align_tree;
}
###########################
# STORE methods
###########################
=head2 store

  Arg  1      : Bio::EnsEMBL::Compara::GenomicAlignTree $root
  Arg [2]     : [optional] bool $skip_left_right_indexes
  Example     : $gata->store($root);
  Description : This method stores the GenomicAlign in the tree,
                the corresponding GenomicAlignBlock(s) and all
                the GenomicAlignTree nodes in this tree. If you set
                the $skip_left_right_indexes flag to any true value,
                the left and right indexes in the tree won't be built
                at this point. This may be useful for production
                purposes as building the indexes requires to lock the
                table and can hamper other processes storing data at
                that time.

                This method expects a structure like this:

                GENOMIC_ALIGN_TREE->
                  - GENOMIC_ALIGN_GROUP->
                      - GENOMIC_ALIGNs...
                  - GENOMIC_ALIGN_TREE->
                      - GENOMIC_ALIGN_GROUP->
                          - GENOMIC_ALIGNs...
                      - GENOMIC_ALIGN_TREE->
                          - GENOMIC_ALIGN_GROUP->
                              - GENOMIC_ALIGNs...
                      - GENOMIC_ALIGN_TREE->
                          - GENOMIC_ALIGN_GROUP->
                              - GENOMIC_ALIGN...
                  - GENOMIC_ALIGN_TREE->
                      - GENOMIC_ALIGN_GROUP->
                          - GENOMIC_ALIGN...

                I.e. each node has 1 GenomicAlignGroup containing 1 or
                more GenomicAligns and optionally 2 GenomicAlignTree objects
                representing the sub_nodes. These will also contain 1
                GenomicAlignGroup containing 1 or more GenomicAligns, etc.

                No GenomicAlignBlock is expected. These will be created
                and stored by this method: one block with the GenomicAligns
                of the leaves and, if there is any, one block with the
                GenomicAligns of the internal nodes.
  Returntype  : the node_id of $root
  Exceptions  : throws if $root is not a
                Bio::EnsEMBL::Compara::GenomicAlignTree, if any node does
                not belong to this tree or if the first leaf has no
                GenomicAlign (needed to get the MethodLinkSpeciesSet).
  Caller      : general
  Status      : At risk

=cut

sub store {
  my ($self, $node, $skip_left_right_indexes) = @_;

  # UNIVERSAL::isa copes with undef and unblessed references as well.
  unless (UNIVERSAL::isa($node, 'Bio::EnsEMBL::Compara::GenomicAlignTree')) {
    throw("set arg must be a [Bio::EnsEMBL::Compara::GenomicAlignTree] not a $node");
  }

  ## Check the tree
  my $all_nodes = $node->get_all_nodes;
  foreach my $this_node (@$all_nodes) {
    throw("[$this_node] does not belong to this tree") if ($this_node->root ne $node);
  }

  ## The MethodLinkSpeciesSet is taken from the first GenomicAlign of the first leaf
  my $leaves = $node->get_all_leaves;
  my $first_genomic_aligns = (@$leaves) ? $leaves->[0]->get_all_GenomicAligns : undef;
  if (!$first_genomic_aligns or !@$first_genomic_aligns) {
    throw("Cannot get the MethodLinkSpeciesSet: the first leaf of [$node] has no GenomicAlign");
  }
  my $method_link_species_set = $first_genomic_aligns->[0]->method_link_species_set;

  ## Create and store all the GenomicAlignBlock objects (this stores the GenomicAlign objects as well)
  my $genomic_align_block_adaptor = $self->db->get_GenomicAlignBlockAdaptor();
  my $ancestral_genomic_align_block = Bio::EnsEMBL::Compara::GenomicAlignBlock->new(
      -method_link_species_set => $method_link_species_set,
      -group_id => $node->group_id);
  my $modern_genomic_align_block = Bio::EnsEMBL::Compara::GenomicAlignBlock->new(
      -method_link_species_set => $method_link_species_set,
      -group_id => $node->group_id);

  # GenomicAligns on the leaves go to the "modern" block, the ones on the
  # internal nodes (if they have a group) go to the "ancestral" block.
  foreach my $genomic_align_node (@$all_nodes) {
    if ($genomic_align_node->is_leaf()) {
      foreach my $this_genomic_align (@{$genomic_align_node->get_all_GenomicAligns}) {
        $modern_genomic_align_block->add_GenomicAlign($this_genomic_align);
      }
    } elsif ($genomic_align_node->genomic_align_group) {
      foreach my $this_genomic_align (@{$genomic_align_node->get_all_GenomicAligns}) {
        $ancestral_genomic_align_block->add_GenomicAlign($this_genomic_align);
      }
    }
  }

  # The ancestral block is stored only if it is not empty
  if (@{$ancestral_genomic_align_block->get_all_GenomicAligns} > 0) {
    $genomic_align_block_adaptor->store($ancestral_genomic_align_block);
  }
  $genomic_align_block_adaptor->store($modern_genomic_align_block);
  $node->ancestral_genomic_align_block_id($ancestral_genomic_align_block->dbID);
  $node->modern_genomic_align_block_id($modern_genomic_align_block->dbID);

  ## Store this node and, recursively, all the sub nodes
  $self->store_node($node);

  ## Set and store the left and right indexes unless otherwise stated
  if (!$skip_left_right_indexes) {
    $self->sync_tree_leftright_index($node);
    $self->update_subtree($node);
  }

  return $node->node_id;
}
=head2 store_group

  Arg  1     : ref. to an array of Bio::EnsEMBL::Compara::GenomicAlignTree
               objects (the roots of the trees)
  Example    : $genomic_align_tree_adaptor->store_group([$tree_1, $tree_2]);
  Description: Stores all the trees with store() and then assigns the same
               group_id to the ancestral and modern GenomicAlignBlocks of
               all of them (through the store_group_id() method of the
               GenomicAlignBlockAdaptor). The group_id is the
               genomic_align_block_id of the first GenomicAlign of the
               first tree. Does nothing if the array is empty.
  Returntype : none
  Exceptions : throws if Arg 1 is not a ref. to an array or if the
               genomic_align_block_id of a tree cannot be found
  Caller     : none
  Status     : At risk

=cut

sub store_group {
  my ($self, $nodes) = @_;

  throw("[$nodes] is not a reference to an array of Bio::EnsEMBL::Compara::GenomicAlignTree objects")
      unless (ref($nodes) eq "ARRAY");
  return if (!@$nodes);

  # Store trees in database
  foreach my $this_node (@$nodes) {
    $self->store($this_node);
  }

  # Returns the genomic_align_block_id of the first GenomicAlign in the
  # GenomicAlignGroup of the node or throws if any of them is missing.
  my $get_genomic_align_block_id = sub {
    my ($this_node, $description) = @_;
    my $genomic_align_group = $this_node->genomic_align_group;
    my $genomic_aligns = ($genomic_align_group) ?
        $genomic_align_group->get_all_GenomicAligns : undef;
    my $genomic_align_block_id = ($genomic_aligns and @$genomic_aligns) ?
        $genomic_aligns->[0]->genomic_align_block_id : undef;
    throw("Cannot get the genomic_align_block_id for the $description of [$this_node]")
        if (!$genomic_align_block_id);
    return $genomic_align_block_id;
  };

  my $group_id = $get_genomic_align_block_id->($nodes->[0], "root node");

  # Only the dbID is required here, hence the minimal objects. The hash
  # avoids updating the same GenomicAlignBlock twice.
  my $genomic_align_blocks = {};
  foreach my $this_node (@$nodes) {
    ## Ancestral GAB
    my $ancestral_genomic_align_block_id =
        $get_genomic_align_block_id->($this_node, "root node");
    $genomic_align_blocks->{$ancestral_genomic_align_block_id} =
        bless({dbID => $ancestral_genomic_align_block_id},
            "Bio::EnsEMBL::Compara::GenomicAlignBlock");

    ## Modern GAB
    my $first_leaf = $this_node->get_all_leaves->[0];
    throw("[$this_node] has no leaves") if (!$first_leaf);
    my $modern_genomic_align_block_id =
        $get_genomic_align_block_id->($first_leaf, "first leaf");
    $genomic_align_blocks->{$modern_genomic_align_block_id} =
        bless({dbID => $modern_genomic_align_block_id},
            "Bio::EnsEMBL::Compara::GenomicAlignBlock");
  }

  my $genomic_align_block_adaptor = $self->db->get_GenomicAlignBlockAdaptor;
  foreach my $gab (values %$genomic_align_blocks) {
    $genomic_align_block_adaptor->store_group_id($gab, $group_id);
  }

  return;
}
=head2 store_node

  Arg  1     : reference to Bio::EnsEMBL::Compara::GenomicAlignTree
  Example    : $genomic_align_tree_adaptor->store_node($genomic_align_tree);
  Description: Method for storing a single node. Called recursively for
               all the children of the node. A new row is inserted in the
               genomic_align_tree table, the node_id of the node is set
               from the insert ID and, if the node has a
               GenomicAlignGroup, the group is stored using the node_id as
               its dbID. For a node without a parent, the root_id is set
               to its own node_id.
  Returntype : the node_id of the stored node
  Exceptions : throws if Arg 1 is not a
               Bio::EnsEMBL::Compara::GenomicAlignTree
               throws if no genomic_align_group ID has been set
  Caller     : none
  Status     : At risk

=cut

sub store_node {
  my ($self, $node) = @_;

  # UNIVERSAL::isa copes with undef and unblessed references as well.
  unless (UNIVERSAL::isa($node, 'Bio::EnsEMBL::Compara::GenomicAlignTree')) {
    throw("set arg must be a [Bio::EnsEMBL::Compara::GenomicAlignTree] not a $node");
  }

  my ($parent_id, $root_id) = (0, 0);
  if ($node->parent) {
    $parent_id = $node->parent->node_id;
    $root_id = $node->root->node_id;
  }

  my $sth = $self->prepare("INSERT INTO genomic_align_tree
      (node_id,
      parent_id,
      root_id,
      left_index,
      right_index,
      distance_to_parent) VALUES (?,?,?,?,?,?)");
  # undef is bound as an SQL NULL, which lets the database assign the
  # node_id. The string "NULL" would be bound as a text value instead.
  $sth->execute(undef, $parent_id, $root_id, $node->left_index, $node->right_index,
      $node->distance_to_parent);
  $node->node_id($sth->{'mysql_insertid'});
  $sth->finish;

  # Set root_id to be node_id for the root node. This cannot be done in the
  # INSERT as the node_id is not known until the row has been created.
  if ($root_id == 0) {
    my $update_sth = $self->prepare(
        "UPDATE genomic_align_tree SET root_id = node_id WHERE node_id=?");
    $update_sth->execute($node->node_id);
    $update_sth->finish;
  }
  $node->adaptor($self);

  if ($node->genomic_align_group) {
    # The GenomicAlignGroup shares its ID with the node
    my $genomic_align_group_adaptor = $self->db->get_GenomicAlignGroupAdaptor();
    $node->genomic_align_group->dbID($node->node_id);
    $genomic_align_group_adaptor->store($node->genomic_align_group);
    if (!$node->genomic_align_group or !$node->genomic_align_group->dbID) {
      throw("Cannot store before setting the genomic_align_group ID");
    }
  }

  foreach my $this_child (@{$node->children}) {
    $self->store_node($this_child);
  }

  return $node->node_id;
}
=head2 fetch_node_by_node_id

  Arg  1     : integer $node_id
  Example    : my $node = $self->adaptor->fetch_node_by_node_id($node_id);
  Description: Over-ride NestedSetAdaptor method for getting a node from
               its id. The node is fetched together with its
               GenomicAlignGroup and GenomicAlign data (LEFT JOINs).
  Returntype : the first object returned by _objs_from_sth() for this
               node_id (a Bio::EnsEMBL::Compara::GenomicAlignTree), or
               undef if there is none
  Exceptions : none
  Caller     :
  Status     : At risk

=cut

sub fetch_node_by_node_id {
  my ($self, $node_id) = @_;

  my $sql = "SELECT " . join(",", @{$self->columns}) .
      " FROM genomic_align_tree gat" .
      " LEFT JOIN genomic_align_group gag ON (gat.node_id = gag.group_id)" .
      " LEFT JOIN genomic_align ga ON (gag.genomic_align_id = ga.genomic_align_id)" .
      " WHERE gat.node_id = ?";
  my $sth = $self->prepare($sql);
  # The ID is bound rather than interpolated: an undefined or malformed
  # value can no longer break the statement.
  $sth->execute($node_id);
  my ($node) = @{$self->_objs_from_sth($sth)};
  $sth->finish;

  return $node;
}
=head2 fetch_parent_for_node

  Arg  1     : reference to Bio::EnsEMBL::Compara::GenomicAlignTree
  Example    : my $parent = $self->adaptor->fetch_parent_for_node($self);
  Description: Over-ride NestedSetAdaptor method for getting the parent of
               a node. The parent is the node whose node_id is the
               _parent_id of the given node; it is fetched together with
               its GenomicAlignGroup and GenomicAlign data (LEFT JOINs).
  Returntype : the first object returned by _objs_from_sth() for the
               parent (a Bio::EnsEMBL::Compara::GenomicAlignTree), or undef
               if there is none
  Exceptions : throw if not Bio::EnsEMBL::Compara::NestedSet
  Caller     :
  Status     : At risk

=cut

sub fetch_parent_for_node {
  my ($self, $node) = @_;

  # UNIVERSAL::isa copes with undef and unblessed references as well.
  unless (UNIVERSAL::isa($node, 'Bio::EnsEMBL::Compara::NestedSet')) {
    throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
  }

  my $sql = "SELECT " . join(",", @{$self->columns}) .
      " FROM genomic_align_tree gat" .
      " LEFT JOIN genomic_align_group gag ON (gat.node_id = gag.group_id)" .
      " LEFT JOIN genomic_align ga ON (gag.genomic_align_id = ga.genomic_align_id)" .
      " WHERE gat.node_id = ?";
  my $sth = $self->prepare($sql);
  # The ID is bound rather than interpolated: an undefined _parent_id can
  # no longer break the statement.
  $sth->execute($node->_parent_id);
  my ($parent) = @{$self->_objs_from_sth($sth)};
  $sth->finish;

  return $parent;
}
=head2 fetch_all_children_for_node

  Arg  1     : reference to Bio::EnsEMBL::Compara::GenomicAlignTree
  Example    : my $node = $self->adaptor->fetch_all_children_for_node($self);
  Description: Over-ride NestedSetAdaptor method for getting all the
               children of a node. The children are the nodes whose
               parent_id is the node_id of the given node. Each of them is
               attached to the node with add_child().
  Returntype : the node given as Arg 1, with its children attached
  Exceptions : throw if not Bio::EnsEMBL::Compara::NestedSet
  Caller     :
  Status     : At risk

=cut

sub fetch_all_children_for_node {
  my ($self, $node) = @_;

  # UNIVERSAL::isa copes with undef and unblessed references as well.
  unless (UNIVERSAL::isa($node, 'Bio::EnsEMBL::Compara::NestedSet')) {
    throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
  }

  my $sql = "SELECT " . join(",", @{$self->columns}) .
      " FROM genomic_align_tree gat" .
      " LEFT JOIN genomic_align_group gag ON (gat.node_id = gag.group_id)" .
      " LEFT JOIN genomic_align ga ON (gag.genomic_align_id = ga.genomic_align_id)" .
      " WHERE gat.parent_id = ?";
  my $sth = $self->prepare($sql);
  # The ID is bound rather than interpolated: an undefined node_id can no
  # longer break the statement.
  $sth->execute($node->node_id);
  my $kids = $self->_objs_from_sth($sth);
  $sth->finish;

  foreach my $child (@$kids) {
    $node->add_child($child);
  }

  return $node;
}
=head2 fetch_root_by_node

  Arg  1     : reference to Bio::EnsEMBL::Compara::GenomicAlignTree
  Example    : my $root = $self->adaptor->fetch_root_by_node($self);
  Description: Over-ride NestedSetAdaptor method for getting the root of a
               node. All the nodes whose left_index is lower than or equal
               to, and whose right_index is greater than or equal to, the
               indexes of the given node are fetched and the tree is built
               from them with _build_tree_from_nodes().
  Returntype : the value returned by _build_tree_from_nodes() (reference
               to the root Bio::EnsEMBL::Compara::GenomicAlignTree)
  Exceptions : throw if not Bio::EnsEMBL::Compara::NestedSet
               throw if the left or the right index of the node is not
               defined
  Caller     :
  Status     : At risk

=cut

sub fetch_root_by_node {
  my ($self, $node) = @_;

  unless (UNIVERSAL::isa($node, 'Bio::EnsEMBL::Compara::NestedSet')) {
    throw("set arg must be a [Bio::EnsEMBL::Compara::NestedSet] not a $node");
  }

  my $left_index = $node->left_index;
  my $right_index = $node->right_index;
  # Without both indexes the query below cannot find any enclosing node
  if (!defined($left_index) or !defined($right_index)) {
    throw("[$node] must have both its left_index and right_index set");
  }

  my $sql = "SELECT " . join(",", @{$self->columns}) .
      " FROM genomic_align_tree gat" .
      " LEFT JOIN genomic_align_group gag ON (gat.node_id = gag.group_id)" .
      " LEFT JOIN genomic_align ga ON (gag.genomic_align_id = ga.genomic_align_id)" .
      " WHERE gat.left_index <= ? AND gat.right_index >= ?";
  my $sth = $self->prepare($sql);
  $sth->execute($left_index, $right_index);
  my $nodes = $self->_objs_from_sth($sth);
  $sth->finish;

  return $self->_build_tree_from_nodes($nodes);
}
=head2 delete

  Arg  1     : reference to Bio::EnsEMBL::Compara::GenomicAlignTree
  Example    : $genomic_align_tree_adaptor->delete($root);
  Description: Removes a Bio::EnsEMBL::Compara::GenomicAlignTree from the
               database. A single multi-table DELETE removes the rows of
               the genomic_align_tree table having this root_id and the
               rows joined to them in the genomic_align_group,
               genomic_align and genomic_align_block tables. The root of
               the tree must be given: if the node is not its own root, a
               warning is issued and nothing is deleted.
  Returntype : none
  Exceptions : throws if no node is given
  Caller     : none
  Status     : At risk

=cut

sub delete {
  my ($self, $root) = @_;

  throw("Nothing to delete") unless ($root);

  # Only a node which is its own root stands for a whole tree
  unless ($root->root eq $root) {
    warn("Cowardly refusing to delete a subtree only");
    return;
  }

  my $delete_sql = "DELETE
genomic_align_group.*,
genomic_align_tree.*,
genomic_align.*,
genomic_align_block.*
FROM
genomic_align_tree
LEFT JOIN genomic_align_group ON (node_id = group_id)
LEFT JOIN genomic_align USING (genomic_align_id)
LEFT JOIN genomic_align_block USING (genomic_align_block_id)
WHERE root_id = ?";
  my $delete_sth = $self->prepare($delete_sql);
  $delete_sth->execute($root->node_id);
}
=head2 update_neighbourhood_data

  Arg  1     : reference to Bio::EnsEMBL::Compara::GenomicAlignTree
  Arg  2     : boolean $no_recursivity
  Example    : $self->update_neighbourhood_data($node);
  Description: Writes the left_node_id and right_node_id of the node to
               its row in the genomic_align_tree table. Unless
               $no_recursivity is true, the same is done for all the
               children of the node and, recursively, for the whole
               subtree.
  Returntype : the node given as Arg 1
  Exceptions : none
  Caller     : none
  Status     : At risk

=cut

sub update_neighbourhood_data {
  my ($self, $node, $no_recursivity) = @_;

  my $update_sql = "UPDATE genomic_align_tree
SET left_node_id = ?, right_node_id = ?
WHERE node_id = ?";
  my $update_sth = $self->prepare($update_sql);
  my @bind_values = ($node->left_node_id, $node->right_node_id, $node->node_id);
  $update_sth->execute(@bind_values);

  # The flag is not handed down: once we recurse, the whole subtree is updated
  unless ($no_recursivity) {
    my $children = $node->children;
    for my $child (@$children) {
      $self->update_neighbourhood_data($child);
    }
  }

  return $node;
}
=head2 set_neighbour_nodes_for_leaf

  Arg 1      : Bio::EnsEMBL::Compara::GenomicAlignTree $node (a leaf)
  Arg 2      : int $flanking [optional, default 1000000]
  Example    : $self->set_neighbour_nodes_for_leaf($leaf, 500000);
  Description: Looks in the database for the 'epo' genomic_align_groups that
               lie next to this leaf on the same dnafrag (within $flanking
               bp) and sets the left_node_id and right_node_id of the leaf
               accordingly. When the leaf has several GenomicAligns, the
               first one is used to find the left neighbour and the last
               one to find the right neighbour. If any neighbour is set,
               the result is stored with update_neighbourhood_data.
               Nodes that are not leaves, have no genomic_align_group or
               have no GenomicAligns are returned untouched.
               Note that a $flanking of 0 is treated as "not given" and
               replaced by the default.
  Returntype : the Bio::EnsEMBL::Compara::GenomicAlignTree given as Arg 1
  Exceptions : none
  Caller     : none
  Status     : At risk

=cut

sub set_neighbour_nodes_for_leaf {
  my ($self, $node, $flanking) = @_;

  $flanking = 1000000 if (!$flanking);

  # This is a plain sub, not a loop body: leave with return. (A "next" here
  # would either die or silently jump to the next iteration of a loop in
  # the caller.)
  return $node if (!$node->is_leaf());
  return $node if (!$node->genomic_align_group);

  my $genomic_aligns = $node->genomic_align_group->get_all_GenomicAligns;
  return $node if (!$genomic_aligns or !@$genomic_aligns);

  my $sth = $self->prepare("SELECT group_id, dnafrag_start, dnafrag_end, dnafrag_strand
FROM genomic_align LEFT JOIN genomic_align_group USING (genomic_align_id)
WHERE type = 'epo'
AND dnafrag_id = ?
AND method_link_species_set_id = ?
AND dnafrag_start <= ?
AND dnafrag_start > ?
AND dnafrag_end >= ?
ORDER BY dnafrag_start");

  if (@$genomic_aligns == 1) {
    ## A single GenomicAlign gives both neighbours
    my ($left_node_id, $right_node_id) =
        $self->_neighbour_group_ids_for_genomic_align($sth, $genomic_aligns->[0], $flanking);
    $node->left_node_id($left_node_id) if (defined($left_node_id));
    $node->right_node_id($right_node_id) if (defined($right_node_id));
  } else {
    ## Use the first GenomicAlign to set the LEFT NODE
    my ($left_node_id) =
        $self->_neighbour_group_ids_for_genomic_align($sth, $genomic_aligns->[0], $flanking);
    $node->left_node_id($left_node_id) if (defined($left_node_id));

    ## Use the last GenomicAlign to set the RIGHT NODE
    my (undef, $right_node_id) =
        $self->_neighbour_group_ids_for_genomic_align($sth, $genomic_aligns->[-1], $flanking);
    $node->right_node_id($right_node_id) if (defined($right_node_id));
  }
  $sth->finish;

  # Store this in the DB
  if ($node->left_node_id or $node->right_node_id) {
    $self->update_neighbourhood_data($node);
  }

  return $node;
}

=head2 _neighbour_group_ids_for_genomic_align

  Arg 1      : statement handle prepared by set_neighbour_nodes_for_leaf
  Arg 2      : Bio::EnsEMBL::Compara::GenomicAlign $genomic_align
  Arg 3      : int $flanking
  Example    : my ($left_id, $right_id) =
                   $self->_neighbour_group_ids_for_genomic_align($sth, $ga, $flanking);
  Description: Runs the neighbourhood query around $genomic_align and finds
               the row with the same dnafrag_start and dnafrag_end. The
               group_ids of the rows just before and just after it (rows
               are sorted by dnafrag_start) are the neighbours. On the
               forward strand the previous row is the left neighbour; on
               the reverse strand it is the right one.
  Returntype : list ($left_group_id, $right_group_id); either value is undef
               if there is no such neighbour. Empty list if the query
               GenomicAlign is not found or its strand is neither 1 nor -1.
  Exceptions : none
  Caller     : set_neighbour_nodes_for_leaf
  Status     : At risk

=cut

sub _neighbour_group_ids_for_genomic_align {
  my ($self, $sth, $genomic_align, $flanking) = @_;

  my $dnafrag_start = $genomic_align->dnafrag_start;
  my $dnafrag_end = $genomic_align->dnafrag_end;

  # The lower bound on dnafrag_start is widened by max_alignment_length so
  # that alignments starting before the window but reaching into it
  # (dnafrag_end >= start - flanking) are still retrieved.
  $sth->execute(
      $genomic_align->dnafrag_id,
      $genomic_align->method_link_species_set_id,
      $dnafrag_end + $flanking,
      $dnafrag_start - $flanking - $genomic_align->method_link_species_set->max_alignment_length,
      $dnafrag_start - $flanking,
    );
  my $table = $sth->fetchall_arrayref;

  foreach my $i (0 .. $#$table) {
    my (undef, $this_dnafrag_start, $this_dnafrag_end, $this_dnafrag_strand) = @{$table->[$i]};
    next if ($this_dnafrag_start != $dnafrag_start or $this_dnafrag_end != $dnafrag_end);

    ## $table->[$i] corresponds to the query GenomicAlign
    my $previous_group_id = ($i > 0) ? $table->[$i - 1]->[0] : undef;
    my $next_group_id = ($i < $#$table) ? $table->[$i + 1]->[0] : undef;

    return ($previous_group_id, $next_group_id) if ($this_dnafrag_strand == 1);
    return ($next_group_id, $previous_group_id) if ($this_dnafrag_strand == -1);
    return;
  }

  return;
}
=head2 columns

  Args       : none
  Example    : $columns = $self->columns()
  Description: the list of columns to select, each one qualified with the
               alias of its table (gat = genomic_align_tree,
               gag = genomic_align_group, ga = genomic_align; see tables())
  Returntype : ref. to an array of "alias.column" strings
  Exceptions : none
  Caller     : NestedSetAdaptor::generic_fetch
  Status     : At risk

=cut

sub columns {
  my $self = shift;

  my @tree_columns = map {"gat.$_"} qw(
      node_id parent_id root_id left_index right_index
      distance_to_parent left_node_id right_node_id
    );
  my @group_columns = map {"gag.$_"} qw(group_id type);
  my @align_columns = map {"ga.$_"} qw(
      genomic_align_id genomic_align_block_id method_link_species_set_id
      dnafrag_id dnafrag_start dnafrag_end dnafrag_strand
      cigar_line level_id
    );

  return [@tree_columns, @group_columns, @align_columns];
}
=head2 tables

  Args       : none
  Example    : $tables = $self->tables()
  Description: the tables read by this adaptor, each one paired with the
               alias used for it in columns() and default_where_clause()
  Returntype : ref. to an array of [tablename, alias] pairs
  Exceptions : none
  Caller     : NestedSetAdaptor::generic_fetch
  Status     : At risk

=cut

sub tables {
  my $self = shift;

  my @table_names = qw(genomic_align_tree genomic_align_group genomic_align);
  my @aliases = qw(gat gag ga);

  return [map {[$table_names[$_], $aliases[$_]]} (0 .. $#table_names)];
}
=head2 left_join_clause

  Args       : none
  Example    : none
  Description: the left join clause for use with generic_fetch. It is empty
               at present: the tables are joined through the conditions
               returned by default_where_clause instead.
  Returntype : string (always empty)
  Exceptions : none
  Caller     : NestedSetAdaptor::generic_fetch
  Status     : At risk

=cut

sub left_join_clause {
  # Disabled LEFT JOIN alternative, kept for reference:
  #   LEFT JOIN genomic_align_group gag ON (gat.node_id = gag.group_id)
  #   LEFT JOIN genomic_align ga ON (gag.genomic_align_id = ga.genomic_align_id)
  my $no_left_join = "";

  return $no_left_join;
}
=head2 default_where_clause

  Args       : none
  Example    : none
  Description: the where clause for use with generic_fetch. It holds the
               join conditions linking genomic_align_tree (gat) to
               genomic_align_group (gag) and genomic_align_group to
               genomic_align (ga).
  Returntype : string
  Exceptions : none
  Caller     : NestedSetAdaptor::generic_fetch
  Status     : At risk

=cut

sub default_where_clause {
  my $join_conditions = "gat.node_id = gag.group_id AND gag.genomic_align_id = ga.genomic_align_id";

  return $join_conditions;
}
=head2 _objs_from_sth

  Args[1]    : statement handle $sth from which the rows are fetched
  Example    : my $genomic_align_trees = $self->_objs_from_sth($sth);
  Description: Reads all the rows from the statement handle and turns them
               into Bio::EnsEMBL::Compara::GenomicAlignTree nodes. A row
               without group_id gives a node on its own. Rows sharing a
               group_id give one single node, and every distinct
               genomic_align_id found in the rows is added once as a
               GenomicAlign to a GenomicAlignGroup.
  Returntype : listref of Bio::EnsEMBL::Compara::GenomicAlignTree objects
  Exceptions : none
  Caller     : general
  Status     : At risk

=cut

sub _objs_from_sth {
  my ($self, $sth) = @_;

  my @nodes;
  my %group_for_group_id;        # group_id => GenomicAlignGroup of the node built for it
  my %seen_genomic_align_id;     # genomic_align_id => 1 once it has been added

  ROW:
  while (my $row = $sth->fetchrow_hashref) {
    my $group_id = $row->{group_id};

    if (!defined($group_id)) {
      ## Node with no alignments attached
      push(@nodes, $self->create_instance_from_rowhash($row));
      next ROW;
    }

    my $group = $group_for_group_id{$group_id};
    if (!defined($group)) {
      ## This is a new node
      my $new_node = $self->create_instance_from_rowhash($row);
      $group = $new_node->genomic_align_group;
      $group_for_group_id{$group_id} = $group;
      push(@nodes, $new_node);
    }

    my $genomic_align_id = $row->{genomic_align_id};
    next ROW if (defined($seen_genomic_align_id{$genomic_align_id}));

    $group->add_GenomicAlign($self->_create_GenomicAlign_object_from_rowhash($row));
    $seen_genomic_align_id{$genomic_align_id} = 1;
  }

  return \@nodes;
}
=head2 create_instance_from_rowhash

  Args[1]    : DBI::row_hashref $hashref containing key-value pairs
  Example    : my $node = $self->create_instance_from_rowhash($rowhash);
  Description: Creates a new Bio::EnsEMBL::Compara::GenomicAlignTree node,
               fills it in from the row (see init_instance_from_rowhash)
               and, if the row refers to a genomic_align_group, attaches
               the corresponding Bio::EnsEMBL::Compara::GenomicAlignGroup
               object to the node.
  Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object
  Exceptions : none
  Caller     : general
  Status     : At risk

=cut

sub create_instance_from_rowhash {
  my ($self, $rowhash) = @_;

  # Explicit Class->new() call: the indirect object form ("new Class") is
  # resolved by parser heuristics and can silently change meaning.
  my $node = Bio::EnsEMBL::Compara::GenomicAlignTree->new();
  $self->init_instance_from_rowhash($node, $rowhash);

  # No group object is returned for rows without a group_id
  my $genomic_align_group = $self->_create_GenomicAlignGroup_object_from_rowhash($rowhash);
  $node->genomic_align_group($genomic_align_group) if ($genomic_align_group);

  return $node;
}
=head2 init_instance_from_rowhash

  Args[1]    : Bio::EnsEMBL::Compara::GenomicAlignTree object
  Args[2]    : DBI::row_hashref $hashref containing key-value pairs
  Example    : $self->init_instance_from_rowhash($node, $rowhash);
  Description: Fills in an existing node from a DBI row hash reference. The
               parent class implementation is run first; then the
               left_node_id and right_node_id of the row are copied into
               the node and this adaptor is set as the adaptor of the node.
  Returntype : Bio::EnsEMBL::Compara::GenomicAlignTree object (Args[1])
  Exceptions : none
  Caller     : general
  Status     : At risk

=cut

sub init_instance_from_rowhash {
  my ($self, $node, $rowhash) = @_;

  # SUPER is NestedSetAdaptor
  $self->SUPER::init_instance_from_rowhash($node, $rowhash);

  # Fields that are specific to the genomic_align_tree table. The setter
  # of the node has the same name as the column.
  foreach my $neighbour_field (qw(left_node_id right_node_id)) {
    $node->$neighbour_field($rowhash->{$neighbour_field});
  }

  $node->adaptor($self);

  return $node;
}
=head2 _create_GenomicAlignGroup_object_from_rowhash

  Args[1]    : DBI::row_hashref $hashref containing key-value pairs
  Example    : my $genomic_align_group = $self->_create_GenomicAlignGroup_object_from_rowhash($rowhash);
  Description: Builds a Bio::EnsEMBL::Compara::GenomicAlignGroup object from
               the group_id and type values of a DBI row hash reference.
               The GenomicAligns of the group are not added here.
  Returntype : Bio::EnsEMBL::Compara::GenomicAlignGroup object, or undef if
               the row has no group_id
  Exceptions : none
  Caller     : general
  Status     : At risk

=cut

sub _create_GenomicAlignGroup_object_from_rowhash {
  my ($self, $rowhash) = @_;

  # "return undef" (rather than a bare return) is kept on purpose so that
  # the method still yields one value when called in list context.
  return undef if (!$rowhash->{group_id});

  # Explicit Class->new() call: the indirect object form ("new Class") is
  # resolved by parser heuristics and can silently change meaning.
  my $genomic_align_group = Bio::EnsEMBL::Compara::GenomicAlignGroup->new();
  $genomic_align_group->dbID($rowhash->{group_id});
  $genomic_align_group->adaptor($self->db->get_GenomicAlignGroupAdaptor);
  $genomic_align_group->type($rowhash->{type});

  return $genomic_align_group;
}
=head2 _create_GenomicAlign_object_from_rowhash

  Args[1]    : DBI::row_hashref $hashref containing key-value pairs
  Example    : my $genomic_align = $self->_create_GenomicAlign_object_from_rowhash($rowhash);
  Description: Builds a Bio::EnsEMBL::Compara::GenomicAlign object from the
               genomic_align values of a DBI row hash reference. The dbID
               is taken from genomic_align_id; the other values are copied
               through the setter that has the same name as the column.
  Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
  Exceptions : none
  Caller     : general
  Status     : At risk

=cut

sub _create_GenomicAlign_object_from_rowhash {
  my ($self, $rowhash) = @_;

  # Explicit Class->new() call: the indirect object form ("new Class") is
  # resolved by parser heuristics and can silently change meaning.
  my $genomic_align = Bio::EnsEMBL::Compara::GenomicAlign->new();
  $genomic_align->dbID($rowhash->{genomic_align_id});
  $genomic_align->adaptor($self->db->get_GenomicAlignAdaptor);

  # Same setters, in the same order, as the former one-call-per-line code
  foreach my $field (qw(
      genomic_align_block_id method_link_species_set_id
      dnafrag_id dnafrag_start dnafrag_end dnafrag_strand
      cigar_line level_id)) {
    $genomic_align->$field($rowhash->{$field});
  }

  return $genomic_align;
}
1;