Raw content of Bio::Ontology::SimpleOntologyEngine # $Id: SimpleOntologyEngine.pm,v 1.3.2.5 2003/07/03 00:41:40 lapp Exp $ # # BioPerl module for SimpleOntologyEngine # # Cared for by Peter Dimitrov <dimitrov@gnf.org> # # Copyright Peter Dimitrov # (c) Peter Dimitrov, dimitrov@gnf.org, 2002. # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002. # # You may distribute this module under the same terms as perl itself. # Refer to the Perl Artistic License (see the license accompanying this # software package, or see http://www.perl.com/language/misc/Artistic.html) # for the terms under which you may use, modify, and redistribute this module. # # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. # # POD documentation - main docs before the code =head1 NAME SimpleOntologyEngine - Implementation of OntologyEngineI interface =head1 SYNOPSIS my $soe = Bio::Ontology::SimpleOntologyEngine->new; =head1 DESCRIPTION This is a "simple" implementation of Bio::Ontology::OntologyEngineI. =head1 FEEDBACK =head2 Mailing Lists User feedback is an integral part of the evolution of this and other Bioperl modules. Send your comments and suggestions preferably to the Bioperl mailing list. Your participation is much appreciated. bioperl-l@bioperl.org - General discussion http://bioperl.org/MailList.shtml - About the mailing lists =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track of the bugs and their resolution. Bug reports can be submitted via email or the web: bioperl-bugs@bioperl.org http://bioperl.org/bioperl-bugs/ =head1 AUTHOR - Peter Dimitrov Email dimitrov@gnf.org =head1 CONTRIBUTORS Hilmar Lapp, hlapp at gmx.net =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::Ontology::SimpleOntologyEngine; use vars qw(@ISA); use strict; use Carp; use Bio::Root::Root; use Bio::Ontology::RelationshipFactory; use Bio::Ontology::OntologyEngineI; use Data::Dumper; @ISA = qw( Bio::Root::Root Bio::Ontology::OntologyEngineI ); =head2 new Title : new Usage : $soe = Bio::Ontology::SimpleOntologyEngine->new; Function: Initializes the ontology engine. Example : $soe = Bio::Ontology::SimpleOntologyEngine->new; Returns : Object of class SimpleOntologyEngine. Args : =cut sub new{ my ($class, @args) = @_; my $self = $class->SUPER::new(@args); # my %param = @args; $self->_term_store( {} ); $self->_relationship_store( {} ); $self->_inverted_relationship_store( {} ); $self->_relationship_type_store( {} ); $self->_instantiated_terms_store( {} ); # set defaults for the factories $self->relationship_factory(Bio::Ontology::RelationshipFactory->new( -type => "Bio::Ontology::Relationship")); return $self; } =head2 _instantiated_terms_store Title : _instantiated_terms_store Usage : $obj->_instantiated_terms_store($newval) Function: Example : Returns : hash Args : empty hash =cut sub _instantiated_terms_store{ my ($self, $value) = @_; if( defined $value) { $self->{'_instantiated_terms_store'} = $value; } return $self->{'_instantiated_terms_store'}; } =head2 mark_instantiated Title : mark_instantiated Usage : $self->mark_instantiated(TermI[] terms): TermI[] Function: Marks TermI objects as fully instantiated, allowing for proper counting of the number of terms in the term store. The TermI objects has to be already stored in the term store in order to be marked. Example : $self->mark_instantiated($term); Returns : its argument or throws an exception if a term is not in the term store. Args : array of objects of class TermI. =cut sub mark_instantiated{ my ($self, @terms) = @_; foreach my $term (@terms) { $self->throw( "term ".$term->identifier." not in the term store\n" ) if !defined $self->_term_store->{$term->identifier}; $self->_instantiated_terms_store->{$term->identifier} = 1; } return @terms; } =head2 mark_uninstantiated Title : mark_uninstantiated Usage : $self->mark_uninstantiated(TermI[] terms): TermI[] Function: Marks TermI objects as not fully instantiated, Example : $self->mark_uninstantiated($term); Returns : its argument or throws an exception if a term is not in the term store(if the term is not marked it does nothing). Args : array of objects of class TermI. =cut sub mark_uninstantiated{ my ($self, @terms) = @_; foreach my $term (@terms) { $self->throw( "term ".$term->identifier." not in the term store\n" ) if !defined $self->_term_store->{$term->identifier}; delete $self->_instantiated_terms_store->{$term->identifier} if defined $self->_instantiated_terms_store->{$term->identifier}; } return @terms; } =head2 _term_store Title : term_store Usage : $obj->_term_store($newval) Function: Example : Returns : reference to an array of Bio::Ontology::TermI objects Args : reference to an array of Bio::Ontology::TermI objects =cut sub _term_store{ my ($self, $value) = @_; if( defined $value) { if ( defined $self->{'_term_store'}) { $self->throw("_term_store already defined\n"); } else { $self->{'_term_store'} = $value; } } return $self->{'_term_store'}; } =head2 add_term Title : add_term Usage : add_term(TermI term): TermI Function: Adds TermI object to the ontology engine term store. Marks the term fully instantiated by default. Example : $soe->add_term($term) Returns : its argument. Args : object of class TermI. =cut sub add_term{ my ($self, $term) = @_; my $term_store = $self->_term_store; if ( defined $term_store -> {$term->identifier}) { $self->throw( "term ".$term->identifier." already defined\n" ); } else { $term_store->{$term->identifier} = $term; $self->_instantiated_terms_store->{$term->identifier} = 1; } return $term; } =head2 get_term_by_identifier Title : get_term_by_identifier Usage : get_term_by_identifier(String[] id): TermI[] Function: Retrieves terms from the term store by their identifier field, or undef if not there. Example : $term = $soe->get_term_by_identifier("IPR000001"); Returns : An array of zero or more Bio::Ontology::TermI objects. Args : An array of identifier strings =cut sub get_term_by_identifier{ my ($self, @ids) = @_; my @ans = (); foreach my $id (@ids) { my $term = $self->_term_store->{$id}; push @ans, $term if defined $term; } return @ans; } =head2 _get_number_rels Title : get_number_rels Usage : Function: Example : Returns : Args : =cut sub _get_number_rels{ my ($self) = @_; my $num_rels = 0; foreach my $entry ($self->_relationship_store) { $num_rels += scalar keys %$entry; } return $num_rels; } =head2 _get_number_terms Title : _get_number_terms Usage : Function: Example : Returns : Args : =cut sub _get_number_terms{ my ($self) = @_; return scalar $self->_filter_unmarked( values %{$self->_term_store} ); } =head2 _relationship_store Title : _storerelationship_store Usage : $obj->relationship_store($newval) Function: Example : Returns : reference to an array of Bio::Ontology::TermI objects Args : reference to an array of Bio::Ontology::TermI objects =cut sub _relationship_store{ my ($self, $value) = @_; if( defined $value) { if ( defined $self->{'_relationship_store'}) { $self->throw("_relationship_store already defined\n"); } else { $self->{'_relationship_store'} = $value; } } return $self->{'_relationship_store'}; } =head2 _inverted_relationship_store Title : _inverted_relationship_store Usage : Function: Example : Returns : reference to an array of Bio::Ontology::TermI objects Args : reference to an array of Bio::Ontology::TermI objects =cut sub _inverted_relationship_store{ my ($self, $value) = @_; if( defined $value) { if ( defined $self->{'_inverted_relationship_store'}) { $self->throw("_inverted_relationship_store already defined\n"); } else { $self->{'_inverted_relationship_store'} = $value; } } return $self->{'_inverted_relationship_store'}; } =head2 _relationship_type_store Title : _relationship_type_store Usage : $obj->_relationship_type_store($newval) Function: Example : Returns : reference to an array of Bio::Ontology::RelationshipType objects Args : reference to an array of Bio::Ontology::RelationshipType objects =cut sub _relationship_type_store{ my ($self, $value) = @_; if( defined $value) { if ( defined $self->{'_relationship_type_store'}) { $self->throw("_relationship_type_store already defined\n"); } else { $self->{'_relationship_type_store'} = $value; } } return $self->{'_relationship_type_store'}; } =head2 _add_relationship_simple Title : _add_relationship_simple Usage : Function: Example : Returns : Args : =cut sub _add_relationship_simple{ my ($self, $store, $rel, $inverted) = @_; my $parent_id; my $child_id; if ($inverted) { $parent_id = $rel->subject_term->identifier; $child_id = $rel->object_term->identifier; } else { $parent_id = $rel->object_term->identifier; $child_id = $rel->subject_term->identifier; } if((defined $store->{$parent_id}->{$child_id}) && ($store->{$parent_id}->{$child_id}->name != $rel->predicate_term->name)){ $self->throw("relationship ".Dumper($rel->predicate_term). " between ".$parent_id." and ".$child_id. " already defined as ". Dumper($store->{$parent_id}->{$child_id})."\n"); } else { $store->{$parent_id}->{$child_id} = $rel->predicate_term; } } =head2 add_relationship Title : add_relationship Usage : add_relationship(RelationshipI relationship): RelationshipI Function: Adds a relationship object to the ontology engine. Example : Returns : Its argument. Args : A RelationshipI object. =cut sub add_relationship{ my ($self, $rel) = @_; $self->_add_relationship_simple($self->_relationship_store, $rel, 0); $self->_add_relationship_simple($self->_inverted_relationship_store, $rel, 1); $self->_relationship_type_store->{ $self->_unique_termid($rel->predicate_term)} = $rel->predicate_term; return $rel; } =head2 get_relationships Title : get_relationships Usage : get_relationships(): RelationshipI[] Function: Retrieves all relationship objects. Example : Returns : Array of RelationshipI objects Args : =cut sub get_relationships{ my $self = shift; my $term = shift; my @rels; my $store = $self->_relationship_store; my $relfact = $self->relationship_factory(); my @parent_ids = $term ? # if a term is supplied then only get the term's parents (map { $_->identifier(); } $self->get_parent_terms($term)) : # otherwise use all parent ids (keys %{$store}); # add the term as a parent too if one is supplied push(@parent_ids,$term->identifier) if $term; foreach my $parent_id (@parent_ids) { my $parent_entry = $store->{$parent_id}; # if a term is supplied, add a relationship for the parent to the term # except if the parent is the term itself (we added that one before) if($term && ($parent_id ne $term->identifier())) { my $parent_term = $self->get_term_by_identifier($parent_id); push(@rels, $relfact->create_object(-object_term => $parent_term, -subject_term => $term, -predicate_term => $parent_entry->{$term->identifier}, -ontology => $term->ontology() ) ); } else { # otherwise, i.e., no term supplied, or the parent equals the # supplied term my $parent_term = $term ? $term : $self->get_term_by_identifier($parent_id); foreach my $child_id (keys %$parent_entry) { my $rel_info = $parent_entry->{$child_id}; push(@rels, $relfact->create_object(-object_term => $parent_term, -subject_term => $self->get_term_by_identifier( $child_id), -predicate_term => $rel_info, -ontology =>$parent_term->ontology ) ); } } } return @rels; } =head2 get_all_relationships Title : get_all_relationships Usage : get_all_relationships(): RelationshipI[] Function: Retrieves all relationship objects. Example : Returns : Array of RelationshipI objects Args : =cut sub get_all_relationships{ return shift->get_relationships(); } =head2 get_predicate_terms Title : get_predicate_terms Usage : get_predicate_terms(): TermI[] Function: Retrives all relationship types stored in the engine Example : Returns : reference to an array of Bio::Ontology::RelationshipType objects Args : =cut sub get_predicate_terms{ my ($self) = @_; return values %{$self->_relationship_type_store}; } =head2 _is_rel_type Title : _is_rel_type Usage : Function: Example : Returns : Args : =cut sub _is_rel_type{ my ($self, $term, @rel_types) = @_; foreach my $rel_type (@rel_types) { if($rel_type->identifier || $term->identifier) { return 1 if $rel_type->identifier eq $term->identifier; } else { return 1 if $rel_type->name eq $term->name; } } return 0; } =head2 _typed_traversal Title : _typed_traversal Usage : Function: Example : Returns : Args : =cut sub _typed_traversal{ my ($self, $rel_store, $level, $term_id, @rel_types) = @_; return undef if !defined($rel_store->{$term_id}); my %parent_entry = %{$rel_store->{$term_id}}; my @children = keys %parent_entry; my @ans; if (@rel_types > 0) { @ans = (); foreach my $child_id (@children) { push @ans, $child_id if $self->_is_rel_type( $rel_store->{$term_id}->{$child_id}, @rel_types); } } else { @ans = @children; } if ($level < 1) { my @ans1 = (); foreach my $child_id (@ans) { push @ans1, $self->_typed_traversal($rel_store, $level - 1, $child_id, @rel_types) if defined $rel_store->{$child_id}; } push @ans, @ans1; } return @ans; } =head2 get_child_terms Title : get_child_terms Usage : get_child_terms(TermI term, TermI[] predicate_terms): TermI[] get_child_terms(TermI term, RelationshipType[] predicate_terms): TermI[] Function: Retrieves all child terms of a given term, that satisfy a relationship among those that are specified in the second argument or undef otherwise. get_child_terms is a special case of get_descendant_terms, limiting the search to the direct descendants. Example : Returns : Array of TermI objects. Args : First argument is the term of interest, second is the list of relationship type terms. =cut sub get_child_terms{ my ($self, $term, @relationship_types) = @_; $self->throw("must provide TermI compliant object") unless defined($term) && $term->isa("Bio::Ontology::TermI"); return $self->_filter_unmarked( $self->get_term_by_identifier( $self->_typed_traversal($self->_relationship_store, 1, $term->identifier, @relationship_types) ) ); } =head2 get_descendant_terms Title : get_descendant_terms Usage : get_descendant_terms(TermI term, TermI[] rel_types): TermI[] get_child_terms(TermI term, RelationshipType[] predicate_terms): TermI[] Function: Retrieves all descendant terms of a given term, that satisfy a relationship among those that are specified in the second argument or undef otherwise. Uses _typed_traversal to find all descendants. Example : Returns : Array of TermI objects. Args : First argument is the term of interest, second is the list of relationship type terms. =cut sub get_descendant_terms{ my ($self, $term, @relationship_types) = @_; $self->throw("must provide TermI compliant object") unless defined($term) && $term->isa("Bio::Ontology::TermI"); return $self->_filter_unmarked( $self->_filter_repeated( $self->get_term_by_identifier( $self->_typed_traversal($self->_relationship_store, 0, $term->identifier, @relationship_types) ) ) ); } =head2 get_parent_terms Title : get_parent_terms Usage : get_parent_terms(TermI term, TermI[] predicate_terms): TermI[] get_child_terms(TermI term, RelationshipType[] predicate_terms): TermI[] Function: Retrieves all parent terms of a given term, that satisfy a relationship among those that are specified in the second argument or undef otherwise. get_parent_terms is a special case of get_ancestor_terms, limiting the search to the direct ancestors. Example : Returns : Array of TermI objects. Args : First argument is the term of interest, second is the list of relationship type terms. =cut sub get_parent_terms{ my ($self, $term, @relationship_types) = @_; $self->throw("term must be a valid object, not undef") unless defined $term; return $self->_filter_unmarked( $self->get_term_by_identifier( $self->_typed_traversal($self->_inverted_relationship_store, 1, $term->identifier, @relationship_types) ) ); } =head2 get_ancestor_terms Title : get_ancestor_terms Usage : get_ancestor_terms(TermI term, TermI[] predicate_terms): TermI[] get_child_terms(TermI term, RelationshipType[] predicate_terms): TermI[] Function: Retrieves all ancestor terms of a given term, that satisfy a relationship among those that are specified in the second argument or undef otherwise. Uses _typed_traversal to find all ancestors. Example : Returns : Array of TermI objects. Args : First argument is the term of interest, second is the list of relationship type terms. =cut sub get_ancestor_terms{ my ($self, $term, @relationship_types) = @_; $self->throw("term must be a valid object, not undef") unless defined $term; return $self->_filter_unmarked( $self->_filter_repeated( $self->get_term_by_identifier( $self->_typed_traversal($self->_inverted_relationship_store, 0, $term->identifier, @relationship_types) ) ) ); } =head2 get_leaf_terms Title : get_leaf_terms Usage : get_leaf_terms(): TermI[] Function: Retrieves all leaf terms from the ontology. Leaf term is a term w/o descendants. Example : @leaf_terms = $obj->get_leaf_terms() Returns : Array of TermI objects. Args : =cut sub get_leaf_terms{ my ($self) = @_; my @leaf_terms; foreach my $term (values %{$self->_term_store}) { push @leaf_terms, $term if !defined $self->_relationship_store->{$term->identifier} && defined $self->_instantiated_terms_store->{$term->identifier}; } return @leaf_terms; } =head2 get_root_terms Title : get_root_terms Usage : get_root_terms(): TermI[] Function: Retrieves all root terms from the ontology. Root term is a term w/o descendants. Example : @root_terms = $obj->get_root_terms() Returns : Array of TermI objects. Args : =cut sub get_root_terms{ my ($self) = @_; my @root_terms; foreach my $term (values %{$self->_term_store}) { push @root_terms, $term if !defined $self->_inverted_relationship_store->{$term->identifier} && defined $self->_instantiated_terms_store->{$term->identifier}; } return @root_terms; } =head2 _filter_repeated Title : _filter_repeated Usage : @lst = $self->_filter_repeated(@old_lst); Function: Removes repeated terms Example : Returns : List of unique TermI objects Args : List of TermI objects =cut sub _filter_repeated{ my ($self, @args) = @_; my %h; foreach my $element (@args) { $h{$element->identifier} = $element if !defined $h{$element->identifier}; } return values %h; } =head2 get_all_terms Title : get_all_terms Usage : get_all_terms(): TermI[] Function: Retrieves all terms currently stored in the ontology. Example : @all_terms = $obj->get_all_terms() Returns : Array of TermI objects. Args : =cut sub get_all_terms{ my ($self) = @_; return $self->_filter_unmarked( values %{$self->_term_store} ); } =head2 find_terms Title : find_terms Usage : ($term) = $oe->find_terms(-identifier => "SO:0000263"); Function: Find term instances matching queries for their attributes. This implementation can efficiently resolve queries by identifier. Example : Returns : an array of zero or more Bio::Ontology::TermI objects Args : Named parameters. The following parameters should be recognized by any implementations: -identifier query by the given identifier -name query by the given name =cut sub find_terms{ my ($self,@args) = @_; my @terms; my ($id,$name) = $self->_rearrange([qw(IDENTIFIER NAME)],@args); if(defined($id)) { @terms = $self->get_term_by_identifier($id); } else { @terms = $self->get_all_terms(); } if(defined($name)) { @terms = grep { $_->name() eq $name; } @terms; } return @terms; } =head2 relationship_factory Title : relationship_factory Usage : $fact = $obj->relationship_factory() Function: Get/set the object factory to be used when relationship objects are created by the implementation on-the-fly. Example : Returns : value of relationship_factory (a Bio::Factory::ObjectFactoryI compliant object) Args : on set, a Bio::Factory::ObjectFactoryI compliant object =cut sub relationship_factory{ my $self = shift; return $self->{'relationship_factory'} = shift if @_; return $self->{'relationship_factory'}; } =head2 term_factory Title : term_factory Usage : $fact = $obj->term_factory() Function: Get/set the object factory to be used when term objects are created by the implementation on-the-fly. Note that this ontology engine implementation does not create term objects on the fly, and therefore setting this attribute is meaningless. Example : Returns : value of term_factory (a Bio::Factory::ObjectFactoryI compliant object) Args : on set, a Bio::Factory::ObjectFactoryI compliant object =cut sub term_factory{ my $self = shift; if(@_) { $self->warn("setting term factory, but ".ref($self). " does not create terms on-the-fly"); return $self->{'term_factory'} = shift; } return $self->{'term_factory'}; } =head2 _filter_unmarked Title : _filter_unmarked Usage : _filter_unmarked(TermI[] terms): TermI[] Function: Removes the uninstantiated terms from the list of terms Example : Returns : array of fully instantiated TermI objects Args : array of TermI objects =cut sub _filter_unmarked{ my ($self, @terms) = @_; my @filtered_terms = (); if ( scalar(@terms) >= 1) { foreach my $term (@terms) { push @filtered_terms, $term if defined $self->_instantiated_terms_store->{$term->identifier}; } } return @filtered_terms; } =head2 remove_term_by_id Title : remove_term_by_id Usage : remove_term_by_id(String id): TermI Function: Removes TermI object from the ontology engine using the string id as an identifier. Current implementation does not enforce consistency of the relationships using that term. Example : $term = $soe->remove_term_by_id($id); Returns : Object of class TermI or undef if not found. Args : The string identifier of a term. =cut sub remove_term_by_id{ my ($self, $id) = @_; if ( $self->get_term_by_identifier($id) ) { my $term = $self->{_term_store}->{$id}; delete $self->{_term_store}->{$id}; return $term; } else { $self->warn("Term with id '$id' is not in the term store"); return undef; } } =head2 to_string Title : to_string Usage : print $sv->to_string(); Function: Currently returns formatted string containing the number of terms and number of relationships from the ontology engine. Example : print $sv->to_string(); Returns : Args : =cut sub to_string{ my ($self) = @_; my $s = ""; $s .= "-- # Terms:\n"; $s .= scalar($self->get_all_terms)."\n"; $s .= "-- # Relationships:\n"; $s .= $self->_get_number_rels."\n"; return $s; } =head2 _unique_termid Title : _unique_termid Usage : Function: Returns a string that can be used as ID using fail-over approaches. If the identifier attribute is not set, it uses the combination of name and ontology name, provided both are set. If they aren't, it returns the name alone. Note that this is a private method. Call from inheriting classes but not from outside. Example : Returns : a string Args : a Bio::Ontology::TermI compliant object =cut sub _unique_termid{ my $self = shift; my $term = shift; return $term->identifier() if $term->identifier(); my $id = $term->ontology->name() if $term->ontology(); if($id) { $id .= '|'; } else { $id = ''; } $id .= $term->name(); } ################################################################# # aliases ################################################################# *get_relationship_types = \&get_predicate_terms; 1;