Raw content of Bio::Ontology::SimpleGOEngine
# $Id: SimpleGOEngine.pm,v 1.3.2.6 2003/06/30 05:04:06 lapp Exp $
#
# BioPerl module for Bio::Ontology::SimpleGOEngine
#
# Cared for by Christian M. Zmasek <czmasek@gnf.org> or <cmzmasek@yahoo.com>
#
# (c) Christian M. Zmasek, czmasek@gnf.org, 2002.
# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
#
# You may distribute this module under the same terms as perl itself.
# Refer to the Perl Artistic License (see the license accompanying this
# software package, or see http://www.perl.com/language/misc/Artistic.html)
# for the terms under which you may use, modify, and redistribute this module.
#
# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

SimpleGOEngine - an Ontology Engine for GO implementing OntologyEngineI

=head1 SYNOPSIS

  use Bio::Ontology::SimpleGOEngine;

  my $parser = Bio::Ontology::SimpleGOEngine->new
      ( -defs_file => "/home/czmasek/GO/GO.defs",
        -files     => ["/home/czmasek/GO/component.ontology",
                       "/home/czmasek/GO/function.ontology",
                       "/home/czmasek/GO/process.ontology"] );

  my $engine = $parser->parse();

  my $IS_A    = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
  my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );

=head1 DESCRIPTION

Needs Graph.pm from CPAN.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to the
Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bio.perl.org/MailList.html - About the mailing lists

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
email or the web:

  bioperl-bugs@bio.perl.org
  http://bugzilla.bioperl.org/

=head1 AUTHOR

Christian M. Zmasek

Email: czmasek@gnf.org or cmzmasek@yahoo.com

WWW:   http://www.genetics.wustl.edu/eddy/people/zmasek/

Address:

  Genomics Institute of the Novartis Research Foundation
  10675 John Jay Hopkins Drive
  San Diego, CA 92121

=head1 APPENDIX

The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _

=cut


# Let the code begin...

package Bio::Ontology::SimpleGOEngine;

use Graph::Directed;

use vars qw( @ISA );
use strict;
use Bio::Root::Root;
use Bio::Ontology::RelationshipType;
use Bio::Ontology::RelationshipFactory;
use Bio::Ontology::OntologyEngineI;

# Boolean return values used throughout this module.
use constant TRUE     => 1;
use constant FALSE    => 0;

# Names handed to Bio::Ontology::RelationshipType->get_instance().
use constant IS_A     => "IS_A";
use constant PART_OF  => "PART_OF";

# Attribute names under which data is stored in the graph: TERM on
# vertices, TYPE and ONTOLOGY on edges.
use constant TERM     => "TERM";
use constant TYPE     => "TYPE";
use constant ONTOLOGY => "ONTOLOGY";

@ISA = qw( Bio::Root::Root
           Bio::Ontology::OntologyEngineI );


=head2 new

 Title   : new
 Usage   : $engine = Bio::Ontology::SimpleGOEngine->new()
 Function: Creates a new SimpleGOEngine. All arguments are handed
           unchanged to the parent constructor; init() is then
           called on the freshly created object.
 Returns : A new SimpleGOEngine object
 Args    :

=cut

sub new {
    my $class = shift;

    # Whatever the caller passed is consumed by the parent constructor;
    # the engine specific state is set up by init().
    my $engine = $class->SUPER::new( @_ );
    $engine->init();

    return $engine;
} # new



=head2 init

 Title   : init()
 Usage   : $engine->init();
 Function: Initializes this Engine.
Returns :
 Args    :

=cut

sub init {
    my $self = shift;

    # Cache one RelationshipType instance per supported relationship kind,
    # in the order "is-a" then "part-of".
    foreach my $slot ( [ "_is_a_relationship",    IS_A ],
                       [ "_part_of_relationship", PART_OF ] ) {
        my ( $key, $type_name ) = @{ $slot };
        $self->{ $key } = Bio::Ontology::RelationshipType->get_instance( $type_name );
    }

    # Every engine starts out with an empty directed graph.
    $self->graph( Graph::Directed->new() );

    # set defaults for the factories
    $self->relationship_factory(
        Bio::Ontology::RelationshipFactory->new(
            -type => "Bio::Ontology::Relationship" ) );
} # init



=head2 is_a_relationship

 Title   : is_a_relationship()
 Usage   : $IS_A = $engine->is_a_relationship();
 Function: Returns a Bio::Ontology::RelationshipType object for "is-a"
           relationships. The field is read-only: passing a defined
           argument raises an exception.
 Returns : Bio::Ontology::RelationshipType set to "IS_A"
 Args    :

=cut

sub is_a_relationship {
    my $self = shift;

    # Any defined argument is an attempt to overwrite the cached type.
    $self->throw( "Attempted to change immutable field" ) if defined $_[0];

    return $self->{ "_is_a_relationship" };
} # is_a_relationship



=head2 part_of_relationship

 Title   : part_of_relationship()
 Usage   : $PART_OF = $engine->part_of_relationship();
 Function: Returns a Bio::Ontology::RelationshipType object for "part-of"
           relationships. The field is read-only: passing a defined
           argument raises an exception.
 Returns : Bio::Ontology::RelationshipType set to "PART_OF"
 Args    :

=cut

sub part_of_relationship {
    my ( $self, @new_value ) = @_;

    # Only the first argument is inspected; a defined one is rejected.
    if ( defined $new_value[0] ) {
        $self->throw( "Attempted to change immutable field" );
    }

    return $self->{ "_part_of_relationship" };
} # part_of_relationship



=head2 add_term

 Title   : add_term
 Usage   : $engine->add_term( $term_obj );
 Function: Adds a Bio::Ontology::TermI to this engine. The term is
           stored as the TERM attribute of a graph vertex named
           after the term's identifier.
 Returns : true if the term was added and false otherwise (e.g., if the
           term already existed in the ontology engine)
 Args    : Bio::Ontology::TermI

=cut

sub add_term {
    my ( $self, $term ) = @_;

    # Nothing to do if a vertex for this term is already present.
    if ( $self->has_term( $term ) ) {
        return FALSE;
    }

    my $vertex_id = $self->_get_id( $term );
    my $graph     = $self->graph();

    $graph->add_vertex( $vertex_id );
    $graph->set_attribute( TERM, $vertex_id, $term );

    return TRUE;
} # add_term



=head2 has_term

 Title   : has_term
 Usage   : $engine->has_term( $term );
 Function: Checks whether this engine contains a particular term
 Returns :
true or false
 Args    : Bio::Ontology::TermI
           or
           term identifier (e.g. "GO:0012345")

=cut

sub has_term {
    my ( $self, $term ) = @_;

    # Vertices are named after term identifiers, so normalise first.
    my $term_id = $self->_get_id( $term );

    return TRUE if $self->graph()->has_vertex( $term_id );
    return FALSE;
} # has_term



=head2 add_relationship

 Title   : add_relationship
 Usage   : $engine->add_relationship( $relationship );
           $engine->add_relationship( $subject_term, $predicate_term,
                                      $object_term, $ontology );
           $engine->add_relationship( $subject_id, $predicate_id,
                                      $object_id, $ontology);
 Function: Adds a relationship to this engine. The relationship is
           stored as an edge from the object (parent) to the subject
           (child); terms not yet present are added on the way.
 Returns : true if successfully added, false otherwise (i.e., if an
           edge between the two terms already exists)
 Args    : term id, Bio::Ontology::TermI (rel.type), term id, ontology
           or
           Bio::Ontology::TermI, Bio::Ontology::TermI (rel.type),
           Bio::Ontology::TermI, ontology
           or
           Bio::Ontology::RelationshipI

=cut

# term objs or term ids
sub add_relationship {
    my ( $self, $child, $type, $parent, $ont ) = @_;

    # Invoked with a single argument: it has to be a relationship object,
    # which is taken apart into subject, predicate, object and ontology.
    if ( @_ == 2 ) {
        my $rel = $child;
        $self->_check_class( $rel, "Bio::Ontology::RelationshipI" );
        $type   = $rel->predicate_term();
        $parent = $rel->object_term();
        $ont    = $rel->ontology();
        $child  = $rel->subject_term();
    }

    $self->_check_class( $type, "Bio::Ontology::TermI" );

    my $parent_id = $self->_get_id( $parent );
    my $child_id  = $self->_get_id( $child );
    my $graph     = $self->graph();

    # Make sure both end points exist as vertices (child first).
    if ( ! $graph->has_vertex( $child_id ) ) {
        $self->add_term( $child );
    }
    if ( ! $graph->has_vertex( $parent_id ) ) {
        $self->add_term( $parent );
    }

    # This prevents multi graphs.
    return FALSE if $graph->has_edge( $parent_id, $child_id );

    # Edges point from parent to child and carry the relationship type
    # and the ontology as attributes.
    $graph->add_edge( $parent_id, $child_id );
    $graph->set_attribute( TYPE,     $parent_id, $child_id, $type );
    $graph->set_attribute( ONTOLOGY, $parent_id, $child_id, $ont );

    return TRUE;
} # add_relationship



=head2 get_relationships

 Title   : get_relationships
 Usage   : $engine->get_relationships( $term );
 Function: Returns all relationships of a term, or all relationships in
           the graph if no term is specified.
Returns : Relationship[]
 Args    : term id
           or
           Bio::Ontology::TermI

=cut

sub get_relationships {
    my ( $self, $term ) = @_;

    my $g = $self->graph();

    # obtain the ID if term provided
    my $termid;
    if ( $term ) {
        $termid = $self->_get_id( $term );
        # check for presence in the graph
        if ( ! $g->has_vertex( $termid ) ) {
            $self->throw( "no term with identifier \"$termid\" in ontology" );
        }
    }

    # Relationship objects are not stored; they are created on the fly by
    # the configured factory.
    my $relfact = $self->relationship_factory();

    # we'll build the relationships from edges; the list returned by
    # edges() is consumed two identifiers (start, end) at a time
    my @rels  = ();
    my @edges = $g->edges( $termid );
    while ( @edges ) {
        my ( $startid, $endid ) = splice( @edges, 0, 2 );

        # get_terms() is context sensitive and would hand back a list of
        # unknown length if called directly inside the argument list below.
        # Forcing every value into a scalar guarantees that the named
        # arguments of create_object() can never get out of step.
        my $subject   = $self->get_terms( $endid );
        my $object    = $self->get_terms( $startid );
        my $predicate = $g->get_attribute( TYPE,     $startid, $endid );
        my $ontology  = $g->get_attribute( ONTOLOGY, $startid, $endid );

        # Edges run from parent to child, hence the end vertex is the
        # subject and the start vertex the object of the relationship.
        my $rel = $relfact->create_object( -subject_term   => $subject,
                                           -object_term    => $object,
                                           -predicate_term => $predicate,
                                           -ontology       => $ontology );
        push( @rels, $rel );
    }

    return @rels;
} # get_relationships



=head2 get_all_relationships

 Title   : get_all_relationships
 Usage   : @rels = $engine->get_all_relationships();
 Function: Returns all relationships in the graph.
Returns : Relationship[]
 Args    :

=cut

sub get_all_relationships {
    my ( $self, @args ) = @_;

    # Plain delegation; any arguments are passed through untouched.
    return $self->get_relationships( @args );
} # get_all_relationships



=head2 get_predicate_terms

 Title   : get_predicate_terms
 Usage   : $engine->get_predicate_terms();
 Function: Returns the types of relationships this engine contains
 Returns : Bio::Ontology::RelationshipType[]
 Args    :

=cut

sub get_predicate_terms {
    my $self = shift;

    # This engine knows exactly two predicates: "is-a" and "part-of".
    my @predicates;
    push( @predicates, $self->is_a_relationship() );
    push( @predicates, $self->part_of_relationship() );

    return @predicates;
} # get_predicate_terms




=head2 get_child_terms

 Title   : get_child_terms
 Usage   : $engine->get_child_terms( $term_obj, @rel_types );
           $engine->get_child_terms( $term_id, @rel_types );
 Function: Returns the children of this term
 Returns : Bio::Ontology::TermI[]
 Args    : Bio::Ontology::TermI, Bio::Ontology::RelationshipType[]
           or
           term id, Bio::Ontology::RelationshipType[]

           if NO Bio::Ontology::RelationshipType[] is indicated: children
           of ALL types are returned

=cut

sub get_child_terms {
    my $self = shift;
    my $term = shift;

    # TRUE selects the "children" direction in the shared helper; what is
    # left in @_ are the relationship types to filter by.
    return $self->_get_child_parent_terms_helper( $term, TRUE, @_ );
} # get_child_terms



=head2 get_descendant_terms

 Title   : get_descendant_terms
 Usage   : $engine->get_descendant_terms( $term_obj, @rel_types );
           $engine->get_descendant_terms( $term_id, @rel_types );
 Function: Returns the descendants of this term
 Returns : Bio::Ontology::TermI[]
 Args    : Bio::Ontology::TermI, Bio::Ontology::RelationshipType[]
           or
           term id, Bio::Ontology::RelationshipType[]

           if NO Bio::Ontology::RelationshipType[] is indicated:
           descendants of ALL types are returned

=cut

sub get_descendant_terms {
    my ( $self, $term, @types ) = @_;

    my $start_id = $self->_get_id( $term );
    unless ( $self->graph()->has_vertex( $start_id ) ) {
        $self->throw( "Ontology does not contain a term with an identifier of \"$start_id\"" );
    }

    # The recursive helper records every identifier it reaches as a key
    # of this hash.
    my %descendant_ids = ();
    $self->_get_descendant_terms_helper( $start_id, \%descendant_ids, \@types );

    my @ids = keys %descendant_ids;
    return $self->get_terms( @ids );
} # get_descendant_terms



=head2 get_parent_terms

 Title   : get_parent_terms
 Usage   : $engine->get_parent_terms( $term_obj, @rel_types );
           $engine->get_parent_terms( $term_id, @rel_types );
 Function: Returns the parents of this term
 Returns : Bio::Ontology::TermI[]
 Args    : Bio::Ontology::TermI, Bio::Ontology::RelationshipType[]
           or
           term id, Bio::Ontology::RelationshipType[]

           if NO Bio::Ontology::RelationshipType[] is indicated:
           parents of ALL types are returned

=cut

sub get_parent_terms {
    my $self = shift;
    my $term = shift;

    # FALSE selects the "parents" direction in the shared helper; what is
    # left in @_ are the relationship types to filter by.
    return $self->_get_child_parent_terms_helper( $term, FALSE, @_ );
} # get_parent_terms



=head2 get_ancestor_terms

 Title   : get_ancestor_terms
 Usage   : $engine->get_ancestor_terms( $term_obj, @rel_types );
           $engine->get_ancestor_terms( $term_id, @rel_types );
 Function: Returns the ancestors of this term
 Returns : Bio::Ontology::TermI[]
 Args    : Bio::Ontology::TermI, Bio::Ontology::RelationshipType[]
           or
           term id, Bio::Ontology::RelationshipType[]

           if NO Bio::Ontology::RelationshipType[] is indicated:
           ancestors of ALL types are returned

=cut

sub get_ancestor_terms {
    my ( $self, $term, @types ) = @_;

    my $start_id = $self->_get_id( $term );
    unless ( $self->graph()->has_vertex( $start_id ) ) {
        $self->throw( "Ontology does not contain a term with an identifier of \"$start_id\"" );
    }

    # The recursive helper records every identifier it reaches as a key
    # of this hash.
    my %ancestor_ids = ();
    $self->_get_ancestor_terms_helper( $start_id, \%ancestor_ids, \@types );

    my @ids = keys %ancestor_ids;
    return $self->get_terms( @ids );
} # get_ancestor_terms



=head2 get_leaf_terms

 Title   : get_leaf_terms
 Usage   : $engine->get_leaf_terms();
 Function: Returns the leaf terms (the sink vertices of the graph)
 Returns : Bio::Ontology::TermI[]
 Args    :

=cut

sub get_leaf_terms {
    my $self = shift;

    my @leaf_ids = $self->graph()->sink_vertices();

    return $self->get_terms( @leaf_ids );
}



=head2 get_root_terms()

 Title   : get_root_terms
 Usage   : $engine->get_root_terms();
 Function: Returns the root terms (the source vertices of the graph)
 Returns : Bio::Ontology::TermI[]
 Args    :

=cut

sub get_root_terms {
    my $self = shift;

    my @root_ids = $self->graph()->source_vertices();

    return $self->get_terms( @root_ids );
}


=head2 get_terms

 Title   : get_terms
 Usage   : @terms = $engine->get_terms( "GO:1234567", "GO:2234567" );
 Function: Returns term objects with given identifiers. Identifiers
           that are not vertices of the graph are skipped.
 Returns : Bio::Ontology::TermI[], or the term corresponding to the
           first identifier if called in scalar context
 Args    : term ids[]

=cut

sub get_terms {
    my ( $self, @ids ) = @_;

    my @found = ();
    for my $id ( @ids ) {
        # unknown identifiers contribute nothing to the result
        next unless $self->graph()->has_vertex( $id );
        push( @found, $self->graph()->get_attribute( TERM, $id ) );
    }

    # list context: everything found; otherwise: the first hit only
    return @found if wantarray;
    return shift( @found );
} # get_terms


=head2 get_all_terms

 Title   : get_all_terms
 Usage   : $engine->get_all_terms();
 Function: Returns all terms in this engine
 Returns : Bio::Ontology::TermI[]
 Args    :

=cut

sub get_all_terms {
    my $self = shift;

    # every vertex of the graph stands for one term
    my @vertex_ids = $self->graph()->vertices();

    return $self->get_terms( @vertex_ids );
} # get_all_terms


=head2 find_terms

 Title   : find_terms
 Usage   : ($term) = $oe->find_terms(-identifier => "SO:0000263");
 Function: Find term instances matching queries for their attributes.

           This implementation can efficiently resolve queries by
           identifier.
Example :
 Returns : an array of zero or more Bio::Ontology::TermI objects
 Args    : Named parameters. The following parameters should be recognized
           by any implementations:

              -identifier    query by the given identifier
              -name          query by the given name

=cut

sub find_terms{
    my ($self,@args) = @_;
    my @terms;

    my ($id,$name) = $self->_rearrange([qw(IDENTIFIER NAME)],@args);

    # An identifier narrows the candidates to a direct lookup; without one
    # every term of the engine is a candidate.
    if(defined($id)) {
        @terms = $self->get_terms($id);
    } else {
        @terms = $self->get_all_terms();
    }
    # A name, if given, filters the candidates by exact string equality.
    if(defined($name)) {
        @terms = grep { $_->name() eq $name; } @terms;
    }
    return @terms;
}

=head2 relationship_factory

 Title   : relationship_factory
 Usage   : $fact = $obj->relationship_factory()
 Function: Get/set the object factory to be used when relationship
           objects are created by the implementation on-the-fly.

 Example :
 Returns : value of relationship_factory (a Bio::Factory::ObjectFactoryI
           compliant object)
 Args    : on set, a Bio::Factory::ObjectFactoryI compliant object

=cut

sub relationship_factory{
    my $self = shift;

    return $self->{'relationship_factory'} = shift if @_;
    return $self->{'relationship_factory'};
}

=head2 term_factory

 Title   : term_factory
 Usage   : $fact = $obj->term_factory()
 Function: Get/set the object factory to be used when term objects are
           created by the implementation on-the-fly.

           Note that this ontology engine implementation does not
           create term objects on the fly, and therefore setting this
           attribute is meaningless.

 Example :
 Returns : value of term_factory (a Bio::Factory::ObjectFactoryI
           compliant object)
 Args    : on set, a Bio::Factory::ObjectFactoryI compliant object

=cut

sub term_factory{
    my $self = shift;

    if(@_) {
        # The value is stored nevertheless, but the caller is told that
        # it will have no effect.
        $self->warn("setting term factory, but ".ref($self).
                    " does not create terms on-the-fly");
        return $self->{'term_factory'} = shift;
    }
    return $self->{'term_factory'};
}

=head2 graph

 Title   : graph()
 Usage   : $engine->graph();
 Function: Returns the Graph this engine is based on. If a defined
           argument is given it must be a Graph::Directed and replaces
           the current graph.
 Returns : Graph
 Args    :

=cut

sub graph {
    my ( $self, $value ) = @_;

    if ( defined $value ) {
        $self->_check_class( $value, "Graph::Directed" );
        $self->{ "_graph" } = $value;
    }
    return $self->{ "_graph" };
} # graph


# Internal methods
# ----------------

# Checks the correct format of a GOBO-formatted id
# Gets the id out of a term or id string
#
# Args    : a term object (Bio::Ontology::GOterm or any other
#           Bio::Ontology::TermI) or an identifier string
# Returns : the identifier string; a string consisting of digits only
#           is prefixed with "GO:"
# Throws  : if a reference is passed that is not a Bio::Ontology::TermI
sub _get_id {
    my ( $self, $term ) = @_;

    if(ref($term)) {
        # UNIVERSAL::isa() is called as a function on purpose: it simply
        # returns false for an unblessed reference, whereas the method
        # call $term->isa() would abort with a raw Perl error before the
        # throw() below could report the problem.
        return $term->GO_id() if UNIVERSAL::isa($term, "Bio::Ontology::GOterm");
        # if not a GOterm, use standard API
        $self->throw("Object doesn't implement Bio::Ontology::TermI. ".
                     "Bummer.")
            unless UNIVERSAL::isa($term, "Bio::Ontology::TermI");
        $term = $term->identifier();
    }
    # don't fuss if it looks remotely standard
    return $term if $term =~ /^[A-Z]{1,8}:\d{3,}$/;
    # prefix with something if only numbers
    if($term =~ /^\d+$/) {
        $self->warn(ref($self).": identifier [$term] is only numbers - ".
                    "prefixing with 'GO:'");
        return "GO:" . $term;
    }
    # we shouldn't have gotten here if it's at least a remotely decent ID
    $self->warn(ref($self).
                ": Are you sure '$term' is a valid identifier? ".
                "If you see problems, this may be the cause.");
    return $term;
} # _get_id


# Helper for getting children and parent terms
#
# Args    : term object or id, a flag (true: children, i.e. successors
#           in the graph; false: parents, i.e. predecessors), and an
#           optional list of relationship types to filter by
# Returns : whatever get_terms() returns for the selected neighbours
# Throws  : if a type is not a Bio::Ontology::TermI or if the term is
#           not a vertex of the graph
sub _get_child_parent_terms_helper {
    my ( $self, $term, $do_get_child_terms, @types ) = @_;

    foreach my $type ( @types ) {
        $self->_check_class( $type, "Bio::Ontology::TermI" );
    }

    $term = $self->_get_id( $term );
    my $g = $self->graph();
    if ( ! $g->has_vertex( $term ) ) {
        $self->throw( "Ontology does not contain a term with an identifier of \"$term\"" );
    }

    my @all_relative_terms = $do_get_child_terms
        ? $g->successors( $term )
        : $g->predecessors( $term );

    # Without a type filter every neighbour qualifies.
    if ( scalar( @types ) < 1 ) {
        return $self->get_terms( @all_relative_terms );
    }

    my @relative_terms = ();
    foreach my $relative ( @all_relative_terms ) {
        # Edges always run from parent to child, so the position of $term
        # within the edge depends on the direction asked for. The type of
        # the edge does not depend on the filter and is fetched only once.
        my @edge = $do_get_child_terms ? ( $term, $relative )
                                       : ( $relative, $term );
        my $relative_type = $g->get_attribute( TYPE, @edge );

        foreach my $type ( @types ) {
            if ( $relative_type->equals( $type ) ) {
                push( @relative_terms, $relative );
                # One match is enough; going on would list the same
                # relative again if the filter repeats a type.
                last;
            }
        }
    }

    return $self->get_terms( @relative_terms );

} # _get_child_parent_terms_helper


# Recursive helper
#
# Records the identifier of every descendant of $term as a key of
# %$ids_ref. A term that has been recorded already is not expanded a
# second time: its descendants are, or are about to be, recorded by the
# call that first reached it. This keeps the walk linear on graphs with
# shared sub-graphs and makes it terminate even if the graph contains a
# cycle.
sub _get_descendant_terms_helper {
    my ( $self, $term, $ids_ref, $types_ref ) = @_;

    foreach my $child_term ( $self->get_child_terms( $term, @$types_ref ) ) {
        my $child_term_id = $child_term->identifier();
        next if exists $ids_ref->{ $child_term_id };
        $ids_ref->{ $child_term_id } = 0;
        $self->_get_descendant_terms_helper( $child_term_id, $ids_ref, $types_ref );
    }

    return;
} # _get_descendant_terms_helper


# Recursive helper
#
# Records the identifier of every ancestor of $term as a key of
# %$ids_ref. A term that has been recorded already is not expanded a
# second time: its ancestors are, or are about to be, recorded by the
# call that first reached it. This keeps the walk linear on graphs with
# shared sub-graphs and makes it terminate even if the graph contains a
# cycle.
sub _get_ancestor_terms_helper {
    my ( $self, $term, $ids_ref, $types_ref ) = @_;

    foreach my $parent_term ( $self->get_parent_terms( $term, @$types_ref ) ) {
        my $parent_term_id = $parent_term->identifier();
        next if exists $ids_ref->{ $parent_term_id };
        $ids_ref->{ $parent_term_id } = 0;
        $self->_get_ancestor_terms_helper( $parent_term_id, $ids_ref, $types_ref );
    }

    return;
} # _get_ancestor_terms_helper


# Throws unless $value is an object of (a subclass of) $expected_class.
# The message names what was found instead: undef, a plain scalar, or
# the reference type / class of $value.
sub _check_class {
    my ( $self, $value, $expected_class ) = @_;

    if ( ! defined( $value ) ) {
        $self->throw( "Found [undef] where [$expected_class] expected" );
    }
    elsif ( ! ref( $value ) ) {
        $self->throw( "Found [scalar] where [$expected_class] expected" );
    }
    # UNIVERSAL::isa() as a function copes with unblessed references
    # (e.g. a plain array reference); the method call $value->isa()
    # would die with a raw Perl error instead of the message below.
    elsif ( ! UNIVERSAL::isa( $value, $expected_class ) ) {
        $self->throw( "Found [" . ref( $value )
                      . "] where [$expected_class] expected" );
    }

} # _check_class

#################################################################
# aliases
#################################################################

*get_relationship_types = \&get_predicate_terms;

1;