# Raw content of Bio::OntologyIO::InterProParser
# $GNF: projects/gi/symgene/src/perl/seqproc/Bio/OntologyIO/InterProParser.pm,v 1.5 2003/02/07 22:05:58 pdimitro Exp $
#
# BioPerl module for InterProParser
#
# Cared for by Peter Dimitrov <dimitrov@gnf.org>
#
# Copyright Peter Dimitrov
# (c) Peter Dimitrov, dimitrov@gnf.org, 2002.
# (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
#
# You may distribute this module under the same terms as perl itself.
# Refer to the Perl Artistic License (see the license accompanying this
# software package, or see http://www.perl.com/language/misc/Artistic.html)
# for the terms under which you may use, modify, and redistribute this module.
#
# THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
# WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
# MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
#
# POD documentation - main docs before the code

=head1 NAME

InterProParser - Parser for InterPro xml files.

=head1 SYNOPSIS

  # don't use this module directly - use Bio::OntologyIO instead
  my $ipp = Bio::OntologyIO->new( -format          => 'interpro',
                                  -file            => 't/data/interpro.xml',
                                  -ontology_engine => 'simple' );

=head1 DESCRIPTION

Use InterProParser to parse InterPro files in xml format. Typical
use is the interpro.xml file published by EBI. The xml records
should follow the format described in interpro.dtd, although the dtd
file is not needed, and the XML file will not be validated against
it.

=head1 FEEDBACK

=head2 Mailing Lists

User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list.  Your participation is much appreciated.

  bioperl-l@bioperl.org              - General discussion
  http://bioperl.org/MailList.shtml  - About the mailing lists

=head2 Reporting Bugs

Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
email or the web:

  bioperl-bugs@bioperl.org
  http://bugzilla.bioperl.org/

=head1 AUTHOR - Peter Dimitrov

Email dimitrov@gnf.org

=head1 CONTRIBUTORS

Additional contributors names and emails here

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut


# Let the code begin...


package Bio::OntologyIO::InterProParser;
use vars qw(@ISA);
use strict;
#use Carp;
use XML::Parser::PerlSAX;
use Bio::Ontology::SimpleOntologyEngine;
use Bio::Ontology::TermFactory;
use Bio::OntologyIO;
use Bio::OntologyIO::Handlers::InterProHandler;

@ISA = qw( Bio::OntologyIO );

=head2 new

 Title   : new
 Usage   :
 Function: Initializes objects needed for parsing.
 Example : $ipp = Bio::OntologyIO::InterProParser->new(
                             -file            => 't/data/interpro.xml',
                             -ontology_engine => 'simple' )

 Returns : Object of class Bio::OntologyIO::InterProParser.
 Args    :

  -file            - file name
  -ontology_engine - type of ontology engine. Should satisfy the
                     OntologyEngine interface requirements. Currently
                     the only option is 'simple' (compared
                     case-insensitively). In the future Graph.pm based
                     engine will be added to the choices. Only
                     consulted when -engine is not supplied.
  -engine          - an already constructed engine or ontology object to
                     use instead of creating one. If it is a
                     Bio::Ontology::OntologyI it is handed to the SAX
                     handler as the ontology, and its engine() (when it
                     has that method) is used as the engine.
  -ontology_name   - passed on to the InterProHandler constructor as
                     -ontology_name.

 Throws  : if no -engine is given and -ontology_engine is anything
           other than 'simple'.

=cut

# in reality we let OntologyIO handle the first pass initialization
# and instead override _initialize().
sub _initialize {
    my $self = shift;

    $self->SUPER::_initialize(@_);

    my ($eng, $eng_type, $name) =
        $self->_rearrange([qw(ENGINE
                              ONTOLOGY_ENGINE
                              ONTOLOGY_NAME)
                          ], @_);

    my $ip_h = Bio::OntologyIO::Handlers::InterProHandler->new(
        -ontology_name => $name);

    if (! $eng) {
        # A missing -ontology_engine is treated as the empty string so that
        # neither lc() nor the error message below operates on undef.
        $eng_type = '' unless defined $eng_type;

        if (lc($eng_type) eq 'simple') {
            $eng = Bio::Ontology::SimpleOntologyEngine->new();
        }
        else {
            $self->throw("ontology engine type '$eng_type' ".
                         "not implemented yet");
        }
    }

    # The caller may have passed a full ontology rather than a bare engine:
    # give the ontology to the handler and continue with its engine.
    if ($eng->isa("Bio::Ontology::OntologyI")) {
        $ip_h->ontology($eng);
        $eng = $eng->engine() if $eng->can('engine');
    }

    # The presence of this key is what next_ontology() later checks to decide
    # whether the single ontology has already been handed out.
    $self->{_ontology_engine} = $eng;
    $ip_h->ontology_engine($eng);

    $self->{_parser} = XML::Parser::PerlSAX->new( Handler => $ip_h );
    $self->{_interpro_handler} = $ip_h;

    # default term object factory
    $self->term_factory(Bio::Ontology::TermFactory->new(
                            -type => "Bio::Ontology::InterProTerm"))
        unless $self->term_factory();
    $ip_h->term_factory($self->term_factory());
}

=head2 parse

 Title   : parse
 Usage   :
 Function: Performs the actual parsing of the file returned by file()
           and marks this object as parsed.
 Example : $ipp->parse();
 Returns : Whatever the underlying XML::Parser::PerlSAX parse() call
           returns.
 Args    : none

=cut

sub parse {
    my $self = shift;

    my $ret = $self->{_parser}->parse( Source => {
                                           SystemId => $self->file() } );
    $self->_is_parsed(1);
    return $ret;
}

=head2 next_ontology

 Title   : next_ontology
 Usage   : $ipp->next_ontology()
 Function: Parses the input file (if that has not been done yet) and
           returns the next InterPro ontology available. Usually there
           will be only one ontology returned from an InterPro XML
           input.
 Example : $ipp->next_ontology();
 Returns : Returns the ontology as a L<Bio::Ontology::OntologyEngineI>
           compliant object on the first call. Subsequent calls return
           undef in scalar context and an empty list in list context.
 Args    : none

=cut

sub next_ontology {
    my $self = shift;

    $self->parse() unless $self->_is_parsed();

    # there is only one ontology in an InterPro source file
    if (exists($self->{'_ontology_engine'})) {
        my $ont = $self->{_interpro_handler}->ontology();
        # Removing the key marks the ontology as already delivered.
        delete $self->{_ontology_engine};
        return $ont;
    }

    # Bare return: undef in scalar context, empty list in list context.
    return;
}

=head2 _is_parsed

 Title   : _is_parsed
 Usage   : $obj->_is_parsed($newval)
 Function: Get/set the flag recording whether parse() has been run.
 Example :
 Returns : value of _is_parsed (a scalar)
 Args    : on set, new value (a scalar or undef, optional)

=cut

sub _is_parsed {
    my $self = shift;

    return $self->{'_is_parsed'} = shift if @_;
    return $self->{'_is_parsed'};
}

=head2 secondary_accessions_map

 Title   : secondary_accessions_map
 Usage   : $obj->secondary_accessions_map()
 Function: This method is merely for convenience, and one should
           normally use the InterProTerm secondary_ids method to access
           the secondary accessions.
 Example : $map = $interpro_parser->secondary_accessions_map;
 Returns : Reference to a hash that maps InterPro identifier to an
           array reference of secondary accessions following the
           InterPro xml schema. The value is read directly from the
           InterPro handler's secondary_accessions_map slot.
 Args    : none (any arguments are ignored)

=cut

sub secondary_accessions_map {
    my ($self) = @_;

    return $self->{_interpro_handler}->{secondary_accessions_map};
}

1;