Bio::OntologyIO
dagflat
Toolbar
Summary
dagflat - a base class parser for GO flat-file type formats
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
use Bio::OntologyIO;
# do not use directly -- use via Bio::OntologyIO
# e.g., the GO parser is a simple extension of this class
my $parser = Bio::OntologyIO->new
( -format => "go",
-defs_file => "/home/czmasek/GO/GO.defs",
-files => ["/home/czmasek/GO/component.ontology",
"/home/czmasek/GO/function.ontology",
"/home/czmasek/GO/process.ontology"] );
my $go_ontology = $parser->next_ontology();
my $IS_A = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
Description
Needs Graph.pm from CPAN.
Methods
Methods description
Title : _defs_io Usage : $obj->_defs_io($newval) Function: Get/set the Bio::Root::IO instance representing the definition file, if provided (see defs_file()). Example : Returns : value of _defs_io (a Bio::Root::IO object) Args : on set, new value (a Bio::Root::IO object or undef, optional) |
Title : _flat_files Usage : $files_to_parse = $parser->_flat_files(); Function: Get the array of ontology flat files that need to be parsed.
Note that this array will decrease in elements over the
parsing process. Therefore, its value outside of this
module will be limited. Also, be careful not to alter the
array unless you know what you are doing.
Returns : a reference to an array of zero or more strings
Args : none |
Title : defs_file Usage : $parser->defs_file( "GO.defs" ); Function: Set/get for the term definitions filename. Returns : The term definitions file name [string]. Args : On set, the term definitions file name [string] (optional). |
Title : next_ontology Usage : Function: Get the next available ontology from the parser. This is the method prescribed by Bio::OntologyIO. Example : Returns : An object implementing Bio::Ontology::OntologyI, and undef if there is no more ontology in the input. Args : |
Title : ontology_name Usage : $obj->ontology_name($newval) Function: Get/set the name of the ontology parsed by this module. Example : Returns : value of ontology_name (a scalar) Args : on set, new value (a scalar or undef, optional) |
Title : parse() Usage : $parser->parse(); Function: Parses the files set with "new" or with methods defs_file and _flat_files.
Normally you should not need to call this method as it will
be called automatically upon the first call to
next_ontology().
Returns : [Bio::Ontology::OntologyEngineI]
Args : |
Methods code
_add_ontology | description | prev | next | Top |
# Appends one or more ontology objects to the queue of parsed ontologies
# kept in $self->{'_ontologies'} (the queue next_ontology() shifts from).
#
# Args   : one or more objects implementing Bio::Ontology::OntologyI
# Throws : through $self->throw() if an argument is not a blessed object
#          that isa Bio::Ontology::OntologyI
sub _add_ontology {
    my $self = shift;

    # Core module, loaded lazily so no file-level import is required.
    require Scalar::Util;

    $self->{'_ontologies'} = [] unless exists($self->{'_ontologies'});
    foreach my $ont (@_) {
        # Test blessed() before ->isa(): invoking a method on an unblessed
        # reference (e.g. a plain hash ref) would die with Perl's own
        # "Can't call method" error instead of the diagnostic below.
        $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI")
            unless Scalar::Util::blessed($ont)
                && $ont->isa("Bio::Ontology::OntologyI");
        # An ontology without a name takes over the parser's ontology name.
        $ont->name($self->ontology_name) unless $ont->name();
        push(@{$self->{'_ontologies'}}, $ont);
    }
}
} |
# Registers a relationship with the ontology engine.
# Callers pass the parent first; the engine's add_relationship() is invoked
# with the arguments reordered to (child, type, parent, ontology).
sub _add_relationship {
    my $self = shift;
    my ( $parent, $child, $type, $ont ) = @_;

    my $engine = $self->_ont_engine();
    return $engine->add_relationship( $child, $type, $parent, $ont );
}
} |
# Hands a term to the ontology engine. If an ontology is supplied and the
# term does not have one yet, the term is assigned to that ontology first.
sub _add_term {
    my $self = shift;
    my ( $term, $ont ) = @_;

    if ( $ont && !$term->ontology ) {
        $term->ontology($ont);
    }
    return $self->_ont_engine()->add_term($term);
}
} |
# Returns the number of leading whitespace characters of $line
# (0 if the line does not start with whitespace).
sub _count_spaces {
    my ( $self, $line ) = @_;
    return $line =~ /^(\s+)/ ? length($1) : 0;
}
} |
# Builds a term object through the term factory.
#
# Args   : name, identifier, definition, comment, array ref of dblinks and
#          an obsolete flag; everything after the identifier is optional
# Returns: whatever $self->term_factory->create_object() returns
#
# If no obsolete flag is given, a name that starts with "obsolete"
# (case-insensitive) marks the term as obsolete.
sub _create_ont_entry {
    my ( $self, $name, $termid, $def, $cmt, $dbxrefs, $obsolete ) = @_;

    # The name can legitimately be undef (_get_name() returns undef when it
    # finds no name), so check definedness before lc() to avoid an
    # "uninitialized value" warning.
    if (   !defined($obsolete)
        && defined($name)
        && index( lc($name), "obsolete" ) == 0 )
    {
        $obsolete = 1;
    }

    my $term = $self->term_factory->create_object(-name => $name,
                                                  -identifier => $termid,
                                                  -definition => $def,
                                                  -comment => $cmt,
                                                  -dblinks => $dbxrefs,
                                                  -is_obsolete => $obsolete);
    return $term;
}
} |
# Get/set accessor for the IO object of the definitions file.
# Any argument, including an explicit undef, replaces the stored value.
sub _defs_io {
    my ( $self, @args ) = @_;
    if (@args) {
        $self->{'_defs_io'} = $args[0];
    }
    return $self->{'_defs_io'};
}
# Get/set accessor for the "_done" flag. Only a defined argument changes
# the stored value; the current value is always returned.
sub _done {
    my $self  = shift;
    my $value = shift;
    $self->{"_done"} = $value if defined $value;
    return $self->{"_done"};
}
} |
# Returns the array reference stored under "_flat_files", creating an empty
# array first if the slot does not exist yet.
sub _flat_files {
    my ($self) = @_;
    if ( !exists $self->{_flat_files} ) {
        $self->{_flat_files} = [];
    }
    return $self->{_flat_files};
}
} |
# Collects the "db:accession" style cross references found after the
# semicolons of a flat-file line. Fields containing "synonym" and fields
# that look like a term id (1-8 capitals, colon, 3+ digits) are skipped.
# Returns the list of references with surrounding whitespace removed.
sub _get_db_cross_refs {
    my $self = shift;
    my ($line) = @_;

    my @refs;
    while ( $line =~ /;([^;^<^%^:]+:[^;^<^%^:]+)/g ) {
        my $candidate = $1;
        next if $candidate =~ /synonym/ || $candidate =~ /[A-Z]{1,8}:\d{3,}/;
        $candidate =~ s/\s+$//;
        $candidate =~ s/^\s+//;
        push @refs, $candidate;
    }
    return @refs;
}
} |
# Returns the first term id (1-8 capitals, colon, 3+ digits) that follows a
# semicolon on the line; reports a format error through $self->throw() if
# the line has none.
sub _get_first_termid {
    my $self = shift;
    my ($line) = @_;

    return $1 if $line =~ /;\s*([A-Z]{1,8}:\d{3,})/;
    $self->throw( "format error: no term id in line\" $line\"" );
}
} |
# Returns the term ids that follow a '%' (is-a) marker on the line.
# The first term id on the line is removed beforehand so that it is not
# picked up as one of the results.
sub _get_isa_termids {
    my $self = shift;
    my ($line) = @_;

    $line =~ s/[A-Z]{1,8}:\d{3,}//;

    my @ids;
    push @ids, $1 while $line =~ /%[^<^,]*?([A-Z]{1,8}:\d{3,})/g;
    return @ids;
}
} |
# Extracts the name that precedes "; <termid>" on a flat-file line.
#
# Args   : the line and the term id whose name is wanted
# Returns: the name with surrounding whitespace removed, or undef if the
#          term id is not preceded by a name on this line
#
# A name starting with '$' has that character removed; if the parser has no
# ontology name yet, the remainder (underscores turned into spaces) is
# stored as the ontology name.
sub _get_name {
    my ( $self, $line, $termid ) = @_;

    # \Q...\E makes the id match literally, and (?!\d) keeps a short id from
    # matching as the prefix of a longer one (e.g. X:123 inside X:1234),
    # which would return the name of the wrong term.
    if ( $line =~ /([^;<%]+);\s*\Q$termid\E(?!\d)/ ) {
        my $name = $1;
        $name =~ s/\s+$//;
        $name =~ s/^\s+//;
        if ( index( $name, '$' ) == 0 ) {
            $name = substr( $name, 1 );
            $self->ontology_name( join( " ", split( /_/, $name ) ) )
                unless $self->ontology_name();
        }
        return $name;
    }

    # Deliberately "return undef" rather than a bare return: callers use the
    # result directly inside an argument list, where an empty list would
    # shift the following arguments.
    return undef;
}
} |
# Returns the term ids that follow a '<' (part-of) marker on the line.
# The first term id on the line is removed beforehand so that it is not
# picked up as one of the results.
sub _get_partof_termids {
    my $self = shift;
    my ($line) = @_;

    $line =~ s/[A-Z]{1,8}:\d{3,}//;

    my @ids;
    push @ids, $1 while $line =~ /<[^%^,]*?([A-Z]{1,8}:\d{3,})/g;
    return @ids;
}
} |
# Returns every term id on the line that directly follows a comma
# (optionally separated from it by whitespace).
sub _get_secondary_termids {
    my $self = shift;
    my ($line) = @_;

    # A global match in list context yields all captured ids at once.
    my @secs = $line =~ /,\s*([A-Z]{1,8}:\d{3,})/g;
    return @secs;
}
} |
# Returns the text of every "synonym: ..." field on the line, each with
# surrounding whitespace removed.
sub _get_synonyms {
    my $self = shift;
    my ($line) = @_;

    my @synonyms = map {
        my $text = $_;
        $text =~ s/\s+$//;
        $text =~ s/^\s+//;
        $text;
    } $line =~ /synonym\s*:\s*([^;^<^%]+)/g;

    return @synonyms;
}
} |
# Forwards its arguments to the ontology engine's has_term() and returns
# the engine's answer.
sub _has_term {
    my ( $self, @query ) = @_;
    my $engine = $self->_ont_engine();
    return $engine->has_term(@query);
}
} |
# Initializes the parser from the constructor arguments.
#
# Recognized named arguments (extracted with _rearrange):
#   DEFS_FILE     - name of the term definitions file
#   FILES         - a single flat file name or an array ref of file names
#   ONTOLOGY_NAME - name of the ontology
#   ENGINE        - ontology engine (or ontology) to reuse
#
# All arguments are first passed on to the superclass _initialize().
sub _initialize
{ my ($self, @args) = @_;
$self->SUPER::_initialize( @args );
my ( $defs_file_name,$files,$name,$eng ) =
$self->_rearrange([qw( DEFS_FILE
FILES
ONTOLOGY_NAME
ENGINE)
],
@args );
# Reset the state used while reading the definitions file.
# NOTE(review): FALSE is a constant defined outside this view.
$self->_done( FALSE );
$self->_not_first_record( FALSE );
$self->_term( "" );
# Removing the slot makes the next call to next_ontology() trigger parse(),
# because next_ontology() tests for the slot's existence.
delete $self->{'_ontologies'};
# Create a default engine unless one was passed in.
$eng = Bio::Ontology::SimpleGOEngine->new() unless $eng;
# If an ontology object was passed instead of a bare engine, take over its
# name and use its underlying engine when it offers one.
if($eng->isa("Bio::Ontology::OntologyI")) {
$self->ontology_name($eng->name());
$eng = $eng->engine() if $eng->can('engine');
}
$self->_ont_engine($eng);
# Called even when no definitions file was given; defs_file() only opens an
# IO object for a defined name.
$self->defs_file( $defs_file_name );
# Normalize FILES: keep a reference as is, wrap a single name in an array,
# and fall back to an empty array.
$self->{_flat_files} = $files ? ref($files) ? $files : [$files] : [];
# An explicitly given ontology name overrides the one taken from the engine.
$self->ontology_name($name) if $name;
}
} |
# Forwards to the ontology engine's is_a_relationship(), passing along any
# arguments, and returns its result.
sub _is_a_relationship {
    my ( $self, @args ) = @_;
    my $engine = $self->_ont_engine();
    return $engine->is_a_relationship(@args);
}
} |
# Reads the next term record from the definitions file and returns a term
# object built by _create_ont_entry().
#
# Returns undef when the "_done" flag is set or no definitions IO object
# exists.
#
# A record starts at a "term:" line. Because a record's end is only
# detected by reading the "term:" line of the following record, that name
# is stored via _term() and picked up as this record's name on the next
# call.
# NOTE(review): TRUE is a constant defined outside this view.
sub _next_term
{ my ( $self ) = @_;
if ( ($self->_done() == TRUE) || (! $self->_defs_io())) {
return undef;
}
my $line = "";
my $termid = "";
# Name of the record about to be read: the name stored by the previous call
# (or the initial value of _term on the first call).
my $next_term = $self->_term();
my $def = "";
my $comment = "";
my @def_refs = ();
my $isobsolete;
while( $line = ( $self->_defs_io->_readline() ) ) {
# Skip lines without any non-whitespace character and lines whose first
# non-whitespace character is '!'.
if ( $line !~ /\S/
|| $line =~ /^\s*!/ ) {
next;
}
elsif ( $line =~ /^\s*term:\s*(.+)/ ) {
# Remember the name; if a record was already started, this line belongs to
# the following record, so stop here.
$self->_term( $1 );
last if $self->_not_first_record();
$next_term = $1;
$self->_not_first_record( TRUE );
}
# Identifier line: one to eight lowercase letters followed by "id:".
elsif ( $line =~ /^\s*[a-z]{1,8}id:\s*(.+)/ ) {
$termid = $1;
}
elsif ( $line =~ /^\s*definition:\s*(.+)/ ) {
$def = $1;
# A definition that starts with "OBSOLETE" marks the term as obsolete.
$isobsolete = 1 if index($def,"OBSOLETE") == 0;
}
elsif ( $line =~ /^\s*definition_reference:\s*(.+)/ ) {
push( @def_refs, $1 );
}
elsif ( $line =~ /^\s*comment:\s*(.+)/ ) {
$comment = $1;
}
}
# The loop ended because reading returned a false value rather than via
# "last": flag the file as exhausted so the next call returns undef.
$self->_done( TRUE ) unless $line;
return $self->_create_ont_entry( $next_term, $termid, $def,
$comment,\@ def_refs, $isobsolete);
}
} |
# Get/set accessor for the "_not_first_record" flag. Only a defined
# argument changes the stored value; the current value is always returned.
sub _not_first_record {
    my $self = shift;
    my ($value) = @_;
    $self->{"_not_first_record"} = $value if defined $value;
    return $self->{"_not_first_record"};
}
} |
# Get/set accessor for the ontology engine stored under "_ont_engine".
# Only a defined argument replaces the stored engine.
sub _ont_engine {
    my $self   = shift;
    my $engine = shift;
    if ( defined $engine ) {
        $self->{"_ont_engine"} = $engine;
    }
    return $self->{"_ont_engine"};
}
} |
# Parses the currently open ontology flat file line by line, adding terms
# and relationships to the ontology engine.
#
# Args   : the ontology object that terms and relationships are assigned to
# Returns: that same ontology object
# Throws : "format error" via $self->throw() when a line's indentation
#          jumps by more than one level, or when a line not starting with
#          '$' does not begin (after whitespace) with '<' or '%'.
#          _get_first_termid() additionally throws for lines without a
#          term id.
#
# The hierarchy is encoded by indentation: each additional leading
# whitespace character makes a line a child of the nearest preceding line
# that is one level shallower.
sub _parse_flat_file
{ my $self = shift;
my $ont = shift;
# Ancestor term ids of the current line; the last element is the direct
# parent.
my @stack = ();
my $prev_spaces = -1;
my $prev_term = "";
while( my $line = $self->_readline() ) {
# Lines starting with '!' are skipped.
if ( $line =~ /^!/ ) {
next;
}
my $current_term = $self->_get_first_termid( $line );
my @isa_parents = $self->_get_isa_termids( $line );
my @partof_parents = $self->_get_partof_termids( $line );
my @syns = $self->_get_synonyms( $line );
my @sec_go_ids = $self->_get_secondary_termids( $line );
my @cross_refs = $self->_get_db_cross_refs( $line );
# Create the term only if the engine does not know it yet.
if ( ! $self->_has_term( $current_term ) ) {
my $term =$self->_create_ont_entry($self->_get_name($line,
$current_term),
$current_term );
$self->_add_term( $term, $ont );
}
# Attach the information found on this line to the term held by the engine.
my $current_term_object = $self->_ont_engine()->get_terms( $current_term );
$current_term_object->add_dblink( @cross_refs );
$current_term_object->add_secondary_id( @sec_go_ids );
$current_term_object->add_synonym( @syns );
# Lines starting with '$' do not get their ontology set here.
unless ( $line =~ /^\$/ ) {
$current_term_object->ontology( $ont );
}
# Additional is-a parents listed on the same line: create missing parent
# terms (named from this line) and record the relationship.
foreach my $parent ( @isa_parents ) {
if ( ! $self->_has_term( $parent ) ) {
my $term = $self->_create_ont_entry($self->_get_name($line,
$parent),
$parent );
$self->_add_term( $term, $ont );
}
$self->_add_relationship( $parent,
$current_term,
$self->_is_a_relationship(),
$ont);
}
# Same for additional part-of parents listed on the line.
foreach my $parent ( @partof_parents ) {
if ( ! $self->_has_term( $parent ) ) {
my $term = $self->_create_ont_entry($self->_get_name($line,
$parent),
$parent );
$self->_add_term( $term, $ont );
}
$self->_add_relationship( $parent,
$current_term,
$self->_part_of_relationship(),
$ont);
}
# Maintain the ancestor stack from the change in indentation.
my $current_spaces = $self->_count_spaces( $line );
if ( $current_spaces != $prev_spaces ) {
# One level deeper: the previous line's term becomes the parent.
if ( $current_spaces == $prev_spaces + 1 ) {
push( @stack, $prev_term );
}
# Shallower: drop one ancestor per level moved up.
elsif ( $current_spaces < $prev_spaces ) {
my $n = $prev_spaces - $current_spaces;
for ( my $i = 0; $i < $n; ++$i ) {
pop( @stack );
}
}
# Deeper by more than one level is a format error.
else {
$self->throw( "format error (file ".$self->file.")" );
}
}
# Top of the stack is the direct parent.
my $parent = $stack[ @stack - 1 ];
# Every line that does not start with '$' is linked to its parent; the
# leading marker selects the type ('<' part-of, '%' is-a).
if ( index($line,'$') != 0 ) {
if ( $line !~ /^\s*[<%]/ ) {
$self->throw( "format error (file ".$self->file.")" );
}
my $reltype = ($line =~ /^\s*</) ?
$self->_part_of_relationship() :
$self->_is_a_relationship();
$self->_add_relationship( $parent, $current_term, $reltype,
$ont);
}
$prev_spaces = $current_spaces;
$prev_term = $current_term;
}
return $ont;
}
} |
# Forwards to the ontology engine's part_of_relationship(), passing along
# any arguments, and returns its result.
sub _part_of_relationship {
    my ( $self, @args ) = @_;
    my $engine = $self->_ont_engine();
    return $engine->part_of_relationship(@args);
}
} |
# Get/set accessor for the value stored under "_term". Only a defined
# argument (an empty string counts) replaces the stored value.
sub _term {
    my $self = shift;
    if ( defined( my $value = shift ) ) {
        $self->{"_term"} = $value;
    }
    return $self->{"_term"};
}
} |
# Get/set the name of the term definitions file.
#
# Args   : on set, the file name (undef to unset)
# Returns: the currently stored definitions file name
#
# On set, an IO object opened for the previous definitions file is closed
# and forgotten; a new Bio::Root::IO object is opened only for a defined
# file name.
sub defs_file {
    my $self = shift;

    if (@_) {
        my $f = shift;
        $self->{"_defs_file_name"} = $f;

        if ( my $old_io = $self->_defs_io() ) {
            $old_io->close();
            # Drop the closed object. Otherwise, when no new file is opened
            # below, _defs_io() would keep returning a closed handle and
            # readers would try to read from it.
            $self->_defs_io(undef);
        }
        if ( defined($f) ) {
            $self->_defs_io( Bio::Root::IO->new( -input => $f ) );
        }
    }
    return $self->{"_defs_file_name"};
}
} |
# Returns the next parsed ontology, or undef when none is left.
# parse() is invoked first if no ontology queue exists yet.
sub next_ontology {
    my ($self) = @_;

    if ( !exists $self->{'_ontologies'} ) {
        $self->parse();
    }
    if ( exists $self->{'_ontologies'} ) {
        return shift( @{ $self->{'_ontologies'} } );
    }
    return undef;
}
# Get/set accessor for the name of the ontology being parsed.
# Any argument, including an explicit undef, replaces the stored name.
sub ontology_name {
    my ( $self, @args ) = @_;
    if (@args) {
        $self->{'ontology_name'} = $args[0];
    }
    return $self->{'ontology_name'};
}
# Parses the definitions file (if any) and all ontology flat files into a
# single ontology, and queues that ontology for next_ontology().
#
# Returns: the ontology engine ($self->_ont_engine())
#
# NOTE(review): term_factory, _fh, _initialize_io and close are not defined
# in this view; presumably they are inherited - confirm against the base
# classes.
sub parse
{ my $self = shift;
# Install a default term factory unless one has been set already.
$self->term_factory(Bio::Ontology::TermFactory->new(
-type => "Bio::Ontology::Term"))
unless $self->term_factory();
my $ont = Bio::Ontology::Ontology->new(-name => $self->ontology_name(),
-engine => $self->_ont_engine());
# Load every term from the definitions file; _next_term() returns undef
# when there is no definitions file or it is exhausted.
while( my $term = $self->_next_term() ) {
$self->_add_term( $term, $ont );
}
# Assign the two relationship types to this ontology.
foreach ($self->_part_of_relationship(), $self->_is_a_relationship()) {
$_->ontology($ont);
}
# If no input is open yet, open the first flat file from the list.
if(! $self->_fh) {
$self->_initialize_io(-file => shift(@{$self->_flat_files()}));
}
# Parse the open file, then move on to the remaining flat files one by one;
# each shift() removes the file from the list.
while($self->_fh) {
$self->_parse_flat_file($ont);
if(@{$self->_flat_files()}) {
$self->close();
$self->_initialize_io(-file => shift(@{$self->_flat_files()}));
} else {
last; }
}
$self->_add_ontology($ont);
return $self->_ont_engine();
}
} |
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to the
Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bio.perl.org/MailList.html - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track
of the bugs and their resolution. Bug reports can be submitted via
email or the web:
bioperl-bugs@bio.perl.org
http://bugzilla.bioperl.org/
Hilmar Lapp, hlapp at gmx.net
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _
Title : new
Usage : $parser = Bio::OntologyIO->new(
-format => "go",
-defs_file => "/path/to/GO.defs",
-files => ["/path/to/component.ontology",
"/path/to/function.ontology",
"/path/to/process.ontology"] );
Function: Creates a new dagflat parser.
Returns : A new dagflat parser object, implementing Bio::OntologyIO.
Args : -defs_file => the name of the file holding the term
definitions
-files => a single ontology flat file holding the
term relationships, or an array ref holding
the file names (for GO, there will usually be
3 files: component.ontology, function.ontology,
process.ontology)
-file => if there is only a single flat file, it may
also be specified via the -file parameter
-ontology_name => the name of the ontology; if not specified the
parser will auto-discover it by using the term
that starts with a '$', and converting underscores
to spaces
-engine => the Bio::Ontology::OntologyEngineI object
to be reused (will be created otherwise); note
that every Bio::Ontology::OntologyI will
qualify as well since that one inherits from the
former.