Bio::Phenotype::OMIM
OMIMparser
Toolbar
Summary
OMIMparser - parser for the OMIM database
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
use Bio::Phenotype::OMIM::OMIMparser;
# The OMIM database is available as textfile at:
# ftp://ncbi.nlm.nih.gov/repository/OMIM/omim.txt.Z
# The genemap is available as textfile at:
# ftp://ncbi.nlm.nih.gov/repository/OMIM/genemap
$omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap => "/path/to/genemap",
-omimtext => "/path/to/omim.txt" );
while ( my $omim_entry = $omim_parser->next_phenotype() ) {
# This prints everything.
print( $omim_entry->to_string() );
print "\n\n";
# This gets individual data (some of them object-arrays)
# (and illustrates the relevant methods of OMIMentry).
my $numb = $omim_entry->MIM_number(); # *FIELD* NO
my $title = $omim_entry->title(); # *FIELD* TI - first line
my $alt = $omim_entry->alternative_titles_and_symbols(); # *FIELD* TI - additional lines
my $mtt = $omim_entry->more_than_two_genes(); # "#" before title
my $sep = $omim_entry->is_separate(); # "*" before title
my $desc = $omim_entry->description(); # *FIELD* TX
my $mm = $omim_entry->mapping_method(); # from genemap
my $gs = $omim_entry->gene_status(); # from genemap
my $cr = $omim_entry->created(); # *FIELD* CD
my $cont = $omim_entry->contributors(); # *FIELD* CN
my $ed = $omim_entry->edited(); # *FIELD* ED
my $sa = $omim_entry->additional_references(); # *FIELD* SA
my $cs = $omim_entry->clinical_symptoms(); # *FIELD* CS
my $comm = $omim_entry->comment(); # from genemap
my $mini_mim = $omim_entry->miniMIM(); # *FIELD* MN
# A Bio::Phenotype::OMIM::MiniMIMentry object.
# class Bio::Phenotype::OMIM::MiniMIMentry
# provides the following:
# - description()
# - created()
# - contributors()
# - edited()
#
# Prints the contents of the MINI MIM entry (most OMIM entries do
# not have MINI MIM entries, though).
print $mini_mim->description()."\n";
print $mini_mim->created()."\n";
print $mini_mim->contributors()."\n";
print $mini_mim->edited()."\n";
my @corrs = $omim_entry->each_Correlate(); # from genemap
# Array of Bio::Phenotype::Correlate objects.
# class Bio::Phenotype::Correlate
# provides the following:
# - name()
# - description() (not used)
# - species() (always mouse)
# - type() ("OMIM mouse correlate")
# - comment()
my @refs = $omim_entry->each_Reference(); # *FIELD* RF
# Array of Bio::Annotation::Reference objects.
my @avs = $omim_entry->each_AllelicVariant(); # *FIELD* AV
# Array of Bio::Phenotype::OMIM::OMIMentryAllelicVariant objects.
# class Bio::Phenotype::OMIM::OMIMentryAllelicVariant
# provides the following:
# - number (e.g ".0001" )
# - title (e.g "ALCOHOL INTOLERANCE" )
# - symbol (e.g "ALDH2*2" )
# - description (e.g "The ALDH2*2-encoded protein has a change ..." )
# - aa_ori (used if information in the form "LYS123ARG" is found)
# - aa_mut (used if information in the form "LYS123ARG" is found)
# - position (used if information in the form "LYS123ARG" is found)
# - additional_mutations (used for e.g. "1-BP DEL, 911T")
my @cps = $omim_entry->each_CytoPosition(); # from genemap
# Array of Bio::Map::CytoPosition objects.
my @gss = $omim_entry->each_gene_symbol(); # from genemap
# Array of strings.
# do something ...
}
Description
Methods
Methods description
Title : genemap_file_name Usage : $omimparser->genemap_file_name( "genemap" ); Function: Set/get for the genemap file name. Returns : The genemap file name [string]. Args : The genemap file name [string] (optional). |
Title : init() Usage : $omim_parser->init(); Function: Initializes this OMIMparser to all "". Returns : Args : |
Title : new Usage : $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new( -genemap => "/path/to/genemap", -omimtext => "/path/to/omim.txt" ); Function: Creates a new OMIMparser. Returns : A new OMIMparser object. Args : -genemap => the genemap file name (optional) -omimtext => the omim text file name |
Title : next_phenotype() Usage : while ( my $omim_entry = $omim_parser->next_phenotype() ) { # do something with $omim_entry } Function: Returns a Bio::Phenotype::OMIM::OMIMentry or undef once the end of the omim text file is reached. Returns : A Bio::Phenotype::OMIM::OMIMentry. Args : |
Title : omimtxt_file_name Usage : $omimparser->omimtxt_file_name( "omim.txt" ); Function: Set/get for the omim text file name. Returns : The omim text file name [string]. Args : The omim text file name [string] (optional). |
Methods code
_OMIM_text_file | description | prev | next | Top |
# Title   : _OMIM_text_file
# Usage   : $self->_OMIM_text_file( $io );
#           my $io = $self->_OMIM_text_file();
# Function: Internal set/get for the I/O object the OMIM text is read from.
# Returns : The stored object, or undef if none has been stored.
# Args    : A Bio::Root::IO object (optional). An undefined argument is
#           ignored, so this method cannot be used to clear the slot.
# Throws  : Via $self->throw() if the argument is defined but is not an
#           object that isa Bio::Root::IO.
sub _OMIM_text_file {
    my ( $self, $value ) = @_;

    if ( defined $value ) {
        # Check blessedness before calling ->isa(): invoking it on a plain
        # string would treat the string as a class name, and invoking it on
        # an unblessed reference dies with an unrelated message.
        require Scalar::Util;
        unless ( Scalar::Util::blessed( $value )
                 && $value->isa( "Bio::Root::IO" ) ) {
            $self->throw( "[$value] is not a valid \"Bio::Root::IO\"" );
        }
        $self->{ "_omimtxt_file" } = $value;
    }

    return $self->{ "_omimtxt_file" };
}
} |
# Title   : _add_to_hash
# Usage   : $self->_add_to_hash( $state, $contents, \%record );
# Function: Stores $contents in the record under the key $state. If the key
#           already exists, a trailing newline is chomped from the stored
#           text and $contents is appended to it.
# Returns : The value now stored under $state.
# Args    : The key, the text to store, and a reference to the record hash.
sub _add_to_hash {
    my ( $self, $state, $contents, $record_ref ) = @_;

    # First occurrence of this key: store the text as it is.
    unless ( exists $record_ref->{ $state } ) {
        return $record_ref->{ $state } = $contents;
    }

    # Repeated key: join the new text directly onto the existing text.
    chomp $record_ref->{ $state };
    return $record_ref->{ $state } .= $contents;
}
1; } |
# Title   : _createOMIMentry
# Usage   : my $entry = $self->_createOMIMentry( \%record );
# Function: Builds an OMIMentry (with an attached MiniMIMentry) from a record
#           hash whose keys are parser state constants and whose values are
#           the raw text collected for that state.
# Returns : A Bio::Phenotype::OMIM::OMIMentry.
# Args    : A reference to the record hash.
sub _createOMIMentry {
    my ( $self, $record_ref ) = @_;

    my $entry = Bio::Phenotype::OMIM::OMIMentry->new();
    my $mini  = Bio::Phenotype::OMIM::MiniMIMentry->new();

    foreach my $field ( keys %{ $record_ref } ) {
        # Work on a trimmed copy; the record itself is left untouched.
        my $text = $record_ref->{ $field };
        $text =~ s/^\s+//;
        $text =~ s/\s+$//;

        if ( $field == MIM_NUMBER_STATE ) {
            # Keep the digits only (this also removes all whitespace).
            $text =~ s/\D//g;
            $entry->MIM_number( $text );

            # Add genemap data when the genemap has a line for this number.
            my $genemap = $self->_genemap_hash();
            if ( exists $$genemap{ $text } ) {
                $self->_parse_genemap( $entry, $text );
            }
        }
        elsif ( $field == TITLE_STATE ) {
            my ( $main_title, $other_titles ) = $self->_parse_title( $text );
            $entry->title( $main_title );
            $entry->alternative_titles_and_symbols( $other_titles );

            # A leading "*" or "#" on the title sets the matching flag.
            if ( $main_title =~ /^\*/ ) {
                $entry->is_separate( TRUE );
            }
            elsif ( $main_title =~ /^#/ ) {
                $entry->more_than_two_genes( TRUE );
            }
        }
        elsif ( $field == TEXT_STATE ) {
            $entry->description( $text );
        }
        elsif ( $field == ALLELIC_VARIANT_STATE ) {
            my @variants = $self->_parse_allelic_variants( $text );
            $entry->add_AllelicVariants( @variants );
        }
        elsif ( $field == SEE_ALSO_STATE ) {
            $entry->additional_references( $text );
        }
        elsif ( $field == REF_STATE ) {
            my @references = $self->_parse_references( $text );
            $entry->add_References( @references );
        }
        elsif ( $field == SYMPT_STATE ) {
            $entry->clinical_symptoms( $text );
        }
        elsif ( $field == CONTRIBUTORS_STATE ) {
            $entry->contributors( $text );
        }
        elsif ( $field == CREATED_BY_STATE ) {
            $entry->created( $text );
        }
        elsif ( $field == EDITED_BY_STATE ) {
            $entry->edited( $text );
        }
        elsif ( $field == MINI_MIM_TEXT_STATE ) {
            $mini->description( $text );
        }
        elsif ( $field == MINI_MIM_CONTRIBUTORS_STATE ) {
            $mini->contributors( $text );
        }
        elsif ( $field == MINI_MIM_CREATED_BY_STATE ) {
            $mini->created( $text );
        }
        elsif ( $field == MINI_MIM_EDITED_BY_STATE ) {
            $mini->edited( $text );
        }
    }

    # Every entry is assigned the same species description.
    my $human = Bio::Species->new();
    $human->classification( qw( sapiens Homo ) );
    $human->common_name( "man" );
    $entry->species( $human );

    $entry->miniMIM( $mini );

    return $entry;
}
} |
# Title   : _create_allelic_variant
# Usage   : my $av = $self->_create_allelic_variant( $number, $title,
#                                                    $symbol_mut_line,
#                                                    $description );
# Function: Builds one OMIMentryAllelicVariant. The symbol/mutation line is
#           split into a symbol plus either an amino-acid change of the form
#           "LYS123ARG" (original residue, position, mutated residue) or
#           free-text additional mutations.
# Returns : A Bio::Phenotype::OMIM::OMIMentryAllelicVariant.
# Args    : Variant number, title, symbol/mutation line, description.
# Dies    : With "undef desc" if the description is undefined.
sub _create_allelic_variant {
    my ( $self, $number, $title, $symbol_mut_line, $description ) = @_;

    my ( $symbol, $mutation, $aa_ori, $aa_mut, $position )
        = ( "", "", "", "", "" );

    if ( my @aa_change
            = $symbol_mut_line =~ /\s*(.+?)\s*,\s*([a-z]{3})(\d+)([a-z]{3})/i ) {
        # "SYMBOL, LYS123ARG": three letters, digits, three letters.
        ( $symbol, $aa_ori, $position, $aa_mut ) = @aa_change;
    }
    elsif ( my @free_text = $symbol_mut_line =~ /\s*(.+?)\s*,\s*(.+)/ ) {
        # "SYMBOL, anything else": keep the remainder as free text.
        ( $symbol, $mutation ) = @free_text;
    }
    else {
        # No comma: the whole line is the symbol.
        $symbol = $symbol_mut_line;
    }

    die( "undef desc" )     unless defined( $description );
    die( "undef mutation" ) unless defined( $mutation );

    my $variant = Bio::Phenotype::OMIM::OMIMentryAllelicVariant->new();
    $variant->number( $number );
    $variant->aa_ori( $aa_ori );
    $variant->aa_mut( $aa_mut );
    $variant->position( $position );
    $variant->title( $title );
    $variant->symbol( $symbol );
    $variant->description( $description );
    $variant->additional_mutations( $mutation );

    return $variant;
}
} |
# Title   : _done
# Usage   : $self->_done( TRUE );  if ( $self->_done() == TRUE ) { ... }
# Function: Internal set/get for the "_done" flag of this parser.
# Returns : The current value of the flag.
# Args    : TRUE or FALSE (optional).
# Throws  : Via $self->throw() if a defined argument equals neither constant.
sub _done {
    my ( $self, $value ) = @_;

    # Plain getter when no value is supplied.
    return $self->{ "_done" } unless defined $value;

    if ( $value != FALSE && $value != TRUE ) {
        $self->throw( "Found [$value] where [" . TRUE
                      . " or " . FALSE . "] expected" );
    }

    return $self->{ "_done" } = $value;
}
} |
# Title   : _genemap_hash
# Usage   : $self->_genemap_hash( \%genemap );
#           my $genemap_ref = $self->_genemap_hash();
# Function: Internal set/get for the hash holding the genemap lines.
# Returns : The stored hash reference.
# Args    : A reference to a hash (optional).
# Throws  : Via $self->throw() if a defined argument is not a plain hash
#           reference.
sub _genemap_hash {
    my ( $self, $value ) = @_;

    if ( defined $value ) {
        unless ( ref( $value ) eq "HASH" ) {
            # The original message had its quoting garbled
            # ('method" _genemap_hash"'); restored here.
            $self->throw( "Argument to method \"_genemap_hash\" is not a reference to an Hash" );
        }
        $self->{ "_genemap_hash" } = $value;
    }

    return $self->{ "_genemap_hash" };
}
} |
# Title   : _is_not_first_record
# Usage   : $self->_is_not_first_record( TRUE );
# Function: Internal set/get for the "_not_first_record" flag of this parser.
# Returns : The current value of the flag.
# Args    : TRUE or FALSE (optional).
# Throws  : Via $self->throw() if a defined argument equals neither constant.
sub _is_not_first_record {
    my ( $self, $value ) = @_;

    # Plain getter when no value is supplied.
    return $self->{ "_not_first_record" } unless defined $value;

    if ( $value != FALSE && $value != TRUE ) {
        $self->throw( "Found [$value] where [" . TRUE
                      . " or " . FALSE . "] expected" );
    }

    return $self->{ "_not_first_record" } = $value;
}
} |
_no_OMIM_text_file_provided_error | description | prev | next | Top |
# Title   : _no_OMIM_text_file_provided_error
# Usage   : $self->_no_OMIM_text_file_provided_error();
# Function: Reports, via $self->throw(), that no OMIM text file has been set,
#           and shows the two ways of setting one.
# Returns : Whatever $self->throw() returns.
# Args    : None.
sub _no_OMIM_text_file_provided_error {
    my ( $self ) = @_;

    # The quoting and spacing of the two code examples had been garbled
    # (e.g. 'either" OMIMparser' and '$ omim_parser'); restored here so the
    # message shows code the user can actually copy.
    my $msg = "Need to indicate a OMIM text file to read from with\n";
    $msg .= "either \"OMIMparser->new( -omimtext => \"path/to/omim.txt\" );\"\n";
    $msg .= "or \"\$omim_parser->omimtxt_file_name( \"path/to/omim.txt\" );\"";

    $self->throw( $msg );
}
} |
# Title   : _not_a_OMIM_text_file_error
# Usage   : $self->_not_a_OMIM_text_file_error();
# Function: Reports, via $self->throw(), that the configured file does not
#           look like an OMIM text file. The file name is taken from
#           $self->omimtxt_file_name().
# Returns : Whatever $self->throw() returns.
# Args    : None.
sub _not_a_OMIM_text_file_error {
    my ( $self ) = @_;

    # The opening quote had drifted away from the file name
    # ('File" name"'); restored so the name is quoted as a whole.
    my $msg = "File \"" . $self->omimtxt_file_name()
            . "\" appears not to be a OMIM text file";

    $self->throw( $msg );
}
} |
# Title   : _parse_allelic_variants
# Usage   : my @variants = $self->_parse_allelic_variants( $text );
# Function: Splits the text of an allelic-variants field into individual
#           variants and builds one object per variant through
#           $self->_create_allelic_variant().
#
#           A line starting (after optional whitespace) with ".<digits>"
#           opens a new variant. The first following line is the title.
#           Further lines up to the first empty line are remembered one at a
#           time; the last of them is taken as the symbol/mutation line.
#           Every line after the first empty line belongs to the description.
# Returns : A list of the objects returned by _create_allelic_variant();
#           empty if the text contained nothing to build a variant from.
# Args    : The text of the field [string].
sub _parse_allelic_variants {
    my ( $self, $text ) = @_;

    my @allelic_variants;
    my $number          = "";
    my $title           = "";
    my $symbol_mut_line = "";
    my $prev_line       = "";
    my $description     = "";
    my $saw_empty_line  = 0;

    foreach my $line ( split( /\n/, $text ) ) {
        if ( $line !~ /\w/ ) {
            # A line without any word character counts as empty.
            $saw_empty_line = 1;
        }
        elsif ( $line =~ /^\s*(\.\d+)/ ) {
            my $current_number = $1;
            if ( $number ne "" ) {
                # A variant without description never reaches the branch
                # that promotes the pending line, so promote it here;
                # otherwise its symbol/mutation line would be lost.
                if ( $prev_line ne "" ) {
                    $symbol_mut_line = $prev_line;
                }
                push( @allelic_variants,
                      $self->_create_allelic_variant( $number, $title,
                                                      $symbol_mut_line,
                                                      $description ) );
            }
            $number          = $current_number;
            $title           = "";
            $prev_line       = "";
            $symbol_mut_line = "";
            $description     = "";
            $saw_empty_line  = 0;
        }
        elsif ( $title eq "" ) {
            $title = $line;
        }
        elsif ( !$saw_empty_line ) {
            # Still in the header part: remember the most recent line.
            $prev_line = $line;
        }
        else {
            # First description line: the remembered header line is the
            # symbol/mutation line.
            if ( $prev_line ne "" ) {
                $symbol_mut_line = $prev_line;
                $prev_line       = "";
            }
            if ( $description ne "" ) {
                $description .= "\n" . $line;
            }
            else {
                $description = $line;
            }
        }
    }

    # Same promotion for the last variant of the field.
    if ( $prev_line ne "" ) {
        $symbol_mut_line = $prev_line;
    }

    # Do not emit a variant when nothing at all was collected (e.g. empty
    # text); the result would be an object with every field blank.
    if (    $number          ne ""
         || $title           ne ""
         || $symbol_mut_line ne ""
         || $description     ne "" ) {
        push( @allelic_variants,
              $self->_create_allelic_variant( $number, $title,
                                              $symbol_mut_line,
                                              $description ) );
    }

    return @allelic_variants;
}
} |
# Title   : _parse_genemap
# Usage   : $self->_parse_genemap( $omim_entry, $mim_number );
# Function: Looks up the genemap line stored under the given key, splits it
#           on "|" and copies selected columns into the entry:
#             column  4 -> cytogenetic positions (split on "," or ";")
#             column  5 -> gene symbols          (split on "," or ";")
#             column  6 -> gene status
#             column 10 -> mapping method
#             column 11 -> comment
#             column 16 -> mouse correlates      (split on "," or ";")
#           Columns that are undefined are skipped.
# Returns : Not meaningful.
# Args    : The entry to fill and the key into the genemap hash.
sub _parse_genemap {
    my ( $self, $omim_entry, $val ) = @_;

    my @columns = split( /\|/, $self->_genemap_hash()->{ $val } );

    my ( $cyto_field, $symbol_field, $status, $method, $remark, $mouse_field )
        = @columns[ 4, 5, 6, 10, 11, 16 ];

    if ( defined $cyto_field ) {
        $cyto_field =~ s/\s+//g;
        my @positions;
        for my $band ( split( /[,;]/, $cyto_field ) ) {
            my $position = Bio::Map::CytoPosition->new( -value => $band );
            push @positions, $position;
        }
        $omim_entry->add_CytoPositions( @positions );
    }

    if ( defined $symbol_field ) {
        $symbol_field =~ s/\s+//g;
        my @symbols = split( /[,;]/, $symbol_field );
        $omim_entry->add_gene_symbols( @symbols );
    }

    if ( defined $mouse_field ) {
        $mouse_field =~ s/\s+//g;
        my @correlates;
        for my $name ( split( /[,;]/, $mouse_field ) ) {
            # Each correlate gets its own species object.
            my $mouse = Bio::Species->new();
            $mouse->classification( qw( musculus Mus ) );
            $mouse->common_name( "mouse" );

            my $correlate = Bio::Phenotype::Correlate->new();
            $correlate->name( $name );
            $correlate->species( $mouse );
            $correlate->type( "OMIM mouse correlate" );
            push @correlates, $correlate;
        }
        $omim_entry->add_Correlates( @correlates );
    }

    if ( defined $status ) {
        $omim_entry->gene_status( $status );
    }
    if ( defined $method ) {
        $omim_entry->mapping_method( $method );
    }
    if ( defined $remark ) {
        $omim_entry->comment( $remark );
    }
}
} |
# Title   : _parse_references
# Usage   : my @refs = $self->_parse_references( $text );
# Function: Splits the text of a reference field into individual references
#           and turns each into a Bio::Annotation::Reference.
#
#           References are expected to be numbered ("1. ", "2. ", ...) and
#           separated by an empty line. For each reference, three forms are
#           tried in turn:
#             1. "authors: title. journal volume: pages, year"
#                -> location becomes "journal volume from-to (year)"
#             2. "authors: title. anything"
#                -> location becomes the remainder as it is
#             3. anything else
#                -> the whole text becomes the title
# Returns : A list of Bio::Annotation::Reference objects.
# Args    : The text of the field [string].
sub _parse_references {
    my ( $self, $text ) = @_;

    $text =~ s/\A\s+//;
    $text =~ s/\s+\z//;
    # Drop the number of the first reference; the numbers of the following
    # ones are consumed by the split pattern below.
    $text =~ s/\A\d+\.\s*//;

    my @references;
    my @texts = split( /\s*\n\s*\n\s*\d+\.\s*/, $text );

    foreach my $t ( @texts ) {
        my $authors  = "";
        my $title    = "";
        my $location = "";

        # Collapse line breaks and runs of whitespace into single blanks.
        $t =~ s/\s+/ /g;

        if ( $t =~ /(.+?)\s*:\s*(.+?[.?!])\s+(.+?)\s+(\S+?)\s*:\s*(\w?\d+.*)\s*,\s*(\d+)/ ) {
            $authors    = $1;
            $title      = $2;
            my $journal = $3;
            my $volume  = $4;
            my $fromto  = $5;
            my $year    = $6;
            # These two declarations were joined by a comma instead of a
            # semicolon; it worked only by accident of the comma operator.
            my $from = "";
            my $to   = "";
            if ( $fromto =~ /(\d+)-+(\d+)/ ) {
                $from = $1;
                $to   = "-" . $2;
            }
            elsif ( $fromto =~ /\A(\w+)/ ) {
                $from = $1;
            }
            $location = $journal . " " . $volume . " " . $from . $to
                      . " (" . $year . ")";
        }
        elsif ( $t =~ /(.+?)\s*:\s*(.+?[.?!])\s*(.+?)\z/ ) {
            $authors  = $1;
            $title    = $2;
            $location = $3;
        }
        else {
            $title = $t;
        }

        my $ref = Bio::Annotation::Reference->new( -title    => $title,
                                                   -location => $location,
                                                   -authors  => $authors );
        push( @references, $ref );
    }

    return @references;
}
} |
# Title   : _parse_title
# Usage   : my ( $title, $alt_titles ) = $self->_parse_title( $text );
# Function: Separates the first line of a title field from the rest.
# Returns : A two-element list: the first line (without its newline) and the
#           remaining text. If the text does not start with a non-empty line
#           followed by a newline, the whole text is returned as the title
#           and the remainder is "".
# Args    : The text of the field [string].
sub _parse_title {
    my ( $self, $text ) = @_;

    my ( $first_line ) = $text =~ /^(.+)\n/;

    # No leading "line + newline": everything is the title.
    unless ( defined $first_line ) {
        return ( $text, "" );
    }

    ( my $remainder = $text ) =~ s/^.+\n//;

    return ( $first_line, $remainder );
}
} |
# Title   : _read_genemap
# Usage   : $self->_read_genemap( "/path/to/genemap" );
# Function: Reads the genemap file and stores every line in the genemap hash
#           (see _genemap_hash()), keyed by the tenth "|"-separated column
#           (index 9) of that line.
# Returns : The value returned by $self->_genemap_hash( \%gm ).
# Args    : The genemap file name [string].
# Throws  : Via $self->throw() if a line does not split into exactly 18
#           "|"-separated columns.
sub _read_genemap {
    my ( $self, $genemap_file_name ) = @_;

    # Direct method call instead of the indirect-object form
    # "new Bio::Root::IO->new( ... )", which Perl parses ambiguously.
    my $genemap_file = Bio::Root::IO->new( -file => $genemap_file_name );

    my %gm;
    # Test for definedness so that a line whose text is false in boolean
    # context (e.g. a final "0" without newline) does not end the loop early.
    while ( defined( my $line = $genemap_file->_readline() ) ) {
        my @columns = split( /\|/, $line );
        unless ( scalar( @columns ) == 18 ) {
            # Release the file before reporting the problem.
            $genemap_file->close();
            $self->throw( "Gene map file \"" . $genemap_file_name
                          . "\" is not in the expected format" );
        }
        $gm{ $columns[ 9 ] } = $line;
    }
    $genemap_file->close();

    return $self->_genemap_hash( \%gm );
}
} |
# Title   : genemap_file_name
# Usage   : $omimparser->genemap_file_name( "genemap" );
# Function: Set/get for the genemap file name. Setting a non-empty name also
#           reads that file through _read_genemap(), which stores its
#           contents in the genemap hash.
# Returns : The genemap file name [string].
# Args    : The genemap file name [string] (optional).
sub genemap_file_name {
    my ( $self, $value ) = @_;

    if ( defined $value ) {
        $self->{ "_genemap_file_name" } = $value;

        # Read the file for every non-empty name. The former test, /\W/,
        # silently skipped names consisting of word characters only, such as
        # the "genemap" of the usage example. The empty string (used by
        # init() to reset the name) still reads nothing.
        if ( $value ne "" ) {
            # _read_genemap() stores the hash itself. Its result used to be
            # passed to _genemap_hash() called as a plain function, which
            # made the genemap hash take the place of $self.
            $self->_read_genemap( $value );
        }
    }

    return $self->{ "_genemap_file_name" };
}
} |
# Title   : init()
# Usage   : $omim_parser->init();
# Function: Resets this OMIMparser: both file names become "", the genemap
#           hash becomes empty, the OMIM text file object is dropped and both
#           state flags are set to FALSE.
# Returns : Not meaningful.
# Args    : None.
sub init {
    my ( $self ) = @_;

    $self->genemap_file_name( "" );
    $self->omimtxt_file_name( "" );
    $self->_genemap_hash( {} );

    # Clear the slot directly: the _OMIM_text_file() setter ignores an
    # undefined argument, so calling it with undef left a previously opened
    # file in place.
    $self->{ "_omimtxt_file" } = undef;

    $self->_is_not_first_record( FALSE );
    $self->_done( FALSE );
}
} |
# Title   : new
# Usage   : $omim_parser = Bio::Phenotype::OMIM::OMIMparser->new(
#                              -genemap  => "/path/to/genemap",
#                              -omimtext => "/path/to/omim.txt" );
# Function: Creates a new OMIMparser, resets it with init() and then applies
#           whichever of the two file names was supplied.
# Returns : A new OMIMparser object.
# Args    : -genemap  => the genemap file name (optional)
#           -omimtext => the omim text file name
sub new {
    my ( $class, @args ) = @_;

    my $self = $class->SUPER::new( @args );

    my ( $genemap, $omimtext )
        = $self->_rearrange( [ qw( GENEMAP OMIMTEXT ) ], @args );

    $self->init();

    # Names that are false in boolean context are treated as "not given".
    if ( $genemap ) {
        $self->genemap_file_name( $genemap );
    }
    if ( $omimtext ) {
        $self->omimtxt_file_name( $omimtext );
    }

    return $self;
}
# Title   : next_phenotype()
# Usage   : while ( my $omim_entry = $omim_parser->next_phenotype() ) {
#               # do something with $omim_entry
#           }
# Function: Reads the OMIM text file up to the next "*RECORD*" line (or the
#           end of the file) and turns the fields read into an entry.
#
#           "*FIELD* <tag>" lines select what the following lines are
#           collected as. The tags CN, CD and ED are collected as mini-MIM
#           data when the most recent other known tag was MN, and as data of
#           the entry itself otherwise.
# Returns : A Bio::Phenotype::OMIM::OMIMentry, or undef once the end of the
#           omim text file has been reached.
# Args    : None.
# Throws  : Through the error helpers if no OMIM text file has been set, or
#           if the end of the file is reached without any field stored.
sub next_phenotype {
    my ( $self ) = @_;

    unless ( defined( $self->_OMIM_text_file() ) ) {
        $self->_no_OMIM_text_file_provided_error();
    }

    if ( $self->_done() == TRUE ) {
        # Deliberately "return undef": callers always receive one scalar.
        return undef;
    }

    # Tags that always map to the same state and end any mini-MIM section.
    my %plain_state_for = (
        "NO" => MIM_NUMBER_STATE,
        "TI" => TITLE_STATE,
        "TX" => TEXT_STATE,
        "AV" => ALLELIC_VARIANT_STATE,
        "SA" => SEE_ALSO_STATE,
        "RF" => REF_STATE,
        "CS" => SYMPT_STATE,
    );

    # Tags whose state depends on the mini-MIM flag:
    # [ state outside a mini-MIM section, state inside one ].
    my %flagged_states_for = (
        "CN" => [ CONTRIBUTORS_STATE, MINI_MIM_CONTRIBUTORS_STATE ],
        "CD" => [ CREATED_BY_STATE,   MINI_MIM_CREATED_BY_STATE ],
        "ED" => [ EDITED_BY_STATE,    MINI_MIM_EDITED_BY_STATE ],
    );

    my $contents      = "";
    my $state         = DEFAULT_STATE;
    my $saw_mini_mim  = 0;
    my %record        = ();

    while ( my $line = $self->_OMIM_text_file()->_readline() ) {
        if ( $line =~ /^\s*\*RECORD\*/ ) {
            if ( $self->_is_not_first_record() == TRUE ) {
                # This marker ends the current record: store the field that
                # was being collected and hand the record out.
                if ( $state != DEFAULT_STATE ) {
                    $self->_add_to_hash( $state, $contents, \%record );
                }
                return $self->_createOMIMentry( \%record );
            }
            # The very first marker only opens the first record.
            $self->_is_not_first_record( TRUE );
        }
        elsif ( $line =~ /^\s*\*FIELD\*\s*(\S+)/ ) {
            my $fieldtag = $1;

            # Store the field that was being collected so far.
            if ( $state != DEFAULT_STATE ) {
                $self->_add_to_hash( $state, $contents, \%record );
            }
            $contents = "";

            if ( exists $plain_state_for{ $fieldtag } ) {
                $state        = $plain_state_for{ $fieldtag };
                $saw_mini_mim = 0;
            }
            elsif ( $fieldtag eq "MN" ) {
                $state        = MINI_MIM_TEXT_STATE;
                $saw_mini_mim = 1;
            }
            elsif ( exists $flagged_states_for{ $fieldtag } ) {
                $state = $flagged_states_for{ $fieldtag }[ $saw_mini_mim ? 1 : 0 ];
            }
            else {
                # Report on STDERR rather than printing into the caller's
                # standard output.
                warn "Warning: Unknown tag: $fieldtag\n";
                # Fall back to the default state so that the text of the
                # unknown field is dropped. Keeping the previous state would
                # append it to the field collected before.
                $state = DEFAULT_STATE;
            }
        }
        else {
            $contents .= $line;
        }
    }

    # End of file: the record collected so far is the last one.
    $self->_OMIM_text_file()->close();
    $self->_done( TRUE );

    unless ( %record ) {
        $self->_not_a_OMIM_text_file_error();
    }

    if ( $state != DEFAULT_STATE ) {
        $self->_add_to_hash( $state, $contents, \%record );
    }

    return $self->_createOMIMentry( \%record );
}
} |
# Title   : omimtxt_file_name
# Usage   : $omimparser->omimtxt_file_name( "omim.txt" );
# Function: Set/get for the omim text file name. Setting a non-empty name
#           also opens that file as a Bio::Root::IO object and stores it
#           through _OMIM_text_file().
# Returns : The omim text file name [string].
# Args    : The omim text file name [string] (optional).
sub omimtxt_file_name {
    my ( $self, $value ) = @_;

    if ( defined $value ) {
        $self->{ "_omimtxt_file_name" } = $value;

        # Open the file for every non-empty name. The former test, /\W/,
        # silently skipped names consisting of word characters only (e.g.
        # "omim"). The empty string (used by init() to reset the name) still
        # opens nothing.
        if ( $value ne "" ) {
            # Direct method call instead of the indirect-object form
            # "new Bio::Root::IO->new( ... )", which Perl parses ambiguously.
            $self->_OMIM_text_file( Bio::Root::IO->new( -file => $value ) );
        }
    }

    return $self->{ "_omimtxt_file_name" };
}
} |
General documentation
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to the
Bioperl mailing lists. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bio.perl.org/MailList.html - About the mailing lists
Report bugs to the Bioperl bug tracking system to help us keep track of
the bugs and their resolution. Bug reports can be submitted via
email or the web:
bioperl-bugs@bio.perl.org
http://bugzilla.bioperl.org/
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _