Raw content of XrefParser::MIMParser
package XrefParser::MIMParser;
use strict;
use POSIX qw(strftime);
use File::Basename;
use base qw( XrefParser::BaseParser );
# --------------------------------------------------------------------------------
# Parse command line and run if being run directly
if (!defined(caller())) {
if (scalar(@ARGV) != 1) {
print "\nUsage: MIMParser.pm file \n\n";
exit(1);
}
run($ARGV[0]);
}
sub run {
my $self = shift if (defined(caller(1)));
my $general_source_id = shift;
my $species_id = shift;
my $files = shift;
my $release_file = shift;
my $verbose = shift;
my $file = @{$files}[0];
my %old_to_new;
my %removed;
my $source_id;
my @sources;
if(!defined($general_source_id)){
$general_source_id = XrefParser::BaseParser->get_source_id_for_filename($file);
}
if(!defined($species_id)){
$species_id = XrefParser::BaseParser->get_species_id_for_filename($file);
}
push @sources, $general_source_id;
my $gene_source_id = XrefParser::BaseParser->get_source_id_for_source_name("MIM_GENE");
push @sources, $gene_source_id;
my $morbid_source_id = XrefParser::BaseParser->get_source_id_for_source_name("MIM_MORBID");
push @sources, $morbid_source_id;
print "sources are:- ".join(", ",@sources)."\n" if($verbose);
local $/ = "*RECORD*";
my $mim_io = $self->get_filehandle($file);
if ( !defined $mim_io ) {
print "ERROR: Could not open $file\n";
return 1; # 1 is an error
}
my $gene = 0;
my $phenotype = 0;
my $removed_count =0;
$mim_io->getline(); # first record is empty with *RECORD* as the
# record seperator
while ( $_ = $mim_io->getline() ) {
#get the MIM number
my $number = 0;
my $description = undef;
my $is_morbid = 0;
my $type =undef;
if(/\*FIELD\*\s+NO\n(\d+)/){
$number = $1;
$source_id = $gene_source_id;
if(/\*FIELD\*\sTI\n([\^\#\%\+\*]*)\d+(.*)\n/){
$description =$2;
$type = $1;
$description =~ s/\;\s[A-Z0-9]+$//; # strip gene name at end
if($type eq "*"){ # gene only
$gene++;
$self->add_xref($number,"",$number,$description,$gene_source_id,$species_id,"DEPENDENT");
}
elsif(!defined($type) or $type eq "" or $type eq "#" or $type eq "%"){ #phenotype only
$phenotype++;
$self->add_xref($number,"",$number,$description,$morbid_source_id,$species_id,"DEPENDENT");
}
elsif($type eq "+"){ # both
$gene++;
$phenotype++;
$self->add_xref($number,"",$number,$description,$gene_source_id,$species_id,"DEPENDENT");
$self->add_xref($number,"",$number,$description,$morbid_source_id,$species_id,"DEPENDENT");
}
elsif($type eq "^"){
if(/\*FIELD\*\sTI\n[\^]\d+ MOVED TO (\d+)/){
$old_to_new{$number} = $1;
}
else{
$removed{$number} = 1;
$removed_count++;
}
}
}
}
}
$mim_io->close();
my $syn_count =0;
foreach my $mim (keys %old_to_new){
my $old= $mim;
my $new= $old_to_new{$old};
while(defined($old_to_new{$new})){
$new = $old_to_new{$new};
}
if(!defined($removed{$new})){
$self->add_to_syn_for_mult_sources($new, \@sources, $old, $species_id);
$syn_count++;
}
}
print "$gene genemap and $phenotype phenotype MIM xrefs added\n" if($verbose);
print "added $syn_count synonyms (defined by MOVED TO)\n" if($verbose);
return 0; #successful
}
1;