Raw content of XrefParser::InterproGoParser
package XrefParser::InterproGoParser;
use strict;
use XrefParser::BaseParser;
use vars qw(@ISA);
@ISA = qw(XrefParser::BaseParser);
sub run {
my $self = shift if (defined(caller(1)));
my $source_id = shift;
my $species_id = shift;
my $files = shift;
my $release_file = shift;
my $verbose = shift;
my $file = @{$files}[0];
my $file_io = $self->get_filehandle($file)
|| ( print( "ERROR: Cannot open $file\n" ) && return 1 );
my %interpros = %{$self->get_valid_codes("interpro",$species_id)};
scalar( keys %interpros )
|| ( print STDERR "ERROR: No InterPro xrefs found in DB" && return 1 );
#get the "main" GO source id.
$source_id = $self->get_source_id_for_source_name("GO","main");
# get the mapping that are already there so that we don't get lots of duplicates.
# stored in the global hash xref_dependent_mapped.
$self->get_dependent_mappings($source_id);
# Process the file
my( $skip_count, $dependent_xref_count ) = (0,0);
while( my $line = $file_io->getline() ){
next if $line =~ /^!/; # Skip comments
# Example line
# InterPro:IPR000003 Retinoid X receptor > GO:DNA binding ; GO:0003677
if( $line =~ m/^InterPro:(\S+)\s+(.+)\s+>\s+GO:(.+)\s+;\s+(GO:\d+)/ ){
my $ipro_id = $1;
my $go_desc = $2;
my $go_term = $3;
my $go_id = $4;
if(defined($interpros{$ipro_id})){
$self->add_to_xrefs($interpros{$ipro_id},$go_id,1,$go_id,$go_term,'IEA',
$source_id,$species_id);
$dependent_xref_count++;
}
else{
$skip_count++;
}
}
}
print "Parsed identifiers from $file\n".
"\tadded $dependent_xref_count GO xrefs dependent on InterPro\n".
"\tskipped $skip_count GO terms due to missing InterPros\n" if($verbose);
return 0;
}
sub new{
my $self = {};
bless $self, __PACKAGE__;
return $self;
}