# Raw content of XrefParser::InterproParser
package XrefParser::InterproParser;
use strict;
use POSIX qw(strftime);
use File::Basename;
use base qw( XrefParser::BaseParser );
# File-scoped statement handle slots (declared here, not assigned in this part
# of the file).
my ( $xref_sth, $dep_sth );

# --------------------------------------------------------------------------------
# Script entry point: when nothing has loaded this file (caller() yields
# nothing), treat it as a command-line program that takes exactly one argument.
#
# NOTE(review): run() unpacks (source_id, species_id, files_ref, release_file,
# verbose) from its arguments, so handing it the raw @ARGV looks out of date --
# confirm before relying on direct execution.
unless ( defined( caller() ) ) {
    if ( @ARGV != 1 ) {
        print "\nUsage: InterproParser.pm file \n\n";
        exit(1);
    }

    run(@ARGV);
}
# Parse an InterPro XML dump and load it into the xref database.
#
# For every <interpro> entry an xref row is inserted (unless
# $self->get_xref() already reports one for this accession, source and
# species), and every member-database signature listed in the entry's
# <member_list> is stored in the "interpro" table as an
# (interpro accession, member id) pair, skipping pairs that are already there.
#
# Arguments (after the invocant):
#   $source_id    - xref source id; derived from the file name when undef
#   $species_id   - species id; derived from the file name when undef
#   $files_ref    - array ref of input files; only the first one is read
#   $release_file - optional file scanned for a "Release N.N, ..." line
#   $verbose      - when true, print per-database counts and release info
#
# Returns 0 on success and 1 on error.
#
# Shape of the records the patterns below expect (illustrative):
#
#   <interpro id="IPR001023" short_name="Hsp70" ...>
#     <name>Heat shock protein Hsp70</name>
#     <member_list>
#       <db_xref ... db="PFAM" dbkey="PF00012" ... />
#     </member_list>
#   </interpro>
sub run {
    # An invocant is only expected when we were called from inside another
    # sub (the normal method call).  Declared separately because the behaviour
    # of "my ... if ..." is undefined in Perl.
    my $self;
    $self = shift if ( defined( caller(1) ) );

    my ( $source_id, $species_id, $files_ref, $release_file, $verbose ) = @_;

    my $file = $files_ref->[0];

    if ( !defined $source_id ) {
        $source_id = $self->get_source_id_for_filename($file);
    }
    if ( !defined $species_id ) {
        $species_id = $self->get_species_id_for_filename($file);
    }

    my $dbi = $self->dbi();

    my $add_interpro_sth =
      $dbi->prepare("INSERT INTO interpro (interpro, pfam) VALUES(?,?)");

    my $get_interpro_sth =
      $dbi->prepare( "SELECT interpro FROM interpro "
          . "WHERE interpro = ? AND pfam = ?" );

    my $add_xref_sth =
      $dbi->prepare( "INSERT INTO xref "
          . "(accession,version,label,description,source_id,species_id, info_type) "
          . "VALUES(?,?,?,?,?,?,?)" );

    my $xml_io = $self->get_filehandle($file);
    if ( !defined $xml_io ) {
        print "ERROR: Can't open hugo interpro file $file\n";
        return 1;    # 1 = error
    }

    my %count;

    {
        # Read one <interpro>...</interpro> element per getline() call.  The
        # override is confined to this block so that the release file below
        # is read line by line again.
        local $/ = '</interpro>';

      RECORD:
        while ( defined( my $record = $xml_io->getline() ) ) {
            my ($interpro) = $record =~ /interpro id="(\S+)"/;

            # Leading/trailing chunks (XML header, closing root tag, ...)
            # carry no entry id and are skipped.
            next RECORD if !$interpro;

            my ($short_name) = $record =~ /short_name="(\S+)"/;
            my ($name)       = $record =~ /<name>(.*)<\/name>/;

            if ( !$self->get_xref( $interpro, $source_id, $species_id ) ) {
                $count{INTERPRO}++;
                if (
                    !$add_xref_sth->execute(
                        $interpro,  '',          $short_name, $name,
                        $source_id, $species_id, 'MISC'
                    )
                  )
                {
                    print STDERR "Problem adding '$interpro'\n";
                    $xml_io->close();
                    return 1;    # 1 is an error
                }
            }

            # Only signatures inside <member_list> belong to this entry;
            # db_xref elements elsewhere in the record are not members.
            my ($members) =
              $record =~ /<member_list>(.+)<\/member_list>/s;
            next RECORD if !defined $members;

          MEMBER:
            while ( $members =~
/db="(PROSITE|PFAM|PRINTS|PREFILE|PROFILE|TIGRFAMs|PIRSF|SMART|SSF)"\s+dbkey="(\S+)"/g
              )
            {
                my ( $db_type, $id ) = ( $1, $2 );

                if ( $db_type eq 'SSF' ) { $id =~ s/^SSF// }  # Strip SSF prefix

                # Look the pair up in the interpro table itself.  (The xref
                # lookup takes a source id as its second argument, so it
                # cannot answer "is this pair already stored?".)
                if ( !$get_interpro_sth->execute( $interpro, $id ) ) {
                    print STDERR "Problem looking up '$interpro' / '$id'\n";
                    $xml_io->close();
                    return 1;    # 1 is an error
                }
                my ($known) = $get_interpro_sth->fetchrow_array();
                $get_interpro_sth->finish();
                next MEMBER if defined $known;

                if ( $add_interpro_sth->execute( $interpro, $id ) ) {
                    $count{$db_type}++;
                }
                else {
                    # Report, but keep loading the remaining members.
                    print STDERR "Problem adding '$interpro' / '$id'\n";
                }
            }
        }
    }

    $xml_io->close();

    if ($verbose) {
        for my $db ( keys %count ) {
            print "\t" . $count{$db} . " $db loaded.\n";
        }
    }

    if ( defined $release_file ) {
        # Parse the second file that we got.  This is assumed to be the
        # HTML file that will contain the release information.
        my $release_io = $self->get_filehandle($release_file);
        if ( !defined $release_io ) {
            print "ERROR: Can't open release file $release_file\n";
            return 1;    # 1 = error
        }

        my $release;
        while ( defined( my $line = $release_io->getline() ) ) {
            chomp $line;
            if ( $line =~ m#(Release [0-9.]+, .*)# ) {
                $release = $1;
                last;
            }
        }
        $release_io->close();

        if ( defined $release ) {
            print "Interpro release is '$release'\n" if ($verbose);
            $self->set_release( $source_id, $release );
        }
        else {
            print "Did not find release info in '$release_file'\n"
              if ($verbose);
        }
    }

    return 0;
}
#sub get_xref{
# my ($get_xref_sth, $acc, $source) = @_;
#
# $get_xref_sth->execute($acc, $source) || die "FAILED $acc $source\n";
# if(my @row = $get_xref_sth->fetchrow_array()) {
# return $row[0];
# }
# return 0;
#}
1;