XrefParser InterproParser
Included libraries | Package variables | General documentation | Methods
Toolbar
WebCvs | Raw content
Package variables
Privates (from "my" definitions)
$xref_sth ;
$dep_sth;
Included modules
File::Basename
POSIX qw ( strftime )
Inherit
XrefParser::BaseParser
Synopsis
No synopsis!
Description
No description!
Methods
run
No description
Code
Methods description
None available.
Methods code
run | description | prev | next | Top
# Parse an InterPro XML file and load its entries into the xref database.
#
# For every <interpro> record an xref row is added (unless get_xref()
# already reports one) and every recognised member database key found in
# the record's <member_list> is stored in the interpro table, skipping
# pairs that are already present there.
#
# Positional arguments (after the invocant):
#   $source_id    - source id; when undef it is obtained through
#                   get_source_id_for_filename()
#   $species_id   - species id; when undef it is obtained through
#                   get_species_id_for_filename()
#   $files_ref    - array reference of input files; only the first is read
#   $release_file - optional file scanned for a "Release <n>, ..." string,
#                   which is passed to set_release()
#   $verbose      - when true, load counts and release info are printed
#
# Returns 0 on success and 1 on error.
sub run {
  # The invocant is only taken from @_ when caller(1) is defined, i.e. when
  # run() was invoked from inside another subroutine.  The declaration is
  # kept separate from the conditional assignment because the behaviour of
  # "my $x = ... if COND" is undefined.
  my $self;
  $self = shift if ( defined( caller(1) ) );

  my $source_id    = shift;
  my $species_id   = shift;
  my $files_ref    = shift;
  my $release_file = shift;
  my $verbose      = shift;

  my $file = $files_ref->[0];

  if ( !defined($source_id) ) {
    $source_id = $self->get_source_id_for_filename($file);
  }
  if ( !defined($species_id) ) {
    $species_id = $self->get_species_id_for_filename($file);
  }

  my $add_interpro_sth =
    $self->dbi()
    ->prepare("INSERT INTO interpro (interpro, pfam) VALUES(?,?)");

  my $get_interpro_sth =
    $self->dbi()
    ->prepare( "SELECT interpro FROM interpro "
        . "WHERE interpro = ? AND pfam = ?" );

  my $add_xref_sth =
    $self->dbi()
    ->prepare( "INSERT INTO xref "
        . "(accession,version,label,description,source_id,species_id, info_type) "
        . "VALUES(?,?,?,?,?,?,?)" );

  my $xml_io = $self->get_filehandle($file);
  if ( !defined $xml_io ) {
    print "ERROR: Can't open interpro file $file\n";
    return 1;    # 1 = error
  }

  # Example of the input being parsed:
  #<interpro id="IPR001023" type="Family" short_name="Hsp70" protein_count="1556">
  #  <name>Heat shock protein Hsp70</name>
  #  <db_xref protein_count="18" db="PFAM" dbkey="PF01278" name="Omptin" />
  #  <db_xref protein_count="344" db="TIGRFAMs" dbkey="TIGR00099" name="Cof-subfamily" />

  my %count;

  {
    # Read one <interpro> record at a time.  The changed record separator
    # is confined to this block so that the release file further down is
    # read line by line again.
    local $/ = "</interpro>";

    while ( defined( my $record = $xml_io->getline() ) ) {
      my ($interpro)   = $record =~ /interpro id="(\S+)"/;
      my ($short_name) = $record =~ /short_name="(\S+)"/;
      my ($name)       = $record =~ /<name>(.*)<\/name>/;

      # Chunks without an InterPro id (e.g. trailing text) carry no data.
      next if ( !$interpro );

      if ( !$self->get_xref( $interpro, $source_id, $species_id ) ) {
        $count{INTERPRO}++;
        if ( !$add_xref_sth->execute( $interpro, '', $short_name, $name,
                                      $source_id, $species_id, 'MISC' ) )
        {
          print STDERR "Problem adding '$interpro'\n";
          return 1;    # 1 is an error
        }
      }

      my ($members) = $record =~ /<member_list>(.+)<\/member_list>/s;
      next if ( !defined $members );

      while ( $members =~
/db="(PROSITE|PFAM|PRINTS|PREFILE|PROFILE|TIGRFAMs|PIRSF|SMART|SSF)"\s+dbkey="(\S+)"/cgm
        )
      {
        my ( $db_type, $id ) = ( $1, $2 );

        # Strip SSF prefix
        $id =~ s/^SSF// if ( $db_type eq 'SSF' );

        # Look the pair up in the interpro table itself.  The member key is
        # not an xref source id, so get_xref() cannot answer this question.
        $get_interpro_sth->execute( $interpro, $id );
        my ($known) = $get_interpro_sth->fetchrow_array();
        $get_interpro_sth->finish();
        next if ( defined $known );

        $add_interpro_sth->execute( $interpro, $id );
        $count{$db_type}++;
      }
    }
  }

  $xml_io->close();

  if ($verbose) {
    for my $db ( keys %count ) {
      print "\t" . $count{$db} . " $db loaded.\n";
    }
  }

  if ( defined $release_file ) {
    # Parse the second file that we got.  This is assumed to be the
    # HTML file that will contain the release information.
    my $release_io = $self->get_filehandle($release_file);
    if ( !defined $release_io ) {
      print "ERROR: Can't open interpro release file $release_file\n";
      return 1;    # 1 = error
    }

    my $release;
    while ( defined( my $line = $release_io->getline() ) ) {
      chomp $line;
      if ( $line =~ m{(Release [0-9.]+, .*)} ) {
        $release = $1;
        last;
      }
    }
    $release_io->close();

    if ( defined $release ) {
      print "Interpro release is '$release'\n" if ($verbose);
      $self->set_release( $source_id, $release );
    }
    else {
      print "Did not find release info in '$release_file'\n" if ($verbose);
    }
  }

  return 0;
}

#sub get_xref{
# my ($get_xref_sth, $acc, $source) = @_;
#
# $get_xref_sth->execute($acc, $source) || die "FAILED $acc $source\n";
# if(my @row = $get_xref_sth->fetchrow_array()) {
# return $row[0];
# }
# return 0;
#}
1;
}
General documentation
No general documentation available.