Raw content of XrefParser::IPIParser package XrefParser::IPIParser; use strict; use File::Basename; use base qw( XrefParser::BaseParser ); # IPI file format: fasta, e.g. # >IPI:IPI00000005.1|SWISS-PROT:P01111|TREMBL:Q15104|REFSEQ_NP:NP_002515|ENSEMBL:ENSP00000261444 Tax_Id=9606 Transforming protein N-Ras # MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG # PTRTVDTKQAHELAKSYGIPFIETSAKTRQGVEDAFYTLVREIRQYRMKKLNSSDDGTQG # CMGLPCVVM sub run { my $self = shift if (defined(caller(1))); my $source_id = shift; my $species_id = shift; my $files = shift; my $release_file = shift; my $verbose = shift; my $file = @{$files}[0]; my @xrefs; local $/ = "\n>"; my $ipi_io = $self->get_filehandle($file); if ( !defined $ipi_io ) { print STDERR "ERROR: Could not open $file\n"; return 1; # 1 = error } my %species_tax_id = %{$self->get_taxonomy_from_species_id($species_id)}; while ( $_ = $ipi_io->getline() ) { my $xref; my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn("Can't parse FASTA entry: $_\n"); # deconstruct header my @header = split /\|/, $header; my ($ipi) = $header[0] =~ /^IPI:(IPI(\d)+(\.\d+)?)/ or warn("Can't deduce IPI identifier from " . $header[0]); my ($ipi_ac, $ipi_ver) = $ipi =~ /(IPI\d+)\.(\d+)/; my ($tax_id, $description) = $header[-1] =~ /.*Tax_Id=(\d+)\s+(.*)/; # note currently we ignore all the other cross-references in the IPI file # only interested in species with the taxonomy ID were looking for next if ( !defined $tax_id || !defined $species_tax_id{$tax_id}); # make sequence into one long string $sequence =~ s/\n//g; # build the xref object and store it $xref->{ACCESSION} = $ipi_ac; $xref->{VERSION} = $ipi_ver; $xref->{LABEL} = $ipi; $xref->{DESCRIPTION} = $description; $xref->{SEQUENCE} = $sequence; $xref->{SOURCE_ID} = $source_id; $xref->{SPECIES_ID} = $species_id; $xref->{SEQUENCE_TYPE} = 'peptide'; $xref->{STATUS} = 'experimental'; $xref->{INFO_TYPE} = "SEQUENCE_MATCH"; push @xrefs, $xref; } $ipi_io->close(); XrefParser::BaseParser->upload_xref_object_graphs(\@xrefs); print scalar(@xrefs) . " IPI xrefs succesfully parsed\n" if($verbose); return 0; #successful } 1;