Raw content of XrefParser::UniProtVarSplicParser
package XrefParser::UniProtVarSplicParser;
# Parse UniProt alternative splice files
use strict;
use File::Basename;
use base qw( XrefParser::BaseParser );
# UniProtVarSplic file format: fasta, e.g.
#>P48347-2|14310_ARATH Isoform 2 of P48347 - Arabidopsis thaliana (Mouse-ear cress)
#MENEREKQVYLAKLSEQTERYDEMVEAMKKVAQLDVELTVEERNLVSVGYKNVIGARRAS
#WRILSSIEQKEESKGNDENVKRLKNYRKRVEDELAKVCNDILSVIDKHLIPSSNAVESTV
#FFYKMKGDYYRYLAEFSSGAERKEAADQSLEAYKAAVAAAENGLAPTHPVRLGLALNFSV
#FYYEILNSPESACQLAKQAFDDAIAELDSLNEESYKDSTLIMQLLRDNLTLWTSDLNEEG
#DERTKGADEPQDEV
sub run {
my $self = shift if (defined(caller(1)));
my $source_id = shift;
my $species_id = shift;
my $files = shift;
my $release_file = shift;
my $verbose = shift;
my $file = @{$files}[0];
my @xrefs;
local $/ = "\n>";
my $file_io = $self->get_filehandle($file);
if ( !defined $file_io ) {
print STDERR "ERROR: Could not open $file\n";
return 1; # 1 error
}
my %swiss = %{ $self->get_valid_codes( "uniprot", $species_id ) };
print scalar(%swiss)." uniprot entries will be used as tests\n" if($verbose);
my $missed = 0;
while ( $_ = $file_io->getline() ) {
my $xref;
my ($header, $sequence) = $_ =~ /^>?(.+?)\n([^>]*)/s or warn("Can't parse FASTA entry: $_\n");
# deconstruct header
my ($accession, @description) = split /\|/, $header;
my $description = join(" ", @description);
my ($original, $extension) = split/-/, $accession;
if(defined($swiss{$original})){
# make sequence into one long string
$sequence =~ s/\n//g;
# build the xref object and store it
$xref->{ACCESSION} = $accession;
$xref->{LABEL} = $accession;
$xref->{DESCRIPTION} = $description;
$xref->{SEQUENCE} = $sequence;
$xref->{SOURCE_ID} = $source_id;
$xref->{SPECIES_ID} = $species_id;
$xref->{SEQUENCE_TYPE} = 'peptide';
$xref->{STATUS} = 'experimental';
push @xrefs, $xref;
}
else{
$missed++;
}
}
$file_io->close();
print $missed." ignored as original uniprot not found in database\n" if($verbose);
print scalar(@xrefs) . " UniProtVarSplic xrefs succesfully parsed\n" if($verbose);
$self->upload_xref_object_graphs(\@xrefs);
if ( defined $release_file ) {
# Parse and apply the Swiss-Prot release info
# from $release_file.
my $release_io = $self->get_filehandle($release_file);
while ( defined( my $line = $release_io->getline() ) ) {
if ( $line =~ m#(UniProtKB/Swiss-Prot Release .*)# ) {
print "Swiss-Prot release is '$1'\n" if($verbose);
$self->set_release( $source_id, $1 );
}
}
$release_io->close();
}
return 0;
}
1;