# Raw content of XrefParser::FastaParser
package XrefParser::FastaParser;

use strict;
use warnings;

use Bio::SeqIO;
use File::Basename;

use base qw( XrefParser::BaseParser );

# Fasta file format, e.g.
# >foo peptide sequence for the foo gene
# MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGETCLLDILDTAG
# PTRTVDTKQAHELAKSYGIPFIETSAKTRQGVEDAFYTLVREIRQYRMKKLNSSDDGTQG
# CMGLPCVVM

# run
#
# Reads the first FASTA file listed in $files with Bio::SeqIO, turns every
# sequence into an xref hash and hands the collected xrefs to
# upload_xref_object_graphs().
#
# Arguments (positional, after the invocant):
#   $source_id    - stored as SOURCE_ID on every xref
#   $species_id   - stored as SPECIES_ID on every xref; also used to look up
#                   the taxonomy ids accepted for this species
#   $files        - array reference; only element 0 is read
#   $release_file - accepted for interface compatibility, not used here
#   $verbose      - when true, progress counts are printed to STDOUT
#
# Returns 0 on success. Returns 1 when no input file was supplied
# (NOTE(review): non-zero is assumed to mean failure to the caller, by
# contrast with the documented "0 is successful" - confirm against callers).
sub run {

    # The invocant is only taken off @_ when caller(1) is defined, exactly as
    # before. The declaration is deliberately kept separate from the
    # conditional: "my $x = ... if COND" has undefined behaviour in Perl.
    my $self;
    $self = shift if defined( caller(1) );

    my ( $source_id, $species_id, $files, $release_file, $verbose ) = @_;

    # Element access rather than a one-element slice of the file list.
    my $file = defined $files ? $files->[0] : undef;
    if ( !defined $file ) {
        print STDERR "No Fasta file supplied to " . __PACKAGE__ . "\n";
        return 1;
    }

    my $sio = Bio::SeqIO->new( -format => 'fasta', -file => $file );

    # Keys of this hash are the taxonomy ids accepted for $species_id.
    my %species_tax_id =
      %{ $self->get_taxonomy_from_species_id($species_id) };

    my @xrefs;

    while ( my $seq = $sio->next_seq ) {

        # Test species if available: a record that carries a taxonomy id
        # which is not one of ours is skipped; records without species
        # information are always kept.
        if ( my $sp = $seq->species ) {
            if ( my $tax_id = $sp->ncbi_taxid ) {
                next if !defined $species_tax_id{$tax_id};
            }
        }

        # Anything that is not flagged as protein is recorded as dna.
        my $alphabet = $seq->alphabet;
        my $is_protein = ( defined $alphabet && $alphabet eq 'protein' );

        # Build the xref object and store it. The sequence accessors are
        # assigned one by one so each is called in scalar context.
        my $xref = {};
        $xref->{ACCESSION}     = $seq->display_name;
        $xref->{LABEL}         = $seq->display_name;
        $xref->{DESCRIPTION}   = $seq->description;
        $xref->{SEQUENCE}      = $seq->seq;
        $xref->{SOURCE_ID}     = $source_id;
        $xref->{SPECIES_ID}    = $species_id;
        $xref->{SEQUENCE_TYPE} = $is_protein ? 'peptide' : 'dna';
        $xref->{STATUS}        = 'experimental';

        # VERSION is only set when the record has a true version value.
        if ( my $v = $seq->version ) {
            $xref->{VERSION} = $v;
        }

        push @xrefs, $xref;
    }

    print scalar(@xrefs) . " Fasta xrefs succesfully parsed\n" if ($verbose);

    $self->upload_xref_object_graphs( \@xrefs );

    print scalar(@xrefs) . " Fasta xrefs succesfully loaded\n" if ($verbose);

    return 0;    #successful
}

1;