# Raw content of XrefParser::VegaParser
# $Id: VegaParser.pm,v 1.3 2008/09/02 10:02:10 ianl Exp $

package XrefParser::VegaParser;

use warnings;
use strict;

use base qw( XrefParser::BaseParser );

# Parses the Vega CDNA and Peptide Fasta file format:
#
# >OTTMUST00000004500 cdna:tot chromosome:VEGA:1:60690948:60709172:1 Gene:OTTMUSG00000002254
# GTGACTTCAGTTCACACCACACTCTGCCTTGCTCACAGAGGAGGGGCTGCAGCCCTGGCC
# CTCATCAGAACAATGACACTCAGGCTGCTGTTCTTGGCTCTCAACTTCTTCTCAGTTCAA
# GTAACAGAAAACAAGATTTTGGTAAAGCAGTCGCCCCTGCTTGTGGTAGATAGCAACGAG
#
# >OTTMUSP00000002157 pep:known chromosome:VEGA:1:60690904:60717905:1 Gene:OTTMUSG00000002254 Transcript:OTTMUST00000004499
# MTLRLLFLALNFFSVQVTENKILVKQSPLLVVDSNEVSLSCRYSYNLLAKEFRASLYKGV
# NSDVEVCVGNGNFTYQPQFRSNAEFNCDGDFDNETVTFRLWNLHVNHTDIYFCKIEFMYP
# PPYLDNERSNGTIIHIKEKHLCHTQSSPKLFWALVVVAGVLFCYGLLVTVALCVIWTNSR
# RNRLLQSDYMNMTPRRPGLTRKPYQPYAPARDFAAYRP

# run( $source_id, $species_id, $files_ref, $rel_file, $verbose )
#
# Reads the first file named in @$files_ref as Fasta, builds one xref
# hash per '>' header line (accumulating the following lines as its
# sequence) and hands the whole list to upload_xref_object_graphs().
#
# Parameters (positional, after the optional invocant):
#   $source_id  - stored as SOURCE_ID on every xref
#   $species_id - stored as SPECIES_ID on every xref
#   $files_ref  - array reference; only element 0 is read
#   $rel_file   - accepted for interface compatibility, not used here
#   $verbose    - when true, print the number of parsed xrefs
#
# Returns 1 if the file handle could not be obtained, 0 otherwise.
sub run {

  # The invocant is only consumed when a second caller frame exists.
  # Declaring $self unconditionally avoids the "my $x = ... if COND"
  # construct, whose behaviour perlsyn documents as undefined.
  my $self = defined( caller(1) ) ? shift(@_) : undef;

  my ( $source_id, $species_id, $files_ref, $rel_file, $verbose ) = @_;

  my $file_name = $files_ref->[0];

  my $file_io = $self->get_filehandle($file_name);
  if ( !defined $file_io ) {
    return 1;    # Failed.
  }

  my @xrefs;

  # Record currently receiving sequence lines.  Undefined before the
  # first header and after a header that could not be parsed, so that
  # stray lines are ignored instead of being appended to the wrong
  # record (or to $xrefs[-1] of an empty array, which is fatal).
  my $current_xref;

  while ( defined( my $line = $file_io->getline() ) ) {
    chomp $line;

    if ( substr( $line, 0, 1 ) eq '>' ) {

      # New sequence header.
      substr( $line, 0, 1, '' );    # Remove initial '>'

      # First token is the Vega identifier; the text between the
      # following whitespace character and the first ':' is the
      # alphabet ("cdna" or "pep" in the examples above).
      my ( $vega_id, $vega_alphabet ) =
        ( $line =~ /^(\S+)\s([^:]+):/ );

      if ( !defined $vega_id ) {
        warn "Skipping unparsable Vega Fasta header: '$line'\n";
        $current_xref = undef;
        next;
      }

      $current_xref = {
        'ACCESSION'     => $vega_id,
        'LABEL'         => $vega_id,
        'DESCRIPTION'   => $line,
        'SEQUENCE'      => '',
        'SOURCE_ID'     => $source_id,
        'SPECIES_ID'    => $species_id,
        'SEQUENCE_TYPE' =>
          ( $vega_alphabet eq 'pep' ? 'peptide' : 'dna' ),
        'STATUS' => 'experimental'
      };

      push @xrefs, $current_xref;
    }
    elsif ( defined $current_xref ) {
      $current_xref->{'SEQUENCE'} .= $line;
    }
  }

  $self->upload_xref_object_graphs( \@xrefs );

  print scalar(@xrefs) . " Vega Fasta Xrefs successfully parsed\n"
    if ($verbose);

  return 0;    # Successful.
}

1;