# Raw content of XrefParser::VbGFF3Parser
package XrefParser::VbGFF3Parser;
use strict;
use warnings;
use File::Basename;
use Bio::SeqIO;
# perl xref_parser.pl -user ensadmin -pass PASS -host genebuild2 -dbname snr_anopheles_gambiae_48_xref_test -species anopheles_gambiae -source ARRAY_JHSPH_AGGAMBER_15k_v1 -download_path $mywork/VB_xref/downloads/ -checkdownload
use base qw( XrefParser::CoordinateParser );
# Parser for GFF3-format probe mappings from Vectorbase
# Parse a VectorBase GFF3 probe-mapping file and register one coordinate
# xref (via add_xref) for every value of the "Name" attribute found on each
# feature line that has a complete location.
#
# Arguments (positional, after the optional invocant):
#   $source_id    - passed through unchanged to add_xref()
#   $species_id   - passed through unchanged to add_xref()
#   $files        - array reference; only its first element is read
#   $release_file - accepted for interface compatibility, not used here
#   $verbose      - when true, progress is reported on STDERR
#
# Returns 0 once the file has been processed, or 1 when no input file was
# given or it could not be opened.
# NOTE(review): a non-zero return is assumed to signal failure to the
# caller - confirm against the code that invokes run().
sub run {
    # The invocant is only shifted off when run() was itself called from
    # inside another sub, exactly as before. It is declared on its own line
    # because "my $x = ... if COND" has undefined behaviour (see perlsyn).
    my $self;
    $self = shift if ( defined( caller(1) ) );

    my $source_id    = shift;
    my $species_id   = shift;
    my $files        = shift;
    my $release_file = shift;
    my $verbose      = shift;

    my $file = $files->[0];
    if ( !defined $file ) {
        print STDERR "ERROR: no VB GFF3 file given\n";
        return 1;
    }

    my $gff_fh;
    if ( !open( $gff_fh, '<', $file ) ) {
        print STDERR "ERROR: Can't open VB GFF3 file $file: $!\n";
        return 1;
    }

    # Number of xrefs actually handed to add_xref().
    my $xref_count = 0;

  LINE:
    while ( my $ln = <$gff_fh> ) {
        chomp($ln);

        # Skip blank lines and GFF3 directives / comments.
        next LINE if ( $ln =~ m/^\s*(?:#|$)/ );

        # A GFF3 feature line has nine tab-separated columns; the unused
        # ones (source, type, score, phase) are discarded.
        my ( $seqid, undef, undef, $start, $end, undef, $strand, undef,
             $attributes )
          = split( /\t/, $ln );

        # Anything with fewer than nine columns cannot carry a Name.
        next LINE if ( !defined $attributes );

        # '.' and '?' are stored as 0 (more properly this would be NULL);
        # any other strand value is passed through unchanged.
        # NOTE(review): '+' / '-' are not converted to 1 / -1 here - confirm
        # what add_xref() expects.
        if ( $strand eq "." || $strand eq "?" ) {
            $strand = 0;
        }

        # Attribute column: "key=value;key=v1,v2;..." -> upper-cased key
        # mapped to the list of its values.
        my %values_of;
        unless ( $attributes eq "." ) {
            foreach my $pair ( split( /;/, $attributes ) ) {

                # Limit of 2 keeps any further '=' inside the value.
                my ( $key, $value ) = split( /=/, $pair, 2 );
                next if ( !defined $key || !defined $value );

                my @items =
                  ( $value =~ m/,/ ) ? split( /,/, $value ) : ($value);

                foreach my $item (@items) {
                    # Undo the escaping of the reserved characters. /g so
                    # that every occurrence is decoded, not just the first.
                    # NOTE(review): the GFF3 specification uses hexadecimal
                    # escapes (%3B, %2C, %3D); the decimal-looking codes
                    # below are kept as found - confirm which form the
                    # input files really contain.
                    $item =~ s/%59/;/g;
                    $item =~ s/%44/,/g;
                    $item =~ s/%61/=/g;
                    push( @{ $values_of{ uc($key) } }, $item );
                }
            }
        }

        # Features without a complete location are silently ignored.
        next LINE
          if ( ( $seqid eq "." ) || ( $start eq "." ) || ( $end eq "." ) );

        # One xref per Name; the single feature span is used for the
        # transcript, CDS and exon coordinates alike.
        foreach my $name ( @{ $values_of{"NAME"} || [] } ) {
            my %xref = ( 'accession'  => $name,
                         'chromosome' => $seqid,
                         'strand'     => $strand,
                         'txStart'    => $start,
                         'txEnd'      => $end,
                         'cdsStart'   => $start,
                         'cdsEnd'     => $end,
                         'exonStarts' => $start,
                         'exonEnds'   => $end );
            print STDERR "$name, $start, $end\n" if ($verbose);
            $self->add_xref( $source_id, $species_id, \%xref );
            $xref_count++;
        }
    }

    print STDERR $xref_count . " VB GFF3 xrefs succesfully parsed\n"
      if ($verbose);
    close($gff_fh);
    return 0;
} ## end sub run
#sub _parse_error {
# my ($attributes, $errortext) = @_;
# my %attributes = %{$attributes};
# print STDERR "A:". $attributes{"ARRAY"}->[0].
# "\tS:". ($attributes{"PROBESET"}->[0] || "").
# "\tP:". $attributes{"NAME"}->[0].
# "\t".$errortext."\n";
# }
1;