XrefParser FlybaseParser
Included libraries | Package variables | General documentation | Methods
Toolbar
WebCvs | Raw content
Package variables
Privates (from "my" definitions)
$verbose;
Included modules
Carp
Inherit
XrefParser::BaseParser
Synopsis
No synopsis!
Description
No description!
Methods
get_source_id_for_source_name
No description
Code
run
No description
Code
Methods description
None available.
Methods code
get_source_id_for_source_name | description | prev | next | Top
# Memoising wrapper around the parent class's
# get_source_id_for_source_name().
#
# The first argument is the source name; it is used as the key into the
# file-level %source_id cache.  All arguments (minus the invocant) are
# forwarded to the parent implementation whenever the cache holds no
# defined value for that name.  When the file-level $verbose is true the
# freshly fetched ID is printed.
#
# A warning is issued via carp() every time the resulting ID is
# undefined or negative.  The cached value is returned either way.
sub get_source_id_for_source_name {
  my ( $self, @lookup_args ) = @_;
  my $source_name = $lookup_args[0];

  # Only ask the parent class when nothing usable is cached yet.
  unless ( defined( $source_id{$source_name} ) ) {
    my $fetched_id =
      $self->SUPER::get_source_id_for_source_name(@lookup_args);
    $source_id{$source_name} = $fetched_id;

    if ($verbose) {
      printf( "source_id for source '%s' is %d\n",
              $source_name, $fetched_id );
    }
  }

  my $cached_id  = $source_id{$source_name};
  my $is_missing = !defined($cached_id) || $cached_id < 0;

  carp( sprintf( "Can not find source_id for source '%s'", $source_name ) )
    if ($is_missing);

  return $cached_id;
}
run | description | prev | next | Top
# Parse a FlyBase GFF data file and store Xrefs and Direct Xrefs for
# every line whose source column is 'FlyBase' and whose type column is a
# key of the file-level %object_types.
#
# Positional arguments (after the invocant, see below):
#   1. source ID     - accepted for interface compatibility, not used;
#                      source IDs are looked up per source name instead.
#   2. species ID    - passed on to get_valid_codes() and add_xref().
#   3. files         - array reference; only its first element is read.
#   4. release file  - accepted for interface compatibility, not used.
#   5. verbose flag  - stored in the file-level $verbose; enables the
#                      progress and summary output.
#
# No return value is set explicitly; the sub ends with the summary loop.
#
# Dies (via Carp::croak) if get_filehandle() yields an undefined handle.
sub run {
  # The invocant is shifted off only when caller(1) is defined.  The
  # variable is declared unconditionally first because the behaviour of
  # "my $x = ... if COND" is undefined (see perlsyn).
  my $self;
  $self = shift if ( defined( caller(1) ) );

  my ( undef, $species_id, $files, undef, $verbose_flag ) = @_;
  $verbose = $verbose_flag;

  my $data_file = $files->[0];

  # Fetch hashes of already stored Uniprot and Interpro accessions.
  my %pre_xref_ids = (
    'Uniprot'  => $self->get_valid_codes( 'uniprot',  $species_id ),
    'Interpro' => $self->get_valid_codes( 'interpro', $species_id )
  );

  # $xref_ids{label}{accession} = xref ID; doubles as the lookup cache
  # and as the data behind the summary printed at the end.
  my %xref_ids;

  my $data_io = $self->get_filehandle($data_file);
  if ( !defined($data_io) ) {
    Carp::croak(
      sprintf( "Can not get a filehandle for data file '%s'",
               defined($data_file) ? $data_file : '(undefined)' ) );
  }

  my ( $count_read, $count_skipped, $last_count_read ) = ( 0, 0, 0 );
  my $status_interval = 30;

  # Maps the first four characters of an 'ID' to the object type.
  my %type_for_prefix = (
    'FBgn' => 'gene',
    'FBtr' => 'transcript',
    'FBpp' => 'translation'
  );

  # Fetch the xref for $accession under $source_name from the cache, or
  # create it with add_xref() and cache it, then attach it to the
  # feature ($id, $type) as a direct xref.  $primary_accession is what
  # is handed to add_xref() as its first argument; $accession is its
  # third argument and the cache key.
  my $store_xref = sub {
    my ( $source_name, $source_id, $primary_accession, $accession, $id,
         $type )
      = @_;

    if ( !exists( $xref_ids{$source_name}{$accession} ) ) {
      $xref_ids{$source_name}{$accession} =
        $self->add_xref( $primary_accession, undef, $accession, '',
                         $source_id, $species_id );
    }

    $self->add_direct_xref( $xref_ids{$source_name}{$accession},
                            $id, $type, '' );
    return;
  };

  # Periodic progress report; the handler re-arms itself.
  local $SIG{ALRM} = sub {
    printf( "%d lines read, %d skipped, %d parsed; %d lines/s\n",
            $count_read,
            $count_skipped,
            $count_read - $count_skipped,
            ( $count_read - $last_count_read )/$status_interval )
      if ($verbose);
    $last_count_read = $count_read;
    alarm($status_interval);
  };
  alarm($status_interval);

LINE:
  while ( defined( my $line = $data_io->getline() ) ) {
    ++$count_read;

    # Skip comment lines at the start of the file.
    if ( substr( $line, 0, 1 ) eq '#' ) {
      ++$count_skipped;
      next LINE;
    }

    chomp($line);

    # Split each line into fields.
    my @fields = split( /\t/, $line );

    # Only pick out the interesting lines.
    if ( !(    defined( $fields[1] )
            && $fields[1] eq 'FlyBase'
            && defined( $fields[2] )
            && exists( $object_types{ $fields[2] } ) ) )
    {
      ++$count_skipped;
      next LINE;
    }

    # Go through each attribute (from the 9th field), split them up
    # into key-value pairs and store them.  A missing 9th field is
    # treated as empty, and the split is limited to two parts so that a
    # value containing '=' is kept whole.
    my %attributes;
    my $attribute_field = defined( $fields[8] ) ? $fields[8] : '';
    foreach my $attribute ( split( /;/, $attribute_field ) ) {
      my ( $key, $value ) = split( /=/, $attribute, 2 );
      if (    defined($key)
           && $key ne ''
           && defined($value)
           && $value ne '' )
      {
        $attributes{$key} = $value;
      }
    }

    # Every xref stored below is attached to the feature's ID, so a
    # line without one cannot be processed.
    my $id = $attributes{'ID'};
    if ( !defined($id) ) {
      ++$count_skipped;
      next LINE;
    }

    my $type = $type_for_prefix{ substr( $id, 0, 4 ) };
    if ( !defined($type) ) { $type = 'unknown' }

    # For the 'Dbxref' and 'Ontology_term' attributes, split them up on
    # commas, divide into key-value pairs, and store them.
    foreach my $attribute_key ( 'Dbxref', 'Ontology_term' ) {
      if ( exists( $attributes{$attribute_key} ) ) {
        my %tmphash;
        foreach
          my $subattribute ( split( /,/, $attributes{$attribute_key} ) )
        {
          my ( $key, $value ) = split( /:/, $subattribute, 2 );
          push( @{ $tmphash{$key} }, $value );
        }

        # Replace the attribute entry with the hash.
        $attributes{$attribute_key} = \%tmphash;
      }
    }

    # Use an empty hash when the line carries no 'Dbxref' attribute.
    my $dbxref = $attributes{'Dbxref'} || {};

    #-------------------------------------------------------------------
    # Store Xrefs and Direct Xrefs for all the interesting Dbxref
    # entries.
    #-------------------------------------------------------------------
  DBXREF:
    foreach my $dbxref_name ( keys( %{$dbxref} ) ) {
      next DBXREF if ( !exists( $source_name_map{$dbxref_name} ) );

      my $source_name = $source_name_map{$dbxref_name};
      my $source_id =
        $self->get_source_id_for_source_name($source_name);

      # Treat Uniprot and Interpro differently: only link to xrefs that
      # are already stored, never create new ones.
      my ($pre_source) = ( $source_name =~ /^(Uniprot|Interpro)/ );

      if ( defined($pre_source) ) {
        foreach my $accession ( @{ $dbxref->{$dbxref_name} } ) {
          if ( exists( $pre_xref_ids{$pre_source}{$accession} ) ) {
            $self->add_direct_xref(
                                $pre_xref_ids{$pre_source}{$accession},
                                $id, $type, '' );
            $xref_ids{$pre_source}{$accession} =
              $pre_xref_ids{$pre_source}{$accession};
          }
          else {
            $xref_ids{ $pre_source . ' (missed)' }{$accession} = -1;
          }
        }
      }
      else {
        foreach my $accession ( @{ $dbxref->{$dbxref_name} } ) {
          # The Dbxref 'bdgpinsituexpr' needs case sensitivity, just
          # like the FlyBase Names, so use the ID as the accession
          # for this source.
          my $primary_accession =
            ( $dbxref_name eq 'bdgpinsituexpr' ) ? $id : $accession;

          $store_xref->( $source_name, $source_id, $primary_accession,
                         $accession, $id, $type );
        }
      }
    } ## end foreach my $dbxref_name ( keys...

    #-------------------------------------------------------------------
    # Store Xrefs and Direct Xrefs for the GO 'Ontology_term' entries.
    #-------------------------------------------------------------------
    if ( exists( $attributes{'Ontology_term'}{'GO'} ) ) {
      my $source_name = 'GO';
      my $source_id =
        $self->get_source_id_for_source_name($source_name);

      foreach my $accession ( @{ $attributes{'Ontology_term'}{'GO'} } ) {
        $store_xref->( $source_name, $source_id, $accession,
                       $accession, $id, $type );
      }
    }

    #-------------------------------------------------------------------
    # Store Xrefs and Direct Xrefs for the 'FlyBase_Annotation_IDs'
    # Dbxref entry (depends on type of 'ID').
    #-------------------------------------------------------------------
    if ( exists( $dbxref->{'FlyBase_Annotation_IDs'} ) ) {
      my $source_name = $special_source_name_map{$type}{'Dbxref'};
      my $source_id =
        $self->get_source_id_for_source_name($source_name);

      foreach my $accession ( @{ $dbxref->{'FlyBase_Annotation_IDs'} } ) {
        $store_xref->( $source_name, $source_id, $accession,
                       $accession, $id, $type );
      }
    }

    #-------------------------------------------------------------------
    # Store Xref and Direct Xref for the 'Name' (depends on type of
    # 'ID').  The ID is used as the accession, the name as the label.
    #-------------------------------------------------------------------
    {
      my $source_name = $special_source_name_map{$type}{'Name'};
      my $source_id =
        $self->get_source_id_for_source_name($source_name);

      $store_xref->( $source_name, $source_id, $id,
                     $attributes{'Name'}, $id, $type );
    }

    #-------------------------------------------------------------------
    # Store Xref and Direct Xref for the 'ID' (depends on type of 'ID').
    #-------------------------------------------------------------------
    {
      my $source_name = $special_source_name_map{$type}{'ID'};
      my $source_id =
        $self->get_source_id_for_source_name($source_name);

      $store_xref->( $source_name, $source_id, $id, $id, $id, $type );
    }
  } ## end while ( defined( my $line...

  # Cancel the pending progress alarm.  The local()ised handler goes
  # away when this sub returns, so a SIGALRM delivered afterwards would
  # not be handled by it.
  alarm(0);

  $data_io->close();

  print("FlybaseParser Summary:\n") if ($verbose);
  foreach my $label ( sort( keys(%xref_ids) ) ) {
    my $accessions = $xref_ids{$label};
    printf( "\t%-32s %6d\n", $label, scalar( keys( %{$accessions} ) ) )
      if ($verbose);
  }
} ## end sub run
1;
}
General documentation
No general documentation available.