XrefParser FlybaseParser
Included libraries | Package variables | General documentation | Methods
WebCvs | Raw content
Package variables
Privates (from "my" definitions)
Included modules
No synopsis!
No description!
No description
No description
Methods description
None available.
Methods code
# Return the source_id for $source_name, remembering the answer in the
# file-level %source_id hash so that each name is only looked up once.
#
# Arguments:
#   $source_name - name of the xref source to look up.
#
# Returns:
#   The value stored in $source_id{$source_name}.  This may be undefined
#   or negative when the lookup failed; a warning is emitted in that case.
#
# When the file-level $verbose flag is true, the id is printed the first
# time a name is resolved.
sub get_source_id_for_source_name {
  my $self = shift;
  my ($source_name) = @_;

  if ( !defined( $source_id{$source_name} ) ) {
    # NOTE(review): the right-hand side of this assignment was absent from
    # the reviewed text.  Delegating to the parent class implementation of
    # the same method is the presumed original -- confirm against the
    # upstream module.
    $source_id{$source_name} =
      $self->SUPER::get_source_id_for_source_name($source_name);

    printf( "source_id for source '%s' is %d\n",
            $source_name, $source_id{$source_name} ) if ($verbose);
  }

  if ( !defined( $source_id{$source_name} )
       || $source_id{$source_name} < 0 )
  {
    # NOTE(review): the statement that consumed this message was absent
    # from the reviewed text.  A non-fatal warning is assumed because the
    # sub goes on to return the (possibly invalid) value -- confirm.
    warn(
       sprintf( "Can not find source_id for source '%s'", $source_name ) );
  }

  return $source_id{$source_name};
}
# Parse a tab-separated FlyBase data file and store an xref plus a direct
# xref for every attribute of interest on every interesting line.
#
# Arguments (positional):
#   $source_id    - accepted for interface compatibility; not used here
#                   (per-source ids are looked up by name instead).
#   $species_id   - passed through to get_valid_codes() and add_xref().
#   $files        - array reference; only its first element is read.
#   $release_file - accepted for interface compatibility; not used here.
#   $verbose      - stored in the file-level $verbose flag; enables the
#                   progress and summary output.
#
# The invocant is only shifted off @_ when caller(1) is defined.
#
# A line is processed when its 2nd field is 'FlyBase' and its 3rd field
# is a key of %object_types.  Its 9th field is split into key=value
# attributes.  The object type is derived from the first four characters
# of the 'ID' attribute (FBgn => gene, FBtr => transcript,
# FBpp => translation, anything else => unknown).
#
# Dies if no filehandle can be obtained for the data file.
#
# No explicit return value (unchanged from the original).
sub run {
  # "my $x = ... if COND" has undefined behaviour in Perl, so the
  # declaration and the conditional assignment are kept separate.
  my $self;
  $self = shift if ( defined( caller(1) ) );

  my $source_id    = shift;    # unused, kept for positional compatibility
  my $species_id   = shift;
  my $files        = shift;
  my $release_file = shift;    # unused, kept for positional compatibility
  $verbose = shift;

  my $data_file = $files->[0];

  # Fetch hashes of already stored Uniprot and Interpro accessions.
  my %pre_xref_ids = (
            'Uniprot'  => $self->get_valid_codes( 'uniprot',  $species_id ),
            'Interpro' => $self->get_valid_codes( 'interpro', $species_id )
  );

  # $xref_ids{label}{key} = xref_id.  Doubles as the cache of xrefs that
  # were already created and as the data for the final summary.
  my %xref_ids;

  my %type_for_prefix = ( 'FBgn' => 'gene',
                          'FBtr' => 'transcript',
                          'FBpp' => 'translation' );

  # Return the xref_id cached under ($source_name, $cache_key), creating
  # the xref with add_xref() the first time the pair is seen.
  my $get_or_add_xref = sub {
    my ( $source_name, $xref_source_id, $cache_key, $accession, $label ) =
      @_;

    if ( !exists( $xref_ids{$source_name}{$cache_key} ) ) {
      $xref_ids{$source_name}{$cache_key} =
        $self->add_xref( $accession,      undef, $label, '',
                         $xref_source_id, $species_id );
    }

    return $xref_ids{$source_name}{$cache_key};
  };

  my $data_io = $self->get_filehandle($data_file);
  if ( !defined($data_io) ) {
    # Fail with a clear message rather than with "Can't call method
    # getline on an undefined value" further down.
    die( sprintf( "FlybaseParser: no filehandle for data file '%s'\n",
                  defined($data_file) ? $data_file : '(undefined)' ) );
  }

  my ( $count_read, $count_skipped, $last_count_read ) = ( 0, 0, 0 );

  my $status_interval = 30;    # seconds between progress reports

  # Periodic progress report; the handler re-arms the alarm itself.
  local $SIG{ALRM} = sub {
    printf( "%d lines read, %d skipped, %d parsed; %d lines/s\n",
            $count_read, $count_skipped,
            $count_read - $count_skipped,
            ( $count_read - $last_count_read )/$status_interval )
      if ($verbose);
    $last_count_read = $count_read;
    alarm($status_interval);
  };
  alarm($status_interval);

  while ( defined( my $line = $data_io->getline() ) ) {
    ++$count_read;

    # Skip comment lines.
    if ( substr( $line, 0, 1 ) eq '#' ) { ++$count_skipped; next }

    chomp($line);

    # Split each line into fields.
    my @fields = split( /\t/, $line );

    # Only pick out the interesting lines.
    if ( !(    defined( $fields[1] )
            && $fields[1] eq 'FlyBase'
            && defined( $fields[2] )
            && exists( $object_types{ $fields[2] } ) ) )
    {
      ++$count_skipped;
      next;
    }

    # Go through each attribute (from the 9th field), split them up into
    # key-value pairs and store them.  The split limit of 2 keeps any
    # further '=' characters inside the value.
    my %attributes;
    my $attribute_field = defined( $fields[8] ) ? $fields[8] : '';

    foreach my $attribute ( split( /;/, $attribute_field ) ) {
      my ( $key, $value ) = split( /=/, $attribute, 2 );
      if (    defined($key)
           && $key ne ''
           && defined($value)
           && $value ne '' )
      {
        $attributes{$key} = $value;
      }
    }

    # Every xref stored below is attached to the ID, so a line without
    # one cannot be used.
    my $id = $attributes{'ID'};
    if ( !defined($id) ) { ++$count_skipped; next }

    # NOTE(review): for type 'unknown' the %special_source_name_map
    # lookups below presumably yield undef source names -- confirm that
    # %object_types only admits FBgn/FBtr/FBpp features.
    my $prefix = substr( $id, 0, 4 );
    my $type =
      exists( $type_for_prefix{$prefix} )
      ? $type_for_prefix{$prefix}
      : 'unknown';

    # For the 'Dbxref' and 'Ontology_term' attributes, split them up on
    # commas, divide into key-value pairs, and store them.
    foreach my $attribute_key ( 'Dbxref', 'Ontology_term' ) {
      next if ( !exists( $attributes{$attribute_key} ) );

      my %tmphash;
      foreach
        my $subattribute ( split( /,/, $attributes{$attribute_key} ) )
      {
        my ( $key, $value ) = split( /:/, $subattribute, 2 );

        # An entry without a 'name:value' shape carries no accession.
        next if ( !defined($key) || !defined($value) );

        push( @{ $tmphash{$key} }, $value );
      }

      # Replace the attribute entry with the hash.
      $attributes{$attribute_key} = \%tmphash;
    }

    my $dbxref = $attributes{'Dbxref'} || {};

    #-------------------------------------------------------------------
    # Store Xrefs and Direct Xrefs for all the interesting Dbxref
    # entries.

    foreach my $dbxref_name ( keys( %{$dbxref} ) ) {
      next if ( !exists( $source_name_map{$dbxref_name} ) );

      my $source_name = $source_name_map{$dbxref_name};
      my $xref_source_id =
        $self->get_source_id_for_source_name($source_name);

      # Treat Uniprot and Interpro differently: only link to accessions
      # that were already stored, and count the others as missed.
      my ($pre_source) = ( $source_name =~ /^(Uniprot|Interpro)/ );

      if ( defined($pre_source) ) {
        foreach my $accession ( @{ $dbxref->{$dbxref_name} } ) {
          if ( exists( $pre_xref_ids{$pre_source}{$accession} ) ) {
            my $pre_xref_id = $pre_xref_ids{$pre_source}{$accession};

            $self->add_direct_xref( $pre_xref_id, $id, $type, '' );
            $xref_ids{$pre_source}{$accession} = $pre_xref_id;
          } else {
            $xref_ids{ $pre_source . ' (missed)' }{$accession} = -1;
          }
        }
        next;
      }

      foreach my $accession ( @{ $dbxref->{$dbxref_name} } ) {
        # The Dbxref 'bdgpinsituexpr' needs case sensitivity, just
        # like the FlyBase Names, so use the ID as the accession
        # for this source.
        my $xref_accession =
          ( $dbxref_name eq 'bdgpinsituexpr' ) ? $id : $accession;

        my $xref_id =
          $get_or_add_xref->( $source_name, $xref_source_id,
                              $accession,   $xref_accession,
                              $accession );

        $self->add_direct_xref( $xref_id, $id, $type, '' );
      }
    } ## end foreach my $dbxref_name ( keys...

    #-------------------------------------------------------------------
    # Store Xrefs and Direct Xrefs for the GO 'Ontology_term' entries.

    my $ontology = $attributes{'Ontology_term'};
    if ( defined($ontology) && exists( $ontology->{'GO'} ) ) {
      my $source_name = 'GO';
      my $xref_source_id =
        $self->get_source_id_for_source_name($source_name);

      foreach my $accession ( @{ $ontology->{'GO'} } ) {
        my $xref_id =
          $get_or_add_xref->( $source_name, $xref_source_id,
                              $accession,   $accession,
                              $accession );

        $self->add_direct_xref( $xref_id, $id, $type, '' );
      }
    }

    #-------------------------------------------------------------------
    # Store Xrefs and Direct Xrefs for the 'FlyBase_Annotation_IDs'
    # Dbxref entry (depends on type of 'ID').

    if ( exists( $dbxref->{'FlyBase_Annotation_IDs'} ) ) {
      my $source_name = $special_source_name_map{$type}{'Dbxref'};
      my $xref_source_id =
        $self->get_source_id_for_source_name($source_name);

      foreach my $accession ( @{ $dbxref->{'FlyBase_Annotation_IDs'} } ) {
        my $xref_id =
          $get_or_add_xref->( $source_name, $xref_source_id,
                              $accession,   $accession,
                              $accession );

        $self->add_direct_xref( $xref_id, $id, $type, '' );
      }
    }

    #-------------------------------------------------------------------
    # Store Xref and Direct Xref for the 'Name' (depends on type of
    # 'ID').  The xref uses the ID as accession and the Name as label,
    # and is cached by Name.  Lines without a Name are left out so that
    # they do not all share one xref cached under an undefined key.

    if ( defined( $attributes{'Name'} ) ) {
      my $source_name = $special_source_name_map{$type}{'Name'};
      my $xref_source_id =
        $self->get_source_id_for_source_name($source_name);

      my $name = $attributes{'Name'};

      my $xref_id =
        $get_or_add_xref->( $source_name, $xref_source_id,
                            $name,        $id,
                            $name );

      $self->add_direct_xref( $xref_id, $id, $type, '' );
    }

    #-------------------------------------------------------------------
    # Store Xref and Direct Xref for the 'ID' (depends on type of 'ID').

    {
      my $source_name = $special_source_name_map{$type}{'ID'};
      my $xref_source_id =
        $self->get_source_id_for_source_name($source_name);

      my $xref_id =
        $get_or_add_xref->( $source_name, $xref_source_id, $id, $id,
                            $id );

      $self->add_direct_xref( $xref_id, $id, $type, '' );
    }

  } ## end while ( defined( my $line...

  # Cancel the pending progress alarm.  Otherwise it would fire after
  # this sub has returned and the local SIGALRM handler has been
  # restored to whatever was installed before.
  alarm(0);

  $data_io->close();

  print("FlybaseParser Summary:\n") if ($verbose);
  foreach my $label ( sort( keys(%xref_ids) ) ) {
    my $accessions = $xref_ids{$label};
    printf( "\t%-32s %6d\n", $label, scalar( keys( %{$accessions} ) ) )
      if ($verbose);
  }
} ## end sub run
General documentation
No general documentation available.