XrefParser Flybase_dmel_GFFv3_Parser
Included librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
Privates (from "my" definitions)
%cache_source = ()
$verbose;
Included modules
Bio::EnsEMBL::Utils::Exception
File::Basename
POSIX qw ( strftime )
Inherit
XrefParser::BaseParser
Synopsis
No synopsis!
Description
No description!
Methods
add_direct_xref
No description
Code
add_synonym
No description
Code
add_xref
No description
Code
create_xrefs
No description
Code
direct_xrefs
No description
Code
external_source_db_nameDescriptionCode
gene_types
No description
Code
get_fields
No description
Code
get_source
No description
Code
get_species
No description
Code
get_synonyms
No description
Code
gff_dbxref
No description
Code
gff_name
No description
Code
gff_object_typesDescriptionCode
gff_ontology
No description
Code
gff_synonym
No description
Code
line_contains_object_to_process
No description
Code
make_dbxref_xref
No description
Code
make_id_xref
No description
Code
make_name_xref
No description
Code
new
No description
Code
relink_synonyms_to_xrefs
No description
Code
run
No description
Code
set_ensembl_object_type
No description
Code
set_flybase_synonyms
No description
Code
source_name_affymetrix
No description
Code
source_name_bdgpinsituexpr
No description
Code
source_name_dedb
No description
Code
source_name_dgrc1
No description
Code
source_name_dgrc2
No description
Code
source_name_drosdel
No description
Code
source_name_drsc
No description
Code
source_name_epd
No description
Code
source_name_fban
No description
Code
source_name_fbgn
No description
Code
source_name_fbpp
No description
Code
source_name_fbtr
No description
Code
source_name_flygrid
No description
Code
source_name_flyreg
No description
Code
source_name_gadfly_gene
No description
Code
source_name_gadfly_transcript
No description
Code
source_name_gadfly_translation
No description
Code
source_name_gb
No description
Code
source_name_gbprotein
No description
Code
source_name_gcr
No description
Code
source_name_genomeRNAi
No description
Code
source_name_gi
No description
Code
source_name_go
No description
Code
source_name_hybrigenics
No description
Code
source_name_if
No description
Code
source_name_interpro
No description
Code
source_name_merops
No description
Code
source_name_miRBase
No description
Code
source_name_mitodrome
No description
Code
source_name_name
No description
Code
source_name_name_prefix
No description
Code
source_name_nrl3d
No description
Code
source_name_pdb
No description
Code
source_name_prefix_ensAGgene
No description
Code
source_name_prefix_ensAMgene
No description
Code
source_name_prefix_ensCEgene
No description
Code
source_name_prefix_ensCFgene
No description
Code
source_name_prefix_ensDMgene
No description
Code
source_name_prefix_ensDRgene
No description
Code
source_name_prefix_ensFRgene
No description
Code
source_name_prefix_ensGGgene
No description
Code
source_name_prefix_ensHSgene
No description
Code
source_name_prefix_ensMMgene
No description
Code
source_name_prefix_ensPTgene
No description
Code
source_name_prefix_ensRNgene
No description
Code
source_name_prefix_ensTNgene
No description
Code
source_name_prefix_modCBgene
No description
Code
source_name_prefix_modCEgene
No description
Code
source_name_prefix_modDDgene
No description
Code
source_name_rfam
No description
Code
source_name_synonym
No description
Code
source_name_tf
No description
Code
source_name_uniprotsp
No description
Code
source_name_uniprottr
No description
Code
species_id
No description
Code
synonyms
No description
Code
transcript_types
No description
Code
translation_types
No description
Code
xrefs
No description
Code
Methods description
external_source_db_namecode    nextTop
  Title       : external_source_db_name
Usage : $obj->external_source_db_name(external db name)
Function : gets (or, when an argument is given, sets) the hard-coded external source db name
Arguments : external db name
Return-Val : string
gff_object_typescodeprevnextTop
  Title       : gff_object_types
Usage : $obj->gff_object_types(array-ref)
Function : gets (or, when an argument is given, sets) the list of GFF type identifiers of the GFF objects that have to be processed
Arguments : array-ref
Return-Val : array-ref
Methods code
add_direct_xrefdescriptionprevnextTop
# Append one direct-xref hash to the list returned by direct_xrefs().
#   $direct_xref : the hash ref to store
# Returns nothing.
sub add_direct_xref {
    my $self = shift;
    my ($direct_xref) = @_;

    push @{ $self->direct_xrefs() }, $direct_xref;

    return;
}
add_synonymdescriptionprevnextTop
# Register the synonym list for one identifier.
#   $unique_id : key under which the synonyms are stored
#   $synref    : array ref of synonyms; a false value leaves the store untouched
# Returns nothing.
sub add_synonym {
  my ($self, $unique_id, $synref) = @_;

  if ($synref) {
    $self->synonyms->{$unique_id} = $synref;
  }

  return;
}
add_xrefdescriptionprevnextTop
# Append one xref hash to the list returned by xrefs().
#   $xref : the hash ref to store
# Returns nothing.
sub add_xref {
    my $self = shift;
    my ($xref) = @_;

    push @{ $self->xrefs() }, $xref;

    return;
}
create_xrefsdescriptionprevnextTop
# Read a GFF file line by line and build xrefs, direct xrefs and synonyms
# for every line whose type column is listed in gff_object_types().
#   $flybase_source_id : accepted for interface compatibility; not used here
#   $file              : path handed to get_filehandle()
# Returns 1 on success, 0 when the file could not be opened.
# Throws when the first attribute of a processed line does not contain "ID=".
sub create_xrefs {
  my ($self, $flybase_source_id, $file) = @_;

  print STDERR "starting to parse $file...." if($verbose);

  my $gff_io = $self->get_filehandle($file);

  if ( !defined $gff_io ) {
    print STDERR "ERROR: Can't open the GFF file $file\n";
    return 0;
  }

  # A lexical line variable with an explicit defined() test: the global $_
  # is left alone and a line that happens to be false does not end the loop.
  while ( defined( my $line = $gff_io->getline() ) ) {
    chomp $line;
    my @col = split /\t/, $line;

    # lines without a (true) 4th column are not processed
    next if !$col[3];

    # test if line contains information for object wanted (CDS,mRNA,gene,..)
    next if !$self->line_contains_object_to_process( $col[2] );

    # work out if we have a gene, transcript or translation
    my $type = $self->set_ensembl_object_type( $col[2] );

    # the 9th column contains all the attributes
    my @desc = split /\;/, ( defined $col[8] ? $col[8] : '' );

    # the ID= is always the first element of this array
    my $unique_id = shift @desc;
    $unique_id = '' if !defined $unique_id;

    # Use !~ here: "!$unique_id =~ m/ID=/" negates the string first and so
    # never detects a missing identifier.
    if ( $unique_id !~ m/ID=/ ) {
      throw("parse-error: There seems to be no Identifier: $unique_id. Suspicous!");
    }

    # for a gene, this will be FBgn, for a transcript this will be FBtr, etc
    $unique_id =~ s/ID=//g;

    # set up xref-entry for EVERY single item
    $self->make_id_xref( $unique_id, $type );

    foreach my $item (@desc) {
      # record Alias= synonyms first so the xrefs built below can pick them up
      $self->set_flybase_synonyms( $item, $unique_id );

      # xrefs for "Name=" attributes
      $self->make_name_xref( $item, $unique_id, $type );

      # xrefs for "Dbxref=" attributes
      $self->make_dbxref_xref( $item, $unique_id, $type );
    }
  }

  $gff_io->close();
  return 1;
}
direct_xrefsdescriptionprevnextTop
# Getter/setter for the list of direct xrefs.
# With an argument, stores it in $self->{_direct_xrefs}; always returns the stored value.
sub direct_xrefs {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_direct_xrefs} = $new_value[0];
  }

  return $self->{_direct_xrefs};
}
external_source_db_namedescriptionprevnextTop
# Getter/setter for the external source db name.
# With an argument, stores it in $self->{_external_source_db_name};
# always returns the stored value.
sub external_source_db_name {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_external_source_db_name} = $new_value[0];
  }

  return $self->{_external_source_db_name};
}



# --------------------------------------------------------------------------------
# Get species (id and name) from file
# For UniProt files the filename is the taxonomy ID
}
gene_typesdescriptionprevnextTop
# Getter/setter for the array ref of GFF types that count as genes.
# With an argument, stores it in $self->{_gene_types}; always returns the stored value.
sub gene_types {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_gene_types} = $new_value[0];
  }

  return $self->{_gene_types};
}
get_fieldsdescriptionprevnextTop
# Extract the value(s) of one attribute from a GFF attribute item.
#   $item   : a single attribute string, e.g. "Alias=a,b"
#   $target : the attribute tag (used as a regex), e.g. "Alias="
# Returns an array ref of the comma-separated values with the tag removed,
# or undef when $item does not contain $target.
sub get_fields {
  my ($item, $target) = @_;

  # the item does not hold information of the requested field
  return undef unless $item =~m/$target/;

  $item =~s/$target//g;

  # more than one synonym / dbxref is separated by commas
  my @values = ( $item =~/,/ ) ? split( /\,/, $item ) : ($item);

  return \@values;
}
get_sourcedescriptionprevnextTop
# Look up the source id for a source name, consulting %cache_source first.
#   $name : source name
# On a cache miss the id is fetched with
# XrefParser::BaseParser->get_source_id_for_source_name() and stored in the cache.
# Returns the (possibly cached) source id.
sub get_source {
  my ($self, $name) = @_;

  my $cached = $cache_source{$name};
  return $cached if defined $cached;

  my $source_id = XrefParser::BaseParser->get_source_id_for_source_name($name);
  $cache_source{$name} = $source_id;

  return $source_id;
}
get_speciesdescriptionprevnextTop
# Determine species id and name from a file name.
# The part of the file's basename before the first '.' is taken as the
# taxonomy id and looked up in the `species` table.
#   $file : path of the input file
# Returns the list ($species_id, $species_name).
# Throws when no row matches the taxonomy id.
sub get_species {
  my ($file) = @_;

  my ($taxonomy_id) = split( /\./, basename($file) );

  my $sth = XrefParser::BaseParser->dbi()->prepare("SELECT species_id,name FROM species WHERE taxonomy_id=?");
  $sth->execute($taxonomy_id);

  my ($species_id, $species_name);
  # if several rows match, the last one wins
  while ( my @row = $sth->fetchrow_array() ) {
    $species_id   = $row[0];
    $species_name = $row[1];
  }
  $sth->finish;

  if ( defined $species_name ) {
    print "Taxonomy ID " . $taxonomy_id . " corresponds to species ID " . $species_id . " name " . $species_name . "\n" if($verbose);
  } else {
    # Report the taxonomy id that was searched for; $species_id is
    # necessarily undefined on this path.
    throw("Cannot find species corresponding to taxonomy ID " . ( defined $taxonomy_id ? $taxonomy_id : '' ) . " - check species table\n");
  }

  return ($species_id, $species_name);
}
get_synonymsdescriptionprevnextTop
# Fetch the synonyms stored for one identifier.
#   $unique_id : key used when the synonyms were added
# Returns whatever is stored under that key in the synonyms() hash
# (undef when nothing is stored).
sub get_synonyms {
  my $self = shift;
  my ($unique_id) = @_;

  return $self->synonyms->{$unique_id};
}
gff_dbxrefdescriptionprevnextTop
# Getter/setter for the attribute tag that introduces database xrefs.
# With an argument, stores it in $self->{_gff_dbxref}; always returns the stored value.
sub gff_dbxref {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_gff_dbxref} = $new_value[0];
  }

  return $self->{_gff_dbxref};
}
gff_namedescriptionprevnextTop
# Getter/setter for the attribute tag that introduces names.
# With an argument, stores it in $self->{_gff_name}; always returns the stored value.
sub gff_name {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_gff_name} = $new_value[0];
  }

  return $self->{_gff_name};
}
gff_object_typesdescriptionprevnextTop
# Getter/setter for the array ref of GFF type identifiers to be processed.
# With an argument, stores it in $self->{_gff_object_types};
# always returns the stored value.
sub gff_object_types {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_gff_object_types} = $new_value[0];
  }

  return $self->{_gff_object_types};
}
gff_ontologydescriptionprevnextTop
# Getter/setter for the attribute tag that introduces ontology terms.
# With an argument, stores it in $self->{_gff_ontology}; always returns the stored value.
sub gff_ontology {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_gff_ontology} = $new_value[0];
  }

  return $self->{_gff_ontology};
}
gff_synonymdescriptionprevnextTop
# Getter/setter for the attribute tag that introduces synonyms.
# With an argument, stores it in $self->{_gff_synonym}; always returns the stored value.
sub gff_synonym {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_gff_synonym} = $new_value[0];
  }

  return $self->{_gff_synonym};
}
line_contains_object_to_processdescriptionprevnextTop
# Decide whether a GFF line should be processed.
#   $type_of_line : the type column of the line (should be mRNA, gene,
#                   pseudogene, CDS, ...)
# Returns 1 when $type_of_line equals one of the entries of
# gff_object_types(), otherwise 0.
sub line_contains_object_to_process {
  my ($self, $type_of_line) = @_;

  return 0 unless defined $type_of_line;

  for my $wanted_type ( @{ $self->gff_object_types } ) {
    # Plain string comparison: the type comes straight from the input file,
    # so it must not be interpreted as a regular expression ('.' would match
    # any one-character type and an unbalanced '(' would die).
    return 1 if $wanted_type eq $type_of_line;
  }

  return 0;
}
make_dbxref_xrefdescriptionprevnextTop
# Build xrefs (and direct xrefs) from one "Dbxref=" attribute item.
#   $item      : a single GFF attribute string
#   $unique_id : ID of the GFF object the attribute belongs to
#   $type      : gene, transcript or translation
# Items that contain neither the gff_dbxref() nor the gff_ontology() tag are
# ignored. Only the values following the gff_dbxref() tag are expanded.
# Each value is matched against the known database prefixes in a fixed order;
# the first match decides the source. Unknown prefixes trigger a warning.
sub make_dbxref_xref {
  my ($self, $item, $unique_id, $type) = @_;

  my $dbxref_tag   = $self->gff_dbxref ;
  my $ontology_tag = $self->gff_ontology;

  if ( $item=~/$dbxref_tag/ || $item=~/$ontology_tag/ ) {

    # Ordered rules for every prefix that needs no knowledge of $type:
    #   [ pattern that selects the rule,
    #     pattern removed from the accession (undef: keep as is),
    #     accessor yielding the source name (undef: do not collect) ]
    my @prefix_rules = (
      [ qr/Affymetrix:/,          qr/Affymetrix:/,          'source_name_affymetrix' ],
      [ qr/DGRC-1:/,              qr/DGRC-1:/,              'source_name_dgrc1' ],
      [ qr/DGRC-2:/,              qr/DGRC-2:/,              'source_name_dgrc2' ],
      [ qr/DRSC:/,                qr/DRSC:/,                'source_name_drsc' ],
      [ qr/EPD:/,                 qr/EPD:/,                 'source_name_epd' ],
      [ qr/FlyReg:/,              qr/FlyReg:/,              'source_name_flyreg' ],
      [ qr/GB:/,                  qr/GB:/,                  'source_name_gb' ],
      [ qr/GB_protein:/,          qr/GB_protein:/,          'source_name_gbprotein' ],
      [ qr/GCR:/,                 qr/GCR:/,                 'source_name_gcr' ],
      [ qr/GI:/,                  qr/GI:/,                  'source_name_gi' ],
      # this is an ontology_term
      [ qr/GO:/,                  qr/GO:/,                  'source_name_go' ],
      [ qr/GenomeRNAi:/,          qr/GenomeRNAi:/,          'source_name_genomeRNAi' ],
      [ qr/INTERPRO:/,            qr/INTERPRO:/,            'source_name_interpro' ],
      [ qr/MEROPS:/,              qr/MEROPS:/,              'source_name_merops' ],
      [ qr/MIR:/,                 qr/MIR:/,                 'source_name_miRBase' ],
      [ qr/MITODROME:/,           qr/MITODROME:/,           'source_name_mitodrome' ],
      [ qr/NRL_3D:/,              qr/NRL_3D:/,              'source_name_nrl3d' ],
      [ qr/PDB:/,                 qr/PDB:/,                 'source_name_pdb' ],
      [ qr/Rfam:/,                qr/Rfam:/,                'source_name_rfam' ],
      # recognised, but deliberately not collected
      [ qr/SO:/,                  undef,                    undef ],
      [ qr/TF:/,                  qr/TF:/,                  'source_name_tf' ],
      [ qr/UniProt\/Swiss-Prot:/, qr/UniProt\/Swiss-Prot:/, 'source_name_uniprotsp' ],
      [ qr/UniProt\/TrEMBL:/,     qr/UniProt\/TrEMBL:/,     'source_name_uniprottr' ],
      [ qr/bdgpinsituexpr:/,      qr/bdgpinsituexpr:/,      'source_name_bdgpinsituexpr' ],
      [ qr/dedb:/,                qr/dedb:/,                'source_name_dedb' ],
      [ qr/drosdel:/,             qr/drosdel:/,             'source_name_drosdel' ],
      [ qr/flygrid:/,             qr/flygrid:/,             'source_name_flygrid' ],
      [ qr/hybrigenics:/,         qr/hybrigenics:/,         'source_name_hybrigenics' ],
      [ qr/if:/,                  qr/if:/,                  'source_name_if' ],
      # for orthologs only the leading "orthologs:" is removed
      [ qr/orthologs:ensAG:/,     qr/orthologs:/,           'source_name_prefix_ensAGgene' ],
      [ qr/orthologs:ensAM:/,     qr/orthologs:/,           'source_name_prefix_ensAMgene' ],
      [ qr/orthologs:ensCE:/,     qr/orthologs:/,           'source_name_prefix_ensCEgene' ],
      [ qr/orthologs:ensCF:/,     qr/orthologs:/,           'source_name_prefix_ensCFgene' ],
      [ qr/orthologs:ensDM:/,     qr/orthologs:/,           'source_name_prefix_ensDMgene' ],
      [ qr/orthologs:ensDR:/,     qr/orthologs:/,           'source_name_prefix_ensDRgene' ],
      [ qr/orthologs:ensFR:/,     qr/orthologs:/,           'source_name_prefix_ensFRgene' ],
      [ qr/orthologs:ensGG:/,     qr/orthologs:/,           'source_name_prefix_ensGGgene' ],
      [ qr/orthologs:ensHS:/,     qr/orthologs:/,           'source_name_prefix_ensHSgene' ],
      [ qr/orthologs:ensMM:/,     qr/orthologs:/,           'source_name_prefix_ensMMgene' ],
      [ qr/orthologs:ensPT:/,     qr/orthologs:/,           'source_name_prefix_ensPTgene' ],
      [ qr/orthologs:ensRN:/,     qr/orthologs:/,           'source_name_prefix_ensRNgene' ],
      [ qr/orthologs:ensTN:/,     qr/orthologs:/,           'source_name_prefix_ensTNgene' ],
      [ qr/orthologs:modCB:/,     qr/orthologs:/,           'source_name_prefix_modCBgene' ],
      [ qr/orthologs:modCE:/,     qr/orthologs:/,           'source_name_prefix_modCEgene' ],
      [ qr/orthologs:modDD:/,     qr/orthologs:/,           'source_name_prefix_modDDgene' ],
    );

    # split the xrefs up into a list
    my $fields = get_fields($item, $dbxref_tag);
    my @accessions = $fields ? @{$fields} : ();

    foreach my $accession (@accessions) {
      my $source_id = undef;

      if ( $accession =~m/FlyBase:/ ) {
        $accession =~s/FlyBase://g;

        # FBgn/FBtr/FBpp ids are only kept on an object of the matching type
        if ( $accession=~m/FBgn/ and $type eq "gene" ) {
          $source_id = $self->get_source( $self->source_name_fbgn );
        }
        elsif ( $accession =~m/FBtr/ and $type eq "transcript" ) {
          $source_id = $self->get_source( $self->source_name_fbtr );
        }
        elsif ( $accession =~m/FBpp/ and $type eq "translation" ) {
          $source_id = $self->get_source( $self->source_name_fbpp );
        }
        elsif ( $accession =~m/FBan/ ) {
          $source_id = $self->get_source( $self->source_name_fban );
        }
      }
      elsif ( $accession =~m/FlyBase_Annotation_IDs:/ ) {
        $accession =~s/FlyBase_Annotation_IDs://g;

        if ( $type eq "gene" ) {
          $source_id = $self->get_source( $self->source_name_gadfly_gene ) ;
        }
        elsif ( $type eq "translation" ) {
          $source_id = $self->get_source( $self->source_name_gadfly_translation );
        }
        elsif ( $type eq "transcript" ) {
          $source_id = $self->get_source( $self->source_name_gadfly_transcript );
        }
      }
      else {
        my $recognised = 0;

        RULE:
        for my $rule (@prefix_rules) {
          my ($selector, $strip, $accessor) = @{$rule};
          next RULE unless $accession =~ $selector;

          $recognised = 1;
          if ( defined $strip ) {
            $accession =~ s/$strip//g;
          }
          if ( defined $accessor ) {
            $source_id = $self->get_source( $self->$accessor() );
          }
          last RULE;    # first matching rule wins
        }

        if ( !$recognised ) {
          warning("Dbxref type not recognised : $accession");
        }
      }

      # only add an xref entry when a source could be determined
      if ($source_id) {
        my $xref = {
          ACCESSION  => $accession,
          LABEL      => $accession,
          SOURCE_ID  => $source_id,
          SPECIES_ID => scalar( $self->species_id() ),
        };
        $self->add_xref($xref);

        if ($type) {
          # The direct xref is the very same hash as the xref, so the two
          # keys below become visible on the stored xref as well.
          $xref->{ENSEMBL_STABLE_ID} = $unique_id;
          $xref->{ENSEMBL_TYPE}      = $type;
          $self->add_direct_xref($xref);
        }
      }
    }

    return;
  }
}
make_id_xrefdescriptionprevnextTop
# Build the xref and direct xref for the ID of a GFF object.
#   $unique_id : the object's ID
#   $type      : gene, transcript or translation (anything else throws)
# Returns nothing.
sub make_id_xref {
  my ($self, $unique_id, $type) = @_;

  my $xref = {
    ACCESSION  => $unique_id,
    LABEL      => $unique_id,
    SPECIES_ID => scalar( $self->species_id() ),
    SYNONYMS   => scalar( $self->get_synonyms($unique_id) ),
  };

  # accessor that yields the source name for each object type
  my %accessor_for = (
    gene        => 'source_name_fbgn',
    transcript  => 'source_name_fbtr',
    translation => 'source_name_fbpp',
  );

  my $source_name = $type;
  my $accessor = defined $type ? $accessor_for{$type} : undef;
  if ($accessor) {
    $source_name = $self->$accessor();
  }
  else {
    throw ("Type $type not recognised");
  }

  $xref->{SOURCE_ID} = $self->get_source($source_name);
  $self->add_xref($xref);

  if ( $type and defined($xref) ) {
    # The direct xref is the same hash as the xref just stored, so these
    # keys are added to that xref too.
    $xref->{ENSEMBL_STABLE_ID} = $unique_id;
    $xref->{ENSEMBL_TYPE}      = $type;
    $xref->{LINKAGE_TYPE}      = 'bla';
    $xref->{SYNONYMS}          = $self->get_synonyms($unique_id);
    $self->add_direct_xref($xref);
  }

  return;
}
make_name_xrefdescriptionprevnextTop
# Build the xref and direct xref for a "Name=" attribute item.
#   $item      : a single GFF attribute string
#   $unique_id : ID of the GFF object the attribute belongs to
#   $type      : gene, transcript or translation
# Items without the gff_name() tag are ignored. Throws when the attribute
# holds more than one name. Returns nothing.
sub make_name_xref {
  my ($self, $item, $unique_id, $type) = @_;

  my $name_tag = $self->gff_name ;

  # nothing to do unless this is a Name= item
  return unless $item=~m/$name_tag/;

  # remove the Name= bit and split the names on a ','
  my $names = get_fields( $item, $name_tag );
  throw("there is more than one id for item $item\n") if $names->[1];

  my $xref = {
    ACCESSION  => $names->[0],
    LABEL      => $names->[0],
    SPECIES_ID => scalar( $self->species_id() ),
    SYNONYMS   => scalar( $self->get_synonyms($unique_id) ),
  };

  # the source name is the prefix plus the type; translation gets an extra "s"
  my $type_suffix = ( $type eq "translation" ) ? $type."s" : $type;
  $xref->{SOURCE_ID} = $self->get_source( $self->source_name_name_prefix().$type_suffix );
  $self->add_xref($xref);

  if ($type) {
    # The direct xref is the same hash as the xref just stored, so these
    # keys are added to that xref too.
    $xref->{ENSEMBL_STABLE_ID} = $unique_id;
    $xref->{ENSEMBL_TYPE}      = $type;
    $xref->{LINKAGE_TYPE}      = 'bla';
    $xref->{SYNONYMS}          = $self->get_synonyms($unique_id);
    $self->add_direct_xref($xref);
  }

  return;
}
newdescriptionprevnextTop
# Constructor. Delegates to the parent constructor, then fills in the
# hard-coded configuration: the GFF types to process, the attribute tags,
# the source name for every supported database and the empty result stores.
# Returns the new object.
sub new {
  my ($proto, @args) = @_;
  my $self = $proto->SUPER::new(@args);

  $self->external_source_db_name('flybase_gff');

  # this list may need to change between releases so check that it's updated
  $self->gff_object_types( [ qw( gene mRNA ncRNA snRNA tRNA rRNA pseudogene snoRNA miRNA) ] );

  # hard-coded field separators out of gff
  $self->gff_name("Name=");
  $self->gff_ontology("Ontology_term=");
  $self->gff_synonym("Alias=");
  $self->gff_dbxref("Dbxref=");

  # hard-coded source-names for different objects out of ./sql/populate_metadata.sql
  # Each entry is [ setter method, source name ]; setters run in list order.
  my @source_names = (
    [ source_name_synonym            => 'flybase_synonym' ],          # any Alias
    [ source_name_name_prefix        => 'FlyBaseName_' ],             # any Name
    [ source_name_fbgn               => 'flybase_gene_id' ],          # ID=FBgn
    [ source_name_fbtr               => 'flybase_transcript_id' ],    # ID=FBtr
    [ source_name_fbpp               => 'flybase_polypeptide_id' ],   # ID=FBpp
    [ source_name_fban               => 'flybase_annotation_id' ],    # ID=FBan
    [ source_name_gadfly_gene        => 'gadfly_gene_cgid' ],         # Dbxref=FlyBase_Annotation_IDs
    [ source_name_gadfly_transcript  => 'gadfly_transcript_cgid' ],   # Dbxref=FlyBase_Annotation_IDs
    [ source_name_gadfly_translation => 'gadfly_translation_cgid' ],  # Dbxref=FlyBase_Annotation_IDs
    [ source_name_affymetrix         => 'AFFY_DrosGenome1' ],         # Dbxref=Affymetrix
    [ source_name_dgrc1              => 'DGRC-1' ],                   # Dbxref=DGRC-1
    [ source_name_dgrc2              => 'DGRC-2' ],                   # Dbxref=DGRC-2
    [ source_name_drsc               => 'DRSC' ],                     # Dbxref=DRSC
    [ source_name_epd                => 'EPD' ],                      # Dbxref=EPD
    [ source_name_flyreg             => 'FlyReg' ],                   # Dbxref=FlyReg
    [ source_name_gb                 => 'EMBL' ],                     # Dbxref=GB
    [ source_name_gbprotein          => 'protein_id' ],               # Dbxref=GB_protein
    [ source_name_gcr                => 'GPCR' ],                     # Dbxref=GCR
    [ source_name_gi                 => 'GI' ],                       # Dbxref=GI
    [ source_name_go                 => 'GO' ],                       # Dbxref=GO
    [ source_name_genomeRNAi         => 'GenomeRNAi' ],               # Dbxref=GenomeRNAi
    [ source_name_interpro           => 'Interpro' ],                 # Dbxref=INTERPRO
    [ source_name_merops             => 'MEROPS' ],                   # Dbxref=MEROPS
    [ source_name_miRBase            => 'miRBase' ],                  # Dbxref=miRBase
    [ source_name_mitodrome          => 'MitoDrome' ],                # Dbxref=MitoDrome
    [ source_name_nrl3d              => 'PDB' ],                      # Dbxref=NRL_3D
    [ source_name_pdb                => 'PDB' ],                      # Dbxref=PDB
    [ source_name_rfam               => 'RFAM' ],                     # Dbxref=Rfam
    [ source_name_tf                 => 'TransFac' ],                 # Dbxref=TF
    [ source_name_uniprotsp          => 'Uniprot/SWISSPROT' ],        # Dbxref=UniProt/Swiss-Prot
    [ source_name_uniprottr          => 'Uniprot/SPTREMBL' ],         # Dbxref=UniProt/TrEMBL
    [ source_name_bdgpinsituexpr     => 'BDGP_insitu_expr' ],         # Dbxref=bdgpinsituexpr
    [ source_name_dedb               => 'DEDb' ],                     # Dbxref=dedb
    [ source_name_drosdel            => 'DrosDel' ],                  # Dbxref=drosdel
    [ source_name_flygrid            => 'FlyGrid' ],                  # Dbxref=flygrid
    [ source_name_hybrigenics        => 'hybrigenics' ],              # Dbxref=hybrigenics
    [ source_name_if                 => 'InteractiveFly' ],           # Dbxref=if
    [ source_name_prefix_ensAGgene   => 'Ens_Ag_gene' ],              # Dbxref=ensAG
    [ source_name_prefix_ensAMgene   => 'Ens_Am_gene' ],              # Dbxref=ensAM
    [ source_name_prefix_ensCEgene   => 'Ens_Ce_gene' ],              # Dbxref=ensCE
    [ source_name_prefix_ensCFgene   => 'Ens_Cf_gene' ],              # Dbxref=ensCF
    [ source_name_prefix_ensDMgene   => 'Ens_Dm_gene' ],              # Dbxref=ensDM
    [ source_name_prefix_ensDRgene   => 'Ens_Dr_gene' ],              # Dbxref=ensDR
    [ source_name_prefix_ensFRgene   => 'Ens_Fr_gene' ],              # Dbxref=ensFR
    [ source_name_prefix_ensGGgene   => 'Ens_Gg_gene' ],              # Dbxref=ensGG
    [ source_name_prefix_ensHSgene   => 'Ens_Hs_gene' ],              # Dbxref=ensHS
    [ source_name_prefix_ensMMgene   => 'Ens_Mm_gene' ],              # Dbxref=ensMM
    [ source_name_prefix_ensPTgene   => 'Ens_Pt_gene' ],              # Dbxref=ensPT
    [ source_name_prefix_ensRNgene   => 'Ens_Rn_gene' ],              # Dbxref=ensRN
    [ source_name_prefix_ensTNgene   => 'Ens_Tn_gene' ],              # Dbxref=ensTN
    [ source_name_prefix_modCBgene   => 'modCB_gene' ],               # Dbxref=modCB
    [ source_name_prefix_modCEgene   => 'modCE_gene' ],               # Dbxref=modCE
    [ source_name_prefix_modDDgene   => 'modDD_gene' ],               # Dbxref=modDD
  );

  for my $entry (@source_names) {
    my ($setter, $source_name) = @{$entry};
    $self->$setter($source_name);
  }

  $self->gene_types( [ qw (gene) ] ) ;
  $self->translation_types( [ qw (protein) ] ) ;

  # The transcript_types may change from release to release so check that this list is up-to-date
  $self->transcript_types( [ qw (mRNA ncRNA snRNA tRNA rRNA pseudogene snoRNA miRNA) ] ) ;

  # empty stores that the parser fills while reading the file
  $self->{'_xrefs'}        = [];
  $self->{'_direct_xrefs'} = [];
  $self->{'_synonyms'}     = {};

  return $self;
}
# --------------------------------------------------------------------------------
# The large number of SQL calls should now be faster because the results are cached.
}
relink_synonyms_to_xrefsdescriptionprevnextTop
# Refresh the SYNONYMS entry of already collected xrefs.
# For every xref whose source name starts with "FlyBaseName_" or looks like
# "flybase_..._id", SYNONYMS is replaced by the synonyms stored for the
# xref's ENSEMBL_STABLE_ID.
sub relink_synonyms_to_xrefs {
  my ($self) = @_;

  foreach my $xref ( @{ $self->xrefs } ) {
    my $source_name =
      XrefParser::BaseParser->get_source_name_for_source_id( $xref->{SOURCE_ID} );

    # xrefs of any other source keep their synonyms untouched
    next unless $source_name =~ m/^FlyBaseName_/ || $source_name =~ m/^flybase_.*_id$/;

    $xref->{SYNONYMS} = $self->get_synonyms( $xref->{ENSEMBL_STABLE_ID} );
  }
}
# --------------------------------------------------------------------------------
# Parse file into array of xref objects
# parse
}
rundescriptionprevnextTop
# Entry point of the parser: parse the GFF file and upload the results.
# Arguments (after the invocant, which is only taken when there is a
# caller at level 1):
#   $source_id    : not used in this method
#   $species_id   : species id; derived from the file name when undefined
#   $files        : array ref of input files; only the first one is parsed
#   $release_file : not used in this method
#   $verbose      : stored in the file-level $verbose to switch on messages
# Returns 0 on success and 1 when the file could not be parsed.
sub run {
  # Declare first, assign conditionally: "my $x = ... if COND" has
  # undefined behaviour according to perlsyn.
  my $self;
  $self = shift if ( defined( caller(1) ) );
  # NOTE(review): without a caller at level 1 $self stays undefined and the
  # method calls below cannot work - confirm whether direct invocation is
  # still meant to be supported.

  my $source_id    = shift;
  my $species_id   = shift;
  my $files        = shift;
  my $release_file = shift;
  $verbose         = shift;

  my $file = ${$files}[0];

  my $species_name;
  if ( !defined($species_id) ) {
    ($species_id, $species_name) = get_species($file);
  }
  $self->species_id($species_id) ;

  my $external_source_db_name = $self->external_source_db_name() ;
  my $flybase_source_id = $self->get_source($external_source_db_name);

  if ( !$self->create_xrefs($flybase_source_id, $file) ) {
    return 1;
  }

  my @xrefs = @{ $self->xrefs };
  $self->relink_synonyms_to_xrefs();
  my @direct_xrefs = @{ $self->direct_xrefs } ;

  # delete previous if running directly rather than via BaseParser
  if ( !defined( caller(1) ) ) {
    print "Deleting previous xrefs for these sources\n" if($verbose);
    XrefParser::BaseParser->delete_by_source(\@xrefs);
  }

  print "... parsed.\n" if($verbose);

  print STDERR "uploading ".scalar(@xrefs)." xrefs's\n" if($verbose);
  XrefParser::BaseParser->upload_xref_object_graphs(\@xrefs);

  print STDERR "uploading ".scalar(@direct_xrefs)." direct-xrefs's\n" if($verbose);
  XrefParser::BaseParser->upload_direct_xrefs(\@direct_xrefs);

  return 0;
}
set_ensembl_object_typedescriptionprevnextTop
# Map a GFF type identifier to an Ensembl object type.
#   $t : identifier in gff for object : CDS,mRNA,gene,pseudogene,snRNA,....
# The identifier is matched against the entries of gene_types(), then
# translation_types(), then transcript_types(); each entry is used as an
# anchored pattern.
# Returns 'gene', 'translation' or 'transcript'; returns nothing (undef in
# scalar context) when the identifier is in none of the lists.
sub set_ensembl_object_type {
  my ($self, $t) = @_ ;

  for my $gene_type ( @{ $self->gene_types } ) {
    return 'gene' if $t=~m/^$gene_type$/;
  }

  for my $translation_type ( @{ $self->translation_types } ) {
    return 'translation' if $t=~m/^$translation_type$/;
  }

  for my $transcript_type ( @{ $self->transcript_types } ) {
    return 'transcript' if $t=~m/^$transcript_type$/;
  }

  # Explicit result for unknown identifiers: the value of a sub that ends
  # in a loop is unspecified.
  return;
}
set_flybase_synonymsdescriptionprevnextTop
# Store the synonyms found in one attribute item.
#   $item      : a single GFF attribute string
#   $unique_id : ID of the GFF object the attribute belongs to
# When the item contains the gff_synonym() tag, its values are registered
# with add_synonym() and returned as an array ref; otherwise undef is returned.
sub set_flybase_synonyms {
  my ($self, $item, $unique_id) = @_;

  my $synonym_tag = $self->gff_synonym;

  # not a synonym item
  return undef unless $item=~/$synonym_tag/;

  my $fields = get_fields($item, $synonym_tag);
  my @synonyms = $fields ? @{$fields} : ();

  $self->add_synonym($unique_id, \@synonyms);

  return \@synonyms;
}
source_name_affymetrixdescriptionprevnextTop
# Getter/setter for the source name of "Affymetrix:" Dbxref entries.
# With an argument, stores it in $self->{_sn_affymetrix}; always returns the stored value.
sub source_name_affymetrix {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_affymetrix} = $new_value[0];
  }

  return $self->{_sn_affymetrix};
}
source_name_bdgpinsituexprdescriptionprevnextTop
# Getter/setter for the source name of "bdgpinsituexpr:" Dbxref entries.
# With an argument, stores it in $self->{_sn_bdgpinsituexpr}; always returns the stored value.
sub source_name_bdgpinsituexpr {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_bdgpinsituexpr} = $new_value[0];
  }

  return $self->{_sn_bdgpinsituexpr};
}
source_name_dedbdescriptionprevnextTop
# Getter/setter for the source name of "dedb:" Dbxref entries.
# With an argument, stores it in $self->{_sn_dedb}; always returns the stored value.
sub source_name_dedb {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_dedb} = $new_value[0];
  }

  return $self->{_sn_dedb};
}
source_name_dgrc1descriptionprevnextTop
# Getter/setter for the source name of "DGRC-1:" Dbxref entries.
# With an argument, stores it in $self->{_sn_dgrc1}; always returns the stored value.
sub source_name_dgrc1 {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_dgrc1} = $new_value[0];
  }

  return $self->{_sn_dgrc1};
}
source_name_dgrc2descriptionprevnextTop
# Getter/setter for the source name of "DGRC-2:" Dbxref entries.
# With an argument, stores it in $self->{_sn_dgrc2}; always returns the stored value.
sub source_name_dgrc2 {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_dgrc2} = $new_value[0];
  }

  return $self->{_sn_dgrc2};
}
source_name_drosdeldescriptionprevnextTop
# Getter/setter for the source name of "drosdel:" Dbxref entries.
# With an argument, stores it in $self->{_sn_drosdel}; always returns the stored value.
sub source_name_drosdel {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_drosdel} = $new_value[0];
  }

  return $self->{_sn_drosdel};
}
source_name_drscdescriptionprevnextTop
# Getter/setter for the source name of "DRSC:" Dbxref entries.
# With an argument, stores it in $self->{_sn_drsc}; always returns the stored value.
sub source_name_drsc {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_drsc} = $new_value[0];
  }

  return $self->{_sn_drsc};
}
source_name_epddescriptionprevnextTop
# Getter/setter for the source name of "EPD:" Dbxref entries.
# With an argument, stores it in $self->{_sn_epd}; always returns the stored value.
sub source_name_epd {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_epd} = $new_value[0];
  }

  return $self->{_sn_epd};
}
source_name_fbandescriptionprevnextTop
# Getter/setter for the source name of FBan identifiers.
# With an argument, stores it in $self->{_sn_fban}; always returns the stored value.
sub source_name_fban {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_fban} = $new_value[0];
  }

  return $self->{_sn_fban};
}
source_name_fbgndescriptionprevnextTop
# Getter/setter for the source name of FBgn (gene) identifiers.
# With an argument, stores it in $self->{_source_name_gene}; always returns the stored value.
sub source_name_fbgn {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_source_name_gene} = $new_value[0];
  }

  return $self->{_source_name_gene};
}
source_name_fbppdescriptionprevnextTop
# Getter/setter for the source name of FBpp (translation) identifiers.
# With an argument, stores it in $self->{_source_name_fbpp}; always returns the stored value.
sub source_name_fbpp {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_source_name_fbpp} = $new_value[0];
  }

  return $self->{_source_name_fbpp};
}
source_name_fbtrdescriptionprevnextTop
# Getter/setter for the source name of FBtr (transcript) identifiers.
# With an argument, stores it in $self->{_source_name_transcript};
# always returns the stored value.
sub source_name_fbtr {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_source_name_transcript} = $new_value[0];
  }

  return $self->{_source_name_transcript};
}
source_name_flygriddescriptionprevnextTop
# Getter/setter for the source name of "flygrid:" Dbxref entries.
# With an argument, stores it in $self->{_sn_flygrid}; always returns the stored value.
sub source_name_flygrid {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_flygrid} = $new_value[0];
  }

  return $self->{_sn_flygrid};
}
source_name_flyregdescriptionprevnextTop
# Getter/setter for the source name of "FlyReg:" Dbxref entries.
# With an argument, stores it in $self->{_sn_flyreg}; always returns the stored value.
sub source_name_flyreg {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_flyreg} = $new_value[0];
  }

  return $self->{_sn_flyreg};
}
source_name_gadfly_genedescriptionprevnextTop
# Getter/setter for the source name of "FlyBase_Annotation_IDs:" entries on genes.
# With an argument, stores it in $self->{_source_name_gadfly_gene};
# always returns the stored value.
sub source_name_gadfly_gene {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_source_name_gadfly_gene} = $new_value[0];
  }

  return $self->{_source_name_gadfly_gene};
}
source_name_gadfly_transcriptdescriptionprevnextTop
# Getter/setter for the source name of "FlyBase_Annotation_IDs:" entries on transcripts.
# With an argument, stores it in $self->{_source_name_gadfly_transcript};
# always returns the stored value.
sub source_name_gadfly_transcript {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_source_name_gadfly_transcript} = $new_value[0];
  }

  return $self->{_source_name_gadfly_transcript};
}
source_name_gadfly_translationdescriptionprevnextTop
# Getter/setter for the source name of "FlyBase_Annotation_IDs:" entries on translations.
# With an argument, stores it in $self->{_source_name_gadfly_translation};
# always returns the stored value.
sub source_name_gadfly_translation {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_source_name_gadfly_translation} = $new_value[0];
  }

  return $self->{_source_name_gadfly_translation};
}
source_name_gbdescriptionprevnextTop
# Getter/setter for the source name of "GB:" Dbxref entries.
# With an argument, stores it in $self->{_sn_gb}; always returns the stored value.
sub source_name_gb {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_gb} = $new_value[0];
  }

  return $self->{_sn_gb};
}
source_name_gbproteindescriptionprevnextTop
# Getter/setter for the source name of "GB_protein:" Dbxref entries.
# With an argument, stores it in $self->{_sn_gbprotein}; always returns the stored value.
sub source_name_gbprotein {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_gbprotein} = $new_value[0];
  }

  return $self->{_sn_gbprotein};
}
source_name_gcrdescriptionprevnextTop
# Getter/setter for the source name of "GCR:" Dbxref entries.
# With an argument, stores it in $self->{_sn_gcr}; always returns the stored value.
sub source_name_gcr {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_gcr} = $new_value[0];
  }

  return $self->{_sn_gcr};
}
source_name_genomeRNAidescriptionprevnextTop
# Getter/setter for the source name of "GenomeRNAi:" Dbxref entries.
# With an argument, stores it in $self->{_sn_genomeRNAi}; always returns the stored value.
sub source_name_genomeRNAi {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_genomeRNAi} = $new_value[0];
  }

  return $self->{_sn_genomeRNAi};
}
source_name_gidescriptionprevnextTop
# Getter/setter for the source name of "GI:" Dbxref entries.
# With an argument, stores it in $self->{_sn_gi}; always returns the stored value.
sub source_name_gi {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_gi} = $new_value[0];
  }

  return $self->{_sn_gi};
}
source_name_godescriptionprevnextTop
# Getter/setter for the source name of "GO:" Dbxref entries.
# With an argument, stores it in $self->{_sn_go}; always returns the stored value.
sub source_name_go {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_go} = $new_value[0];
  }

  return $self->{_sn_go};
}
source_name_hybrigenicsdescriptionprevnextTop
# Getter/setter for the source name of "hybrigenics:" Dbxref entries.
# With an argument, stores it in $self->{_sn_hybrigenics}; always returns the stored value.
sub source_name_hybrigenics {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_hybrigenics} = $new_value[0];
  }

  return $self->{_sn_hybrigenics};
}
source_name_ifdescriptionprevnextTop
# Getter/setter for the source name of "if:" Dbxref entries.
# With an argument, stores it in $self->{_sn_if}; always returns the stored value.
sub source_name_if {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_if} = $new_value[0];
  }

  return $self->{_sn_if};
}
source_name_interprodescriptionprevnextTop
# Getter/setter for the source name of "INTERPRO:" Dbxref entries.
# With an argument, stores it in $self->{_sn_interpro}; always returns the stored value.
sub source_name_interpro {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_interpro} = $new_value[0];
  }

  return $self->{_sn_interpro};
}
source_name_meropsdescriptionprevnextTop
# Getter/setter for the source name of "MEROPS:" Dbxref entries.
# With an argument, stores it in $self->{_sn_merops}; always returns the stored value.
sub source_name_merops {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_merops} = $new_value[0];
  }

  return $self->{_sn_merops};
}
source_name_miRBasedescriptionprevnextTop
# Getter/setter for the source name of "MIR:" Dbxref entries.
# With an argument, stores it in $self->{_sn_miRBase}; always returns the stored value.
sub source_name_miRBase {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_miRBase} = $new_value[0];
  }

  return $self->{_sn_miRBase};
}
source_name_mitodromedescriptionprevnextTop
# Getter/setter for the source name of "MITODROME:" Dbxref entries.
# With an argument, stores it in $self->{_sn_mitodrome}; always returns the stored value.
sub source_name_mitodrome {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_mitodrome} = $new_value[0];
  }

  return $self->{_sn_mitodrome};
}
source_name_namedescriptionprevnextTop
# Getter/setter for the value kept in $self->{_source_name_name}.
# With an argument, stores it; always returns the stored value.
sub source_name_name {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_source_name_name} = $new_value[0];
  }

  return $self->{_source_name_name};
}
source_name_name_prefixdescriptionprevnextTop
# Getter/setter for the prefix of the source names used for "Name=" xrefs.
# With an argument, stores it in $self->{_source_name_name_prefix};
# always returns the stored value.
sub source_name_name_prefix {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_source_name_name_prefix} = $new_value[0];
  }

  return $self->{_source_name_name_prefix};
}
source_name_nrl3ddescriptionprevnextTop
# Getter/setter for the source name of "NRL_3D:" Dbxref entries.
# With an argument, stores it in $self->{_sn_nrl3d}; always returns the stored value.
sub source_name_nrl3d {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_nrl3d} = $new_value[0];
  }

  return $self->{_sn_nrl3d};
}
source_name_pdbdescriptionprevnextTop
# Getter/setter for the source name of "PDB:" Dbxref entries.
# With an argument, stores it in $self->{_sn_pdb}; always returns the stored value.
sub source_name_pdb {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_pdb} = $new_value[0];
  }

  return $self->{_sn_pdb};
}
source_name_prefix_ensAGgenedescriptionprevnextTop
# Getter/setter for the source name of "orthologs:ensAG:" Dbxref entries.
# With an argument, stores it in $self->{_sn_prefix_ensAG}; always returns the stored value.
sub source_name_prefix_ensAGgene {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_prefix_ensAG} = $new_value[0];
  }

  return $self->{_sn_prefix_ensAG};
}
source_name_prefix_ensAMgenedescriptionprevnextTop
# Getter/setter for the source name of "orthologs:ensAM:" Dbxref entries.
# With an argument, stores it in $self->{_sn_prefix_ensAM}; always returns the stored value.
sub source_name_prefix_ensAMgene {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_prefix_ensAM} = $new_value[0];
  }

  return $self->{_sn_prefix_ensAM};
}
source_name_prefix_ensCEgenedescriptionprevnextTop
# Getter/setter for the source name of "orthologs:ensCE:" Dbxref entries.
# With an argument, stores it in $self->{_sn_prefix_ensCE}; always returns the stored value.
sub source_name_prefix_ensCEgene {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{_sn_prefix_ensCE} = $new_value[0];
  }

  return $self->{_sn_prefix_ensCE};
}
source_name_prefix_ensCFgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_prefix_ensCF' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_prefix_ensCF'
#                (undef if never set)
sub source_name_prefix_ensCFgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_prefix_ensCF} = $args[0];
  }

  return $self->{_sn_prefix_ensCF};
}
source_name_prefix_ensDMgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_prefix_ensDM' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_prefix_ensDM'
#                (undef if never set)
sub source_name_prefix_ensDMgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_prefix_ensDM} = $args[0];
  }

  return $self->{_sn_prefix_ensDM};
}
source_name_prefix_ensDRgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_prefix_ensDR' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_prefix_ensDR'
#                (undef if never set)
sub source_name_prefix_ensDRgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_prefix_ensDR} = $args[0];
  }

  return $self->{_sn_prefix_ensDR};
}
source_name_prefix_ensFRgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_prefix_ensFR' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_prefix_ensFR'
#                (undef if never set)
sub source_name_prefix_ensFRgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_prefix_ensFR} = $args[0];
  }

  return $self->{_sn_prefix_ensFR};
}
source_name_prefix_ensGGgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_prefix_ensGG' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_prefix_ensGG'
#                (undef if never set)
sub source_name_prefix_ensGGgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_prefix_ensGG} = $args[0];
  }

  return $self->{_sn_prefix_ensGG};
}
source_name_prefix_ensHSgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_prefix_ensHS' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_prefix_ensHS'
#                (undef if never set)
sub source_name_prefix_ensHSgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_prefix_ensHS} = $args[0];
  }

  return $self->{_sn_prefix_ensHS};
}
source_name_prefix_ensMMgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_prefix_ensMM' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_prefix_ensMM'
#                (undef if never set)
sub source_name_prefix_ensMMgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_prefix_ensMM} = $args[0];
  }

  return $self->{_sn_prefix_ensMM};
}
source_name_prefix_ensPTgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_prefix_ensPT' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_prefix_ensPT'
#                (undef if never set)
sub source_name_prefix_ensPTgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_prefix_ensPT} = $args[0];
  }

  return $self->{_sn_prefix_ensPT};
}
source_name_prefix_ensRNgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_ensRN' entry of the object hash.
# Note that the key has no 'prefix_' part, unlike the method name.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_ensRN' (undef if never set)
sub source_name_prefix_ensRNgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_ensRN} = $args[0];
  }

  return $self->{_sn_ensRN};
}
source_name_prefix_ensTNgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_ensTN' entry of the object hash.
# Note that the key has no 'prefix_' part, unlike the method name.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_ensTN' (undef if never set)
sub source_name_prefix_ensTNgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_ensTN} = $args[0];
  }

  return $self->{_sn_ensTN};
}
source_name_prefix_modCBgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_modCB' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_modCB' (undef if never set)
sub source_name_prefix_modCBgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_modCB} = $args[0];
  }

  return $self->{_sn_modCB};
}
source_name_prefix_modCEgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_modCE' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_modCE' (undef if never set)
sub source_name_prefix_modCEgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_modCE} = $args[0];
  }

  return $self->{_sn_modCE};
}
source_name_prefix_modDDgenedescriptionprevnextTop
# Combined getter/setter for the '_sn_modDD' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_modDD' (undef if never set)
sub source_name_prefix_modDDgene {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_modDD} = $args[0];
  }

  return $self->{_sn_modDD};
}
source_name_rfamdescriptionprevnextTop
# Combined getter/setter for the '_sn_rfam' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_rfam' (undef if never set)
sub source_name_rfam {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_rfam} = $args[0];
  }

  return $self->{_sn_rfam};
}
source_name_synonymdescriptionprevnextTop
# Combined getter/setter for the '_source_name_synonym' entry of the
# object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_source_name_synonym'
#                (undef if never set)
sub source_name_synonym {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_source_name_synonym} = $args[0];
  }

  return $self->{_source_name_synonym};
}
source_name_tfdescriptionprevnextTop
# Combined getter/setter for the '_sn_tf' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_tf' (undef if never set)
sub source_name_tf {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_tf} = $args[0];
  }

  return $self->{_sn_tf};
}
source_name_uniprotspdescriptionprevnextTop
# Combined getter/setter for the '_sn_uniprotsp' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_uniprotsp' (undef if never set)
sub source_name_uniprotsp {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_uniprotsp} = $args[0];
  }

  return $self->{_sn_uniprotsp};
}
source_name_uniprottrdescriptionprevnextTop
# Combined getter/setter for the '_sn_uniprottr' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_sn_uniprottr' (undef if never set)
sub source_name_uniprottr {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_sn_uniprottr} = $args[0];
  }

  return $self->{_sn_uniprottr};
}
species_iddescriptionprevnextTop
# Combined getter/setter for the '_species_id' entry of the object hash.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_species_id' (undef if never set)
sub species_id {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_species_id} = $args[0];
  }

  return $self->{_species_id};
}
synonymsdescriptionprevnextTop
# Combined getter/setter for the '_synonyms' entry of the object hash.
# The value is stored and handed back as given; no copy is made.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_synonyms' (undef if never set)
sub synonyms {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_synonyms} = $args[0];
  }

  return $self->{_synonyms};
}
transcript_typesdescriptionprevnextTop
# Combined getter/setter for the '_trans_types' entry of the object hash.
# The value is stored and handed back as given; no copy is made.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_trans_types' (undef if never set)
sub transcript_types {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_trans_types} = $args[0];
  }

  return $self->{_trans_types};
}
translation_typesdescriptionprevnextTop
# Combined getter/setter for the '_tl_types' entry of the object hash.
# The value is stored and handed back as given; no copy is made.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_tl_types' (undef if never set)
sub translation_types {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_tl_types} = $args[0];
  }

  return $self->{_tl_types};
}

 1;

 #  Drosophila v5.3 : xrefs 
# Gff_file external_db_id db_name
# ==
# Affymetrix 3120 AFFY_DrosGenome1
# DGRC-1 830 DGRC-1
# DGRC-2 831 DGRC-2
# DRSC 840 DRSC
# EPD 10100 EPD
# FlyBase 800 flybase_gene_id
# FlyBase_Annotation_IDs 804 flybase_annotation_id
# FlyReg 850 FlyReg
# GB 700 EMBL
# GB_protein 1700 protein_id
# GCR 10200 GPCR
# GI 10900 GI
# GO 1000 GO
# GenomeRNAi 860 GenomeRNAi
# INTERPRO 1200 Interpro
# MEROPS 10300 MEROPS
# MIR 10400 miRBase
# MITODROME 870 MitoDrome
# NRL_3D 1600 PDB
# PDB 1600 PDB
# Rfam 4200 RFAM
# TF 10500 TransFac
# UniProt/Swiss-Prot 2200 Uniprot/SWISSPROT
# UniProt/TrEMBL 2000 Uniprot/SPTREMBL
# bdgpinsituexpr 880 BDGP_insitu_expr
# dedb 890 DEDb
# drosdel 881 DrosDel
# flygrid 882 FlyGrid
# hybrigenics 883 hybrigenics
# if 884 InteractiveFly
# ensAG 6600 Ens_Ag_gene # Anopheles gambiae
# ensAM 6630 Ens_Am_gene # Apis mellifera?
# ensCE 6660 Ens_Ce_gene # Caenorhabditis elegans
# ensCF 5700 Ens_Cf_gene # Canis familiaris
# ensDM 6690 Ens_Dm_gene #
# ensDR 5800 Ens_Dr_gene # Danio rerio
# ensFR 6720 Ens_Fr_gene # Takifugu rubripes
# ensGG 6400 Ens_Gg_gene # Gallus gallus
# ensHS 2700 Ens_Hs_gene # Homo sapiens
# ensMM 5000 Ens_Mm_gene # Mus musculus
# ensPT 6750 Ens_Pt_gene # Pan troglodytes
# ensRN 6200 Ens_Rn_gene # Rattus norvegicus
# ensTN 6810 Ens_Tn_gene # Tetraodon nigroviridis
# modCB 10600 modCB # InParanoid Model organism database, Caenorhabditis briggsae
# modCE 10700 modCE # Caenorhabditis elegans
# modDD 10800 modDD # Dictyostelium discoideum
}
xrefsdescriptionprevnextTop
# Combined getter/setter for the '_xrefs' entry of the object hash.
# The value is stored and handed back as given; no copy is made.
#   Arg [1]    : (optional) new value; when supplied it replaces the stored one
#   Returntype : the value currently held in '_xrefs' (undef if never set)
sub xrefs {
  my ($self, @args) = @_;

  # Only overwrite when an argument was actually supplied.
  if (@args) {
    $self->{_xrefs} = $args[0];
  }

  return $self->{_xrefs};
}
General documentation
No general documentation available.