# Raw content of Bio::EnsEMBL::Analysis::Config::ImportArrays
#
# package Bio::EnsEMBL::Analysis::Config::ImportArrays
#
# Cared for by EnsEMBL (ensembl-dev@ebi.ac.uk)
#
# Copyright GRL & EBI
#
# You may distribute this module under the same terms as perl itself

# POD documentation - main docs before the code

=head1 NAME

Bio::EnsEMBL::Analysis::Config::ImportArrays

=head1 SYNOPSIS

    use Bio::EnsEMBL::Analysis::Config::ImportArrays;

=head1 DESCRIPTION

This contains the configuration for importing arrays from flat files.
It is entirely dependent on the arrays.env environment, which can be used
to set up and run the pipeline in an easy and interactive way. This
contains all possible configurations, which will then be set dynamically
by the RunnableDB for each instance using the input_id as a key into a
separate ImportArrays.conf file, listed here as ARRAY_FORMAT_FILE.

The layout of the configuration is a set of hashes, each one keyed by
logic name. There is also a DEFAULT hash, which is used as the default
for all logic names (this was the configuration pattern stolen from
Exonerate2Genes, although in this case it's very unlikely you will need
to have different configs by logic name).

=head1 CONTACT

=cut

package Bio::EnsEMBL::Analysis::Config::ImportArrays;

use strict;
use warnings;

# Hash containing config info
# -- one hashnode per logic name, with a 'DEFAULT' logic name provided
our %Config;

# Header pattern shared by the vendors whose fasta headers are simply
# ">ARRAY:PROBE ..." (Illumina, Codelink, Agilent, Phalanx, Leiden).
my $SIMPLE_IIDREGEXP = '^>(\S+):(\S+).*$';

# Build an ARRAY_PARAMS hash for a list of array names.
#
# Every array known to this config is an oligo expression array, so the
# individual descriptors differ only in name, vendor and class.
#
# Arguments : $vendor - value stored under -vendor (e.g. 'AFFY')
#             $class  - value stored under -class  (e.g. 'AFFY_UTR')
#             @names  - array names; each is used both as the hash key
#                       and as the -name value
# Returns   : hashref of the form
#               { $name => { -name   => $name,
#                            -vendor => $vendor,
#                            -format => 'EXPRESSION',
#                            -type   => 'OLIGO',
#                            -class  => $class }, ... }
#
# Optional descriptor fields which are not set for any array here:
#   -setsize     => undef,
#   -description => '',
# To give a single array extra fields, merge an explicit entry in, e.g.
#   ARRAY_PARAMS => { %{ _array_params(...) }, 'Custom' => { ... } },
sub _array_params {
    my ($vendor, $class, @names) = @_;

    my %params_for;
    for my $name (@names) {
        $params_for{$name} = {
            -name   => $name,
            -vendor => $vendor,
            -format => 'EXPRESSION',
            -type   => 'OLIGO',
            -class  => $class,
        };
    }

    return \%params_for;
}

%Config = (

    #This entire hash is exported as the global $ARRAY_CONFIG var
    #each key will be exported as $ARRAY_CONFIG->{'_CONFIG_'.$key}
    #Dependent on logic name of RunnableDB
    ARRAY_CONFIG => {

        DEFAULT => {

            #These are now defined dynamically or via the ImportArrays.conf file
            # All input probes must be kept in one huge (possibly redundant) fasta file
            #QUERYSEQS => $ENV{'RAW_FASTA'},

            # The output of this module writes a set of affy probes into the OUTDB.affy_probe table,
            # and also writes the nonredundant probes into this fasta file,
            # with the fasta headers keyed with the affy probes' internal id.
            #NON_REDUNDANT_PROBE_SEQS => $ENV{'NR_FASTA'},

            # DB containing all affy_arrays, affy_probes and (next step) affy_features
            OUTDB => {
                -dbname          => $ENV{'DB_NAME'},
                -host            => $ENV{'DB_HOST'},
                -port            => $ENV{'DB_PORT'},
                -user            => $ENV{'DB_USER'},
                -pass            => $ENV{'DB_PASS'},
                -species         => $ENV{'SPECIES'},    #Only here until we fix the DBAdaptor new method
                -multispecies_db => $ENV{'MULTISPECIES_DB'},
                -species_id      => $ENV{'SPECIES_ID'},
            },

            #Optional, must define if dnadb is not on ensembldb
            #Not used, but will fail if dnadb autoguessing fails
            DNADB => {
                -dbname          => $ENV{'DNADB_NAME'},
                -host            => $ENV{'DNADB_HOST'},
                -port            => $ENV{'DNADB_PORT'},
                -user            => $ENV{'DNADB_USER'},
                -pass            => $ENV{'DNADB_PASS'},
                -species         => $ENV{'SPECIES'},
                -multispecies_db => $ENV{'DNADB_MULTISPECIES_DB'},
                -species_id      => $ENV{'DNADB_SPECIES_ID'},
            },

            #Used for building the format specific NR fasta file
            OUTPUT_DIR => $ENV{'WORK_DIR'},

            #This defines how to parse the file headers
            IIDREGEXP => '^>probe:(\S+):(\S+):(\S+:\S+;).*$',    #AFFY

            #We also need a hash to define the input field order
            #This will be used to set the relevant hash values
            IFIELDORDER => {
                #do we need to add fields for class to enable skipping on control probes
                #here and in regexp
                #We duplicate the field 0 between array.name and array_chip.design_id
                #-name       => 2,
                #-array      => 0,
                #-array_chip => 0,
                #-probe_set  => 1,
            },

            #ISKIPLIST/REGEX
            #ISKIPFIELD

            ARRAY_PARAMS => {
                #'MG-U74Cv2' => {
                #    -name   => 'MG-U74Cv2',
                #    -vendor => 'AFFY',
                #    #-setsize => undef,
                #    -format => 'EXPRESSION',
                #    -type   => 'OLIGO',
                #    -class  => 'AFFY_ST',
                #    #-description => '',
                #},
                #'MoGene-1_0-st-v1' => {
                #    -name   => 'MoGene-1_0-st-v1',
                #    -vendor => 'AFFY',
                #    #-setsize => undef,
                #    -format => 'EXPRESSION',
                #    -type   => 'OLIGO',
                #    #-description => '',
                #    -class  => 'AFFY_ST',
                #},
            },
        },

        #%{$Config::ArrayMapping::import_arrays},

        IMPORT_AFFY_UTR_ARRAYS => {
            IIDREGEXP => '^>probe:(\S+):(\S+):(\S+:\S+;).*$',

            IFIELDORDER => {
                -name       => 2,
                -array_chip => 0,
                -array      => 0,
                -probe_set  => 1,
            },

            ARRAY_PARAMS => _array_params(
                'AFFY', 'AFFY_UTR',

                'X_tropicalis',      #Frog
                'Canine_2',          #Dog
                'Rhesus',            #Macaque
                'CINT06a520380F',    #C intestinalis
                'Bovine',            #Cow
                'Chicken',           #Chicken
                'C_elegans',         #C_elegans
                'Zebrafish',         #Zebra fish

                #Rat
                'RAE230A', 'RAE230B', 'Rat230_2',
                'RG-U34A', 'RG-U34B', 'RG-U34C',
                'RN-U34',  'RT-U34',

                #Human
                'HC-G110',   'U133_X3P', 'HuGeneFL',
                'HG_U95A',   'HG-U95E',  'HG-U95D', 'HG-U95C', 'HG-U95B',
                'HG_U95Av2', 'HG-U133_Plus_2',
                'HG-U133B',  'HG-U133A', 'HG-U133A_2',
                'HG-Focus',

                #Mouse
                'MG-U74Cv2', 'MG-U74A',     'MG-U74Av2',
                'MG-U74B',   'MG-U74Bv2',   'MG-U74C',
                'MOE430A',   'MOE430B',
                'Mouse430A_2', 'Mouse430_2',
                'Mu11KsubA', 'Mu11KsubB',

                #Drosophila
                'DrosGenome1', 'Drosophila_2',

                #Yeast (format EXPRESSION? UTR?)
                'Yeast_2', 'YG-S98',

                #EColi
                'E_coli_2',

                #Then add user defined/custom ones here?
                #values %{$ArrayConfig->{ARRAY_PARAMS}}
                #Could write this automatically from env or script?
            ),

            INPUT_FORMAT => 'FASTA',
        },

        IMPORT_AFFY_ST_ARRAYS => {
            # The ID field name is an alternation, so it needs a (non-capturing)
            # group; a bracketed character class would accept any "<char>ID="
            # field (e.g. ProbeID=) whose last letter is in the set.
            IIDREGEXP => '^>probe:(\S+):(\S+);\S+:\S+;.*(?:TranscriptCluster|ProbeSet)ID=([0-9]+);.*$',

            IFIELDORDER => {
                -name       => 1,
                -array_chip => 0,
                -array      => 0,
                -probe_set  => 2,
            },

            ARRAY_PARAMS => _array_params(
                'AFFY', 'AFFY_ST',

                #Platypus
                #'RaEx-1_0-st-v1',

                #Rat
                'RaEx-1_0-st-v1', 'RaGene-1_0-st-v1',

                #Human
                'HuGene-1_0-st-v1', 'HuEx-1_0-st-v2',

                #Mouse
                'MoGene-1_0-st-v1', 'MoEx-1_0-st-v1',
            ),

            INPUT_FORMAT => 'FASTA',
        },

        IMPORT_ILLUMINA_WG_ARRAYS => {
            IIDREGEXP => $SIMPLE_IIDREGEXP,

            IFIELDORDER => {
                -name       => 1,
                -array_chip => 0,
                -array      => 0,
                #-probe_set => 2,#This could be annotation
            },

            ARRAY_PARAMS => _array_params(
                'ILLUMINA', 'ILLUMINA_WG',

                'MouseWG_6_V1', 'MouseWG_6_V2',

                #No longer accessible via website?
                'HumanWG_6_V1',
                'HumanWG_6_V2', 'HumanWG_6_V3',
            ),

            INPUT_FORMAT => 'FASTA',
        },

        #CODELINK
        IMPORT_CODELINK_ARRAYS => {
            IIDREGEXP => $SIMPLE_IIDREGEXP,

            IFIELDORDER => {
                -name       => 1,
                -array_chip => 0,
                -array      => 0,
                #-probe_set => 2,#This could be annotation
            },

            ARRAY_PARAMS => _array_params('CODELINK', 'CODELINK', 'CODELINK'),

            INPUT_FORMAT => 'FASTA',
        },

        #AGILENT
        IMPORT_AGILENT_ARRAYS => {
            IIDREGEXP => $SIMPLE_IIDREGEXP,

            IFIELDORDER => {
                -name       => 1,
                -array_chip => 0,
                -array      => 0,
                #-probe_set => 2,#This could be annotation
            },

            ARRAY_PARAMS => _array_params(
                'AGILENT', 'AGILENT',

                #Danio
                'G2518A', 'G2519F',
            ),

            INPUT_FORMAT => 'FASTA',
        },

        #PHALANX
        #Human
        #ftp://ftp.phalanxbiotech.com/pub/probe_sequences/hoa
        #Mouse
        #ftp://ftp.phalanxbiotech.com/pub/probe_sequences/moa
        IMPORT_PHALANX_ARRAYS => {
            IIDREGEXP => $SIMPLE_IIDREGEXP,

            IFIELDORDER => {
                -name       => 1,
                -array_chip => 0,
                -array      => 0,
                #-probe_set => 2,#This could be annotation
            },

            ARRAY_PARAMS => _array_params('PHALANX', 'PHALANX', 'OneArray'),

            INPUT_FORMAT => 'FASTA',
        },

        #LEIDEN
        IMPORT_LEIDEN_ARRAYS => {
            IIDREGEXP => $SIMPLE_IIDREGEXP,

            IFIELDORDER => {
                -name       => 1,
                -array_chip => 0,
                -array      => 0,
                #-probe_set => 2,#This could be annotation
            },

            ARRAY_PARAMS => _array_params(
                'LEIDEN', 'LEIDEN',

                #Danio
                'LEIDEN2', 'LEIDEN3',
            ),

            INPUT_FORMAT => 'FASTA',
        },

        #?
    },
);

# Export config entries into the calling package as scalar variables.
#
# Called implicitly by "use Bio::EnsEMBL::Analysis::Config::ImportArrays"
# with an optional list of %Config keys; with no list, every key of
# %Config is exported. Each requested key KEY becomes $KEY in the caller,
# aliased (not copied) to $Config{KEY}.
#
# Dies with "Error: Config: <name> not known\n" if a requested name has
# no defined entry in %Config, and re-throws any error from the
# pre-declaration eval.
sub import {
    my ($callpack) = caller(0);    # Name of the calling package
    my ($class, @requested) = @_;  # First argument is this package's name

    # Get list of variables supplied, or else everything
    my @vars = @requested ? @requested : keys %Config;
    return unless @vars;

    # Validate every name before any of them is interpolated into the
    # string eval below, so unknown (or malformed) names never reach it.
    foreach my $var (@vars) {
        die "Error: Config: $var not known\n"
            unless defined $Config{$var};
    }

    # Predeclare global variables in calling package
    eval "package $callpack; use vars qw("
        . join(' ', map { '$' . $_ } @vars) . ")";
    die $@ if $@;

    foreach my $var (@vars) {
        no strict 'refs';

        # Exporter does a similar job to the following
        # statement, but for function names, not
        # scalar variables:
        *{"${callpack}::$var"} = \$Config{$var};
    }

    return;
}

1;