# Raw content of cDNAUpdate
package cDNAUpdate;

=pod

=head1 NAME

cDNAUpdate.pm

=head1 DESCRIPTION

This file contains the necessary configuration for running
cDNA_update for both human and mouse cDNAs. You should read this
POD along with the POD for cDNA_update for an overview of the
cDNA_update process.

As in all configurations there is a default section containing all
variables that are required, followed by the individual settings for
human and mouse. Which species to use is set when cDNA_update.pl is
run.

Note that the import subroutine is slightly different to the method
usually used for importing the configurations as the cDNA_update
procedure itself doesn't follow a regular genebuild pipeline.

Some variables are pre-defined in the default section; these should
require only minimal modifications once they have been set. The user
section will change every time the procedure is run.

=cut

use strict;
use warnings;

# Package global (not a lexical) so that it stays reachable from outside
# as %cDNAUpdate::Config.
our %Config;

%Config = (
    DEFAULT => {
        # Admin rights are required
        WB_DBUSER     => 'ensadmin',
        WB_DBPASS     => 'ensembl',
        WB_REF_DBPORT => 3306,

        # Path to gss file
        GSS_PATH => '/ensembl-personal/genebuilders/cDNA_update/gss_acc.txt',

        # Various scripts required by the process.
        FASTA_SPLIT           => '/nfs/acari/searle/progs/production_code/ensembl-trunk_1106/ensc-core/src/Programs/fastasplit',
        POLYA_CLIPPING_PATH   => '/ensembl-pipeline/scripts/EST/new_polyA_clipping.pl',
        FIND_N_PATH           => '/ensembl-pipeline/scripts/cDNA_update/find_N.pl',
        STORE_UNMAPPED_PATH   => '/ensembl-pipeline/scripts/cDNA_update/store_unmapped_cdnas.pl',
        UNMAPPED_REASONS_PATH => '/ensembl/misc-scripts/unmapped_reason/unmapped_reason.txt',
        LOAD_TAX_PATH         => '/ensembl-pipeline/scripts/load_taxonomy.pl',

        # Exonerate specifications
        PROGRAM_NAME    => "exonerate",
        PROGRAM_VERSION => "0.9.0",
        PROGRAM_FILE    => "/usr/local/ensembl/bin/exonerate-0.9.0",
        MODULE_NAME     => "Exonerate2Genes",

        # Source data files details
        SOURCE_HOST => 'cbi4',
        SOURCE_DIR  => '/data/blastdb/',

        # Taxonomy db for loading meta_table - should not need to change
        TAXONDBNAME => 'ncbi_taxonomy',
        TAXONDBHOST => 'ens-livemirror',
        TAXONDBPORT => 3306,

        # For the comparison only
        OLD_FEATURE_NAME => 'cDNA_update',

        # These variables need to be set, please do so in the
        # corresponding hash tables for the species you want to
        # analyse.

        # User details
        USER         => '',
        HOST         => '',
        GENEBUILD_ID => undef,

        # Reference db (current build)
        WB_REF_DBNAME => '',
        WB_REF_DBHOST => '',

        # New source db (PIPELINE)
        WB_PIPE_DBNAME => '',
        WB_PIPE_DBHOST => '',
        WB_PIPE_DBPORT => 3306,

        # New target db (ESTGENE)
        WB_TARGET_DBNAME => '',
        WB_TARGET_DBHOST => '',
        WB_TARGET_DBPORT => 3306,

        # Older cDNA db (needed for comparison only) -
        # check schema is up to date!!!!!!
        WB_LAST_DBNAME => '',
        WB_LAST_DBHOST => '',
        WB_LAST_DBPORT => undef,

        # Reference db (last build, needed for comparison only)
        WB_LAST_DNADBNAME => '',
        WB_LAST_DNADBHOST => '',
        WB_LAST_DNADBPORT => undef,

        # Path to your cvs directory
        CVS_DIR => '',

        # Where the output files should go
        DATA_DIR => '',

        # Path to the genomic sequence
        GENOMICSEQS => '',

        # Chunk size recommendations: 5500 for human
        # otherwise get AWOL jobs in first run
        CHUNK => undef,

        # Sequence files
        VERTRNA        => '',
        VERTRNA_UPDATE => '',
        REFSEQ         => '',

        # Species information
        COMMON_SPECIES_NAME => '',
        SPECIES             => '',
        TAX_ID              => undef,
    },

    human => {
        USER         => 'amonida',
        HOST         => 'bc-9-1-01',
        GENEBUILD_ID => 25,

        WB_REF_DBNAME => 'amonida_human_core_55',
        WB_REF_DBHOST => 'genebuild7',

        WB_PIPE_DBNAME => 'amonida_homo_cdna0509_ref',
        WB_PIPE_DBHOST => 'genebuild1',
        WB_PIPE_DBPORT => 3306,

        WB_TARGET_DBNAME => 'amonida_homo_cdna0509_update',
        WB_TARGET_DBHOST => 'genebuild1',
        WB_TARGET_DBPORT => 3306,

        WB_LAST_DBNAME => 'homo_sapiens_cdna_54_36p',
        WB_LAST_DBHOST => 'ens-livemirror',
        WB_LAST_DBPORT => 3306,

        WB_LAST_DNADBNAME => 'homo_sapiens_core_54_36p',
        WB_LAST_DNADBHOST => 'ens-livemirror',
        WB_LAST_DNADBPORT => 3306,

        # An unset environment variable yields '' (the same value the
        # plain string interpolation produced) without an
        # "uninitialized value" warning at load time.
        CVS_DIR  => ( defined $ENV{CVSDIR} ? $ENV{CVSDIR} : '' ),
        DATA_DIR => ( defined $ENV{WORK}   ? $ENV{WORK}   : '' ),

        ################################################################
        # You shouldn't need to change the settings below but do check #
        # that they are correct.                                       #
        ################################################################

        # Path to the genomic sequence
        GENOMICSEQS => '/data/blastdb/Ensembl/Human/GRCh37/genome/softmasked/softmasked_dusted.fa',

        # Chunk size recommendations: 5500 for human
        # otherwise get AWOL jobs in first run
        CHUNK => 5500,

        # Sequence files
        VERTRNA        => 'embl_vertrna-1',
        VERTRNA_UPDATE => 'emnew_vertrna-1',

        # Using human sequence
        REFSEQ => 'hs.fna',

        # Species information
        COMMON_SPECIES_NAME => 'human',
        SPECIES             => 'Homo sapiens',
        TAX_ID              => 9606,
    },

    mouse => {
        USER         => 'amonida',
        HOST         => 'bc-9-1-03',
        GENEBUILD_ID => 25,

        WB_REF_DBNAME => 'amonida_mouse_core_53',
        WB_REF_DBHOST => 'genebuild4',

        WB_PIPE_DBNAME => 'amonida_mus_test_ref',
        WB_PIPE_DBHOST => 'genebuild4',
        WB_PIPE_DBPORT => 3306,

        WB_TARGET_DBNAME => 'amonida_mus_test_update',
        WB_TARGET_DBHOST => 'genebuild4',
        WB_TARGET_DBPORT => 3306,

        WB_LAST_DBNAME => 'mus_musculus_cdna_53_37f',
        WB_LAST_DBHOST => 'ensdb-archive',
        WB_LAST_DBPORT => 5304,

        WB_LAST_DNADBNAME => 'mus_musculus_core_53_37f',
        WB_LAST_DNADBHOST => 'ens-livemirror',
        WB_LAST_DNADBPORT => 3306,

        CVS_DIR => '/nfs/acari/amonida/projects/cdna_update/mouse/',

        # Same undef-safe handling as the human section.
        DATA_DIR => ( defined $ENV{SCRATCH} ? $ENV{SCRATCH} : '' ),

        ################################################################
        # You shouldn't need to change the settings below but do check #
        # that they are correct.                                       #
        ################################################################

        # Path to the genomic sequence
        GENOMICSEQS => '/data/blastdb/Ensembl/Mouse/NCBIM37/genome/softmasked_dusted/toplevel_sequence.fa',

        # Chunk size recommendations: 1500 for mouse
        # otherwise get AWOL jobs in first run
        CHUNK => 1500,

        # Sequence files
        VERTRNA        => 'embl_vertrna-1',
        VERTRNA_UPDATE => 'emnew_vertrna-1',
        REFSEQ         => 'mouse.fna',

        # Species information
        COMMON_SPECIES_NAME => 'mouse',
        SPECIES             => 'Mus musculus',
        TAX_ID              => 10090,
    },
);

# _all_sections()
#
# Returns every section name of %Config in a fixed order: 'DEFAULT'
# first (when present), then the remaining sections sorted by name.
# Used only when import() is called without an explicit section list, so
# that the outcome no longer depends on hash iteration order.
sub _all_sections {
    my @names = keys %Config;
    return (
        ( grep { $_ eq 'DEFAULT' } @names ),
        ( sort grep { $_ ne 'DEFAULT' } @names ),
    );
}

# import( $class, @sections )
#
# Aliases the settings of the named %Config sections into the calling
# package as scalar variables: key WB_DBUSER becomes $WB_DBUSER in the
# caller. The caller's variable is an alias of the %Config slot, not a
# copy.
#
# Sections are applied in the order given. When two sections define the
# same key, the one applied later wins, so 'DEFAULT' should be listed
# before a species section, e.g.
#
#     cDNAUpdate->import( 'DEFAULT', 'human' );
#
# Called without section names, every section is applied in the order
# returned by _all_sections(). As 'human' and 'mouse' share their keys,
# 'mouse' wins in that case; callers wanting one species should name the
# sections explicitly.
#
# Dies with "Error: Config: <name> not known\n" if a requested section
# does not exist. All names are checked before anything is aliased, so a
# failed call leaves the caller's namespace untouched.
sub import {
    my ($callpack) = caller(0);    # Name of the calling package
    my $pack       = shift;        # Need to move package off @_

    # Sections supplied by the caller, or else all of them.
    my @sections = @_ ? @_ : _all_sections();
    return unless @sections;

    # Validate up front so that an unknown name cannot leave the caller
    # with a partially imported configuration.
    foreach my $section (@sections) {
        my $name = defined $section ? $section : '';
        die "Error: Config: $name not known\n"
            unless exists $Config{$name};
    }

    # No "use vars" predeclaration is done in the calling package: the
    # glob assignment below is made from a different package than the
    # one owning the glob, which registers the scalar as imported (the
    # mechanism Exporter and vars.pm rely on). NOTE(review): as with any
    # import, this only satisfies "strict vars" in the caller when
    # import() runs at compile time, i.e. via "use" or a BEGIN block.
    no strict 'refs';
    foreach my $section (@sections) {
        my $settings = $Config{$section};
        foreach my $key ( keys %{$settings} ) {
            # Exporter does a similar job to the following statement,
            # but for function names, not scalar variables:
            *{"${callpack}::$key"} = \$settings->{$key};
        }
    }

    return;
}

1;