# Raw content of Bio::EnsEMBL::Pipeline::Config::BatchQueue
#
# EnsEMBL module for Bio::EnsEMBL::Pipeline::Config::BatchQueue;
#
# You may distribute this module under the same terms as perl itself

=head1 NAME

Bio::EnsEMBL::Pipeline::Config::BatchQueue

=head1 SYNOPSIS

    use Bio::EnsEMBL::Pipeline::Config::BatchQueue;
    use Bio::EnsEMBL::Pipeline::Config::BatchQueue qw();

=head1 DESCRIPTION

Configuration for pipeline batch queues. Specifies per-analysis
resources and configuration, e.g. so that certain jobs are run
only on certain nodes.

It imports and sets a number of standard global variables into the
calling package. Without arguments all the standard variables are set,
and with a list, only those variables whose names are provided are set.
The module will die if a variable which doesn't appear in its
C<%Config> hash is asked to be set.

The variables can also be references to arrays or hashes.

Edit C<%Config> to add or alter variables.

All the variables are in capitals, so that they resemble environment
variables.

To run a job only on a certain host, you have to add specific
resource-requirements. This can be useful if you have special
memory-requirements, if you would like to run the job only on linux
64bit machines or if you want to run the job only on a specific host
group. The commands C<bmgroup> and C<lsinfo> show you certain
host-types / host-groups.

Here are some example resource-statements / sub_args statements:

    sub_args => '-m bc_hosts',                 # only use hosts of host-group 'bc_hosts' (see bmgroup)
    sub_args => '-m bc1_1',                    # only use hosts of host-group 'bc1_1'

    resource => '-R "select[type=LINUX64]" ',  # use Linux 64 bit machines only
    resource => '"select[type=LINUX64]" ',     # same as above
    resource => '"select[model=IBMBC2800]" ',  # only run on IBMBC2800 hosts

    resource => 'alpha',                       # only run on DEC alpha
    resource => 'linux',                       # only run on linux machines

Database throttling:

This runs a job on a linux host, throttles ecs4:3350 to not have more
than 300 active connections, 10 connections per job in the duration of
the first 10 minutes when the job is running (means 30 hosts * 10
connections = 300 connections):

    resource =>'select[linux && ecs4my3350 <=300] rusage[ecs4my3350=10:duration=10]',

Running on 'linux' hosts with not more than 200 active connections for
myia64f and myia64g, 10 connections per job to each db-instance for the
first 10 minutes:

    resource =>'select[linux && myia64f <=200 && myia64g <=200] rusage[myia64f=10:myia64g=10:duration=10]',

Running on hosts of model 'IBMBC2800' with not more than 200 active
connections to myia64f; 10 connections per job for the first 10 minutes:

    resource =>'select[model==IBMBC2800 && myia64f<=200] rusage[myia64f=10:duration=10]',

Running on hosts of host_group bc_hosts with not more than 200 active
connections to myia64f; 10 connections per job for the first 10 minutes:

    resource =>'select[myia64f<=200] rusage[myia64f=10:duration=10]',
    sub_args =>'-m bc_hosts'

=head1 CONTACT

B<ensembl-dev@ebi.ac.uk>

=cut

package Bio::EnsEMBL::Pipeline::Config::BatchQueue;

use strict;
use warnings;

use LowCoverageGeneBuildConf;

our %Config;

# Settings shared by every per-analysis entry in QUEUE_CONFIG. An entry in
# the table below only lists the keys in which it differs from these.
#
# NOTE(review): the resource string is single-quoted, so Perl does NOT
# interpolate $LC_DBHOST; the batch system is handed the literal text
# 'my$LC_DBHOST'. If the intent is to throttle on the database host
# configured in LowCoverageGeneBuildConf, this needs to become a
# double-quoted string (e.g. "select[linux && my${LC_DBHOST}<800] ...")
# once it is confirmed that LowCoverageGeneBuildConf exports $LC_DBHOST
# and that nothing outside this file substitutes the text. It is left
# byte-identical here so that behaviour is unchanged.
my %queue_defaults = (
    resource        => 'select[linux && my$LC_DBHOST<800] rusage[my$LC_DBHOST=10:duration=10:decay=1]',
    retries         => 4,
    sub_args        => '',
    runner          => '',
    queue           => 'normal',
    runnabledb_path => 'Bio/EnsEMBL/Analysis/RunnableDB',
);

# One hash per analysis: the shared defaults overlaid with the per-analysis
# overrides. Any default (resource, retries, sub_args, ...) can be
# overridden for a single analysis by adding the key to its line here.
my @queue_config = map { +{ %queue_defaults, %$_ } } (
    { logic_name => 'RepeatMask',           batch_size => 100 },
    { logic_name => 'Supp_RepeatMask',      batch_size => 100 },    # For running the tail
    { logic_name => 'Ab_initio_RepeatMask', batch_size => 100 },    # For running the tail
    { logic_name => 'Genscan',              batch_size => 200 },
    { logic_name => 'Uniprot',              batch_size => 200, queue => 'long' },    # For finishing off
    { logic_name => 'Vertrna',              batch_size => 200, queue => 'long' },    # For finishing off
    { logic_name => 'Unigene',              batch_size => 200, queue => 'long' },    # For finishing off
    { logic_name => 'CpG',                  batch_size => 500 },
    { logic_name => 'Dust',                 batch_size => 500 },
    { logic_name => 'tRNAscan',             batch_size => 500 },
    { logic_name => 'TRF',                  batch_size => 500 },
    { logic_name => 'Eponine',              batch_size => 200 },
);

%Config = (
    QUEUE_MANAGER       => 'LSF',
    DEFAULT_BATCH_SIZE  => 10,
    DEFAULT_RETRIES     => 3,
    DEFAULT_BATCH_QUEUE => '',    # put in the queue of your choice, eg. 'acari'
    DEFAULT_OUTPUT_DIR  => $LC_scratchDIR . "/raw_computes/",
    DEFAULT_CLEANUP     => 'no',
    DEFAULT_VERBOSITY   => 'WARNING',
    DEFAULT_RESOURCE    => 'linux',
    AUTO_JOB_UPDATE     => 1,

    # At this number of jobs RuleManager will sleep for a certain period
    # of time. If you effectively want this never to happen, set the value
    # very high, i.e. 100000.
    JOB_LIMIT => 100000,

    # These are the job statuses which will be counted. Valid statuses for
    # this array are RUN, PEND, SSUSP, EXIT, DONE.
    JOB_STATUSES_TO_COUNT => ['PEND'],

    MARK_AWOL_JOBS => 0,
    MAX_JOB_SLEEP  => 3600,    # the maximum time to sleep for when job limit reached
    MIN_JOB_SLEEP  => 120,     # the minimum time to sleep for when job limit reached
    SLEEP_PER_JOB  => 30,      # the amount of time to sleep per job when job limit reached

    DEFAULT_RUNNABLEDB_PATH => 'Bio/EnsEMBL/Analysis/RunnableDB',
    DEFAULT_RUNNER          => '',
    DEFAULT_RETRY_QUEUE     => 'long',
    DEFAULT_RETRY_SUB_ARGS  => '',
    DEFAULT_RETRY_RESOURCE  => 'linux',
    DEFAULT_SUB_ARGS        => '',

    QUEUE_CONFIG => \@queue_config,
);

# import
#
# Called by 'use Bio::EnsEMBL::Pipeline::Config::BatchQueue LIST'. For each
# requested name it declares a package scalar of that name in the calling
# package and aliases it to the matching value in %Config, so the caller
# can read e.g. $QUEUE_MANAGER or $QUEUE_CONFIG directly.
#
# Arguments : optional list of %Config key names; with no list, every key
#             of %Config is imported.
# Returns   : nothing.
# Dies      : "Error: Config: NAME not known\n" if a requested name is not
#             a key of %Config; also re-throws any error raised by the
#             predeclaration eval.
sub import {
    my ($callpack) = caller(0);    # name of the calling package
    shift @_;                      # drop our own package name

    # Get list of variables supplied, or else all of them
    my @vars = @_ ? @_ : keys %Config;
    return unless @vars;

    # Validate every name before touching the caller's namespace. This
    # keeps unknown names out of the string eval below and avoids leaving
    # the caller with only some of the variables aliased when we die.
    # 'exists' is used because the question is whether the key is known,
    # not whether its value happens to be defined.
    for my $name (@vars) {
        die "Error: Config: $name not known\n"
            unless exists $Config{$name};
    }

    # Predeclare the scalars in the calling package so that code compiled
    # there under 'use strict' may refer to them unqualified.
    eval "package $callpack; use vars qw("
        . join( ' ', map { '$' . $_ } @vars ) . ")";
    die $@ if $@;

    for my $name (@vars) {
        no strict 'refs';

        # Exporter does a similar job to the following statement, but for
        # function names, not scalar variables. Assigning a scalar
        # reference to the glob aliases $Caller::NAME to $Config{NAME}.
        *{"${callpack}::$name"} = \$Config{$name};
    }

    return;
}

1;