# EnsEMBL module for Bio::EnsEMBL::Pipeline::Config::BatchQueue;
#
# You may distribute this module under the same terms as perl itself
=head1 NAME
Bio::EnsEMBL::Pipeline::Config::BatchQueue
=head1 SYNOPSIS
use Bio::EnsEMBL::Pipeline::Config::BatchQueue;
use Bio::EnsEMBL::Pipeline::Config::BatchQueue qw();
=head1 DESCRIPTION
Configuration for pipeline batch queues. Specifies per-analysis
resources and configuration, e.g. so that certain jobs are run
only on certain nodes.
It imports and sets a number of standard global variables into the
calling package. Without arguments all the standard variables are set,
and with a list, only those variables whose names are provided are set.
The module will die if a variable which doesn't appear in its
C<%Config> hash is asked to be set.
The variables can also be references to arrays or hashes.
Edit C<%Config> to add or alter variables.
All the variables are in capitals, so that they resemble environment variables.
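For example, a script that only needs the queue manager and the per-analysis
settings could import just those variables (a minimal sketch, using variables
defined in C<%Config> below):

  use Bio::EnsEMBL::Pipeline::Config::BatchQueue qw(QUEUE_MANAGER QUEUE_CONFIG);

  print "Submitting jobs via $QUEUE_MANAGER\n";
  foreach my $analysis ( @$QUEUE_CONFIG ) {
      printf "%s runs in queue '%s'\n",
             $analysis->{logic_name}, $analysis->{queue} || 'default';
  }
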
To run a job only on certain hosts, you have to add specific resource requirements. This
can be useful if a job has special memory requirements, if you want to run it only on
Linux 64-bit machines, or if you want to run it only on a specific host group.
The commands bmgroup and lsinfo show you the available host types and host groups.
Here are some example resource / sub_args statements:
sub_args => '-m bc_hosts', # only use hosts of host-group 'bc_hosts' (see bmgroup)
sub_args => '-m bc1_1', # only use hosts of host-group 'bc1_1'
resource => 'select[type==X86_64]', # use Linux 64 bit machines only
resource => 'select[model==IBMBC2800] ', # only run on IBMBC2800 hosts
resource => 'alpha', # only run on DEC alpha
resource => 'linux', # run on any machine capable of running 32-bit X86 Linux apps
Database throttling:
This runs a job on a Linux host and throttles ecs4:3350 so that it has no more than 300 active connections, with 10 connections per
job for the first 10 minutes of the job's run (i.e. 30 hosts * 10 connections = 300 connections):
resource =>'select[linux && ecs4my3350 <=300] rusage[ecs4my3350=10:duration=10]',
Running on 'linux' hosts with no more than 200 active connections to myia64f and myia64g, 10 connections per job to each
database instance for the first 10 minutes:
resource =>'select[linux && myia64f <=200 && myia64g <=200] rusage[myia64f=10:myia64g=10:duration=10]',
Running on hosts of model 'IBMBC2800' with no more than 200 active connections to myia64f;
10 connections per job for the first 10 minutes:
resource =>'select[model==IBMBC2800 && myia64f<=200] rusage[myia64f=10:duration=10]',
Running on hosts of host_group bc_hosts with no more than 200 active connections to myia64f;
10 connections per job for the first 10 minutes:
resource =>'select[myia64f<=200] rusage[myia64f=10:duration=10]',
sub_args =>'-m bc_hosts'
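Such resource and sub_args strings are attached to individual analyses via
QUEUE_CONFIG below. As a sketch (the logic_name and values here are purely
illustrative, reusing the statements above), an entry might look like:

  {
    logic_name => 'MyThrottledAnalysis',  # hypothetical analysis name
    batch_size => 10,
    queue      => 'normal',
    retries    => 3,
    sub_args   => '-m bc_hosts',
    resource   => 'select[myia64f<=200] rusage[myia64f=10:duration=10]',
  },
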
=head1 CONTACT
=cut
package Bio::EnsEMBL::Pipeline::Config::BatchQueue;
use strict;
use vars qw(%Config);
%Config = (
QUEUE_MANAGER => 'LSF', # the job-submission system you are using;
# besides 'LSF' you can also use 'Local'.
# For more info look into
# /ensembl-pipeline/modules/Bio/EnsEMBL/Pipeline/BatchSubmission
DEFAULT_BATCH_SIZE => '',
DEFAULT_RETRIES => 3,
DEFAULT_BATCH_QUEUE => 'normal', # put in the queue of your choice, e.g. 'normal'
DEFAULT_RESOURCE => 'linux',
DEFAULT_SUB_ARGS => '',
# When running the ensembl pipeline test system using ensembl-pipeline/test_system/test_single_analysis.pl or
# ensembl-pipeline/test_system/test_whole_pipeline.pl, DEFAULT_OUTPUT_DIR *must* be defined, even if the output_dir
# option has been explicitly specified on the command line when running the scripts.
# If the output is to be written to DEFAULT_OUTPUT_DIR, provide a genuine path.
# For example: DEFAULT_OUTPUT_DIR => 'the/path/to/your/pipeline/output/files'.
#
# If the output is intended to be written to the output_dir specified on the command line, provide the
# path to a ghost directory. No data will be written to the ghost directory but it stops the system from
# complaining that "Your output directory does not exist, I'll create it now".
#
# Note also that when output_dir is explicitly specified on the command line, even if analysis-specific output dirs
# are defined in this module, no data will be written to them as the command line option overrides them too.
DEFAULT_OUTPUT_DIR => '/lustre/scratch1/ensembl/at6/ghost_output_dir_for_test_system',
DEFAULT_CLEANUP => 'no',
DEFAULT_VERBOSITY => 'WARNING',
JOB_LIMIT => 10000, # when this many jobs (in the states listed in
# JOB_STATUSES_TO_COUNT) are in the system, the RuleManager will sleep for
# a certain period of time before submitting more. If you effectively never
# want this to happen, set the value very high (e.g. 100000).
# This is important for queue managers which cannot cope with large numbers
# of pending jobs (e.g. early LSF versions and SGE).
JOB_STATUSES_TO_COUNT => ['PEND'], # only jobs in these states are counted
# towards JOB_LIMIT. Valid statuses for this array are RUN, PEND, SSUSP, EXIT, DONE
MARK_AWOL_JOBS => 1,
MAX_JOB_SLEEP => 3600, # the maximum time to sleep for when the job limit
# is reached
MIN_JOB_SLEEP => 120, # the minimum time to sleep for when the job limit is reached
SLEEP_PER_JOB => 30, # the amount of time to sleep per job when the job limit
# is reached
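# A rough sketch (an assumption about the intended behaviour, not the exact
# RuleManager code) of how these three values combine once JOB_LIMIT is hit;
# $job_count here stands for the number of jobs in JOB_STATUSES_TO_COUNT:
#   my $sleep = $job_count * $SLEEP_PER_JOB;
#   $sleep = $MIN_JOB_SLEEP if $sleep < $MIN_JOB_SLEEP;
#   $sleep = $MAX_JOB_SLEEP if $sleep > $MAX_JOB_SLEEP;
#   sleep($sleep);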
DEFAULT_RUNNABLEDB_PATH => 'Bio/EnsEMBL/Analysis/RunnableDB',
DEFAULT_RUNNER => 'Bio/EnsEMBL/Pipeline/runner.pl',
DEFAULT_RETRY_QUEUE => 'long',
DEFAULT_RETRY_SUB_ARGS => '',
DEFAULT_RETRY_RESOURCE => '',
QUEUE_CONFIG => [
{
logic_name => 'RepeatMask',
batch_size => 5,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => '',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'CpG',
batch_size => 108,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'Dust',
batch_size => 108,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'Eponine',
batch_size => 54,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'marker',
batch_size => 108,
resource => '',
retries => 4,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'TRF',
batch_size => 108,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'tRNAscan',
batch_size => 108,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'FirstEF',
batch_size => 108,
resource => '',
retries => 10,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'Genscan',
batch_size => 54,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'Vertrna',
batch_size => 4,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => '',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'Unigene',
batch_size => 4,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => '',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'Uniprot',
batch_size => 4,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => '',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'Pmatch',
batch_size => 108,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
cleanup => 'no',
verbosity => 'INFO',
runnabledb_path => '',
retry_queue => '',
retry_resource => '',
retry_sub_args => '',
},
{
logic_name => 'Pmatch_Wait',
batch_size => 1,
resource => '',
retries => 1,
sub_args => '',
runner => '',
queue => 'small',
output_dir => '',
runnabledb_path => '',
},
{
logic_name => 'BestPmatch',
batch_size => 1,
resource => '',
retries => 3,
sub_args => '',
runner => '',
queue => 'small',
cleanup => 'no',
output_dir => '',
runnabledb_path => '',
},
{
logic_name => 'Best_Wait',
batch_size => 1,
resource => '',
retries => 1,
sub_args => '',
runner => '',
queue => 'small',
cleanup => 'no',
output_dir => '',
runnabledb_path => '',
},
]
);
sub import {
my ($callpack) = caller(0); # Name of the calling package
my $pack = shift; # Need to move package off @_
# Get list of variables supplied, or else all
my @vars = @_ ? @_ : keys(%Config);
return unless @vars;
# Predeclare global variables in calling package
eval "package $callpack; use vars qw("
. join(' ', map { '$'.$_ } @vars) . ")";
die $@ if $@;
foreach (@vars) {
if (defined $Config{ $_ }) {
no strict 'refs';
# Exporter does a similar job to the following
# statement, but for function names, not
# scalar variables:
*{"${callpack}::$_"} = \$Config{ $_ };
} else {
die "Error: Config: $_ not known\n";
}
}
}
1;