Raw content of Bio::EnsEMBL::Pipeline::Utils::PipelineSanityChecks
use strict;
use warnings;
package Bio::EnsEMBL::Pipeline::Utils::PipelineSanityChecks;
use vars qw(@ISA);
use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning info);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Pipeline::Config::General;
use Bio::EnsEMBL::Pipeline::Config::BatchQueue;
use Bio::EnsEMBL::Root;
@ISA = qw(Bio::EnsEMBL::Root);
# Constructor.
#
# Creates the checker object, pulls the DB entry out of the argument list
# with rearrange() and stores it through db().
#
# Args    : argument list handed to rearrange([qw(DB)], ...)
#           (presumably -DB => $dbadaptor; rearrange is defined outside
#           this file - confirm the calling convention there)
# Returns : the new blessed object
# Throws  : if db() is still false once the arguments have been processed
sub new{
  my $class = shift;
  $class = ref($class) || $class;
  my $self = bless({}, $class);
  $self->{'db'} = undef;
  my ($db) = rearrange([qw(DB)], @_);
  if($db){
    $self->db($db);
  }
  if(!$self->db){
    throw("you need to pass at least a DBAdaptor to an PipelineSanityChecks");
  }
  return $self;
}
# Combined getter/setter for the object's 'db' slot.
#
# Arg [1] : (optional) value to store; any argument, including an explicit
#           undef, overwrites the current value
# Returns : the value now held in the 'db' slot
sub db{
  my ($self, @new_value) = @_;
  $self->{'db'} = $new_value[0] if(@new_value);
  return $self->{'db'};
}
# Run the database consistency checks on the pipeline tables.
#
# Five COUNT queries are handed in turn to execute_sanity_check(), which
# reports the accompanying message when a count is non-zero:
#   1. rule_goal rows whose goal matches no analysis.analysis_id
#   2. rule_conditions rows whose rule_condition matches no
#      analysis.logic_name
#   3. analysis rows with no input_id_type_analysis row (the only check
#      passed with the warn flag set)
#   4. input_id_analysis types, other than ACCUMULATOR, with no row in
#      input_id_type_analysis
#   5. input_id_analysis rows whose input_id_type is the empty string
#
# Returns : whatever the final execute_sanity_check() call returns
sub db_sanity_check{
  my ($self) = @_;
  my ($query, $msg);
  my $warn = 1;

  #check all rules in the rule_goal table have existing analyses
  $query = qq{SELECT COUNT(DISTINCT g.rule_id)
              FROM rule_goal g
              LEFT JOIN analysis a ON g.goal = a.analysis_id
              WHERE a.analysis_id IS NULL};
  $msg = "Some of your goals in the rule_goal table don't seem".
    " to have entries in the analysis table";
  $self->execute_sanity_check($query, $msg);

  #check all rules in the rule_conditions table have existing analyses
  $query = qq{SELECT COUNT(DISTINCT c.rule_id)
              FROM rule_conditions c
              LEFT JOIN analysis a ON c.rule_condition = a.logic_name
              WHERE a.logic_name IS NULL};
  $msg = "Some of your conditions in the rule_condition table don't" .
    " seem to have entries in the analysis table";
  $self->execute_sanity_check($query, $msg);

  #check all the analyses have types, a failure here is only a warning
  $query = qq{SELECT COUNT(DISTINCT(a.analysis_id))
              FROM analysis a
              LEFT JOIN input_id_type_analysis t
              ON a.analysis_id = t.analysis_id
              WHERE t.analysis_id IS NULL};
  $msg = "Some of your analyses don't have entries in the".
    " input_id_type_analysis table";
  $self->execute_sanity_check($query, $msg, $warn);

  #check that every type used in input_id_analysis, other than
  #ACCUMULATOR, has an entry in the input_id_type_analysis table
  #The ACCUMULATOR filter and the counted column must come from the
  #input_id_analysis side: on the unmatched rows this query looks for,
  #every column of t is NULL and a comparison against NULL is never
  #true, so filtering on t would make the check impossible to trigger
  $query = qq{SELECT COUNT(DISTINCT i.input_id_type)
              FROM input_id_analysis i
              LEFT JOIN input_id_type_analysis t
              ON i.input_id_type = t.input_id_type
              WHERE t.input_id_type IS NULL
              AND i.input_id_type != 'ACCUMULATOR'};
  $msg = "Some of your types don't have entries in the".
    " input_id_type_analysis table";
  $self->execute_sanity_check($query, $msg);

  #check that no input_id has been stored with an empty type
  $query = qq{SELECT count(input_id)
              FROM input_id_analysis
              WHERE input_id_type = ''};
  $msg = "Some of your input_ids don't have a type in the input_id_analysis ".
    "table";
  $self->execute_sanity_check($query, $msg);
}
# Run one sanity-check query and report $msg if it flags a problem.
#
# The first column of the first row returned decides the outcome: a true
# value (for example a non-zero count) means the check failed, while no
# row at all or a false value means it passed.
#
# Arg [1] : string $query - SQL statement to prepare and execute on db()
# Arg [2] : string $msg - text handed to warning() or throw() on failure
# Arg [3] : (optional) $warn - when true a failure is reported with
#           warning(), otherwise with throw()
# Returns : nothing
sub execute_sanity_check{
  my ($self, $query, $msg, $warn) = @_;
  my $sth = $self->db->prepare($query);
  $sth->execute();
  # fetchrow_array is the documented name of the legacy fetchrow alias;
  # list context pins the decision to the first column, whereas DBI leaves
  # the scalar-context result undefined for multi-column rows
  my ($failed) = $sth->fetchrow_array();
  # only one row is ever read, so release the handle explicitly before
  # reporting (throw() is expected not to return)
  $sth->finish();
  return unless($failed);
  if($warn){
    warning($msg);
  }else{
    throw($msg);
  }
  return;
}
# Decide whether the pipeline can run with accumulators switched on.
#
# Only rules whose goal analysis has input_id_type 'ACCUMULATOR' are
# examined. The conditions of such a rule are grouped by the input_id_type
# of the analysis each one names; any type for which the
# StateInfoContainer lists no input ids is reported on STDERR and turns
# the flag off. All offending types are reported, not just the first.
#
# Arg [1] : arrayref $rules - rule objects providing goalAnalysis and
#           list_conditions
# Arg [2] : $accumulators - the caller's current accumulator flag
# Returns : $accumulators as passed in, or 0 if any problem was reported
# Throws  : if a condition names an analysis the AnalysisAdaptor cannot
#           fetch (same message as rule_type_sanity uses for this case)
sub accumulator_sanity_check{
  my ($self, $rules, $accumulators) = @_;
  my $sic = $self->db->get_StateInfoContainer;
  my $aa = $self->db->get_AnalysisAdaptor;
  RULE:foreach my $rule(@$rules){
    next RULE unless($rule->goalAnalysis->input_id_type eq 'ACCUMULATOR');

    # group the condition logic names by the type of their analysis
    my %conditions_of_type;
    foreach my $name(@{$rule->list_conditions}){
      my $analysis = $aa->fetch_by_logic_name($name);
      # without this guard a missing analysis would die on the method
      # call below with an error that does not name the condition
      if(!$analysis){
        throw("Can't depend on an analysis which doesn't exist $name");
      }
      push(@{$conditions_of_type{$analysis->input_id_type}}, $name);
    }

    TYPE:foreach my $type(keys(%conditions_of_type)){
      my @ids = @{$sic->list_input_ids_by_type($type)};
      next TYPE if(@ids);
      my $logic_names = join(",", @{$conditions_of_type{$type}});
      print STDERR "can't run with accumulators on as ".
        $rule->goalAnalysis->logic_name." depends on $logic_names with type ".
          $type." which has no entries in the input_id_".
            "analysis table\n";
      $accumulators = 0;
    }
  }
  return $accumulators;
}
# Check that the goal of every rule and the analyses it depends on use
# the same input_id_type.
#
# Rules whose goal analysis has type 'ACCUMULATOR' are not checked, and
# conditions whose analysis has type 'ACCUMULATOR' are skipped (with a
# note on STDERR when $verbose is true).
#
# Arg [1] : arrayref $rules - rule objects providing goalAnalysis and
#           list_conditions
# Arg [2] : (optional) $verbose - when true, skipped accumulator
#           conditions are printed to STDERR
# Throws  : if a condition names an analysis the AnalysisAdaptor cannot
#           fetch, or if a condition's type differs from the goal's type;
#           checking stops at the first such problem
sub rule_type_sanity{
  my ($self, $rules, $verbose) = @_;
  my $analysis_adaptor = $self->db->get_AnalysisAdaptor;
  RULE:foreach my $rule(@$rules){
    my $goal_type = $rule->goalAnalysis->input_id_type;
    next RULE if($goal_type eq 'ACCUMULATOR');
    CONDITION:foreach my $name(@{$rule->list_conditions}){
      my $dependency = $analysis_adaptor->fetch_by_logic_name($name);
      unless($dependency){
        throw("Can't depend on an analysis which doesn't exist $name");
      }
      if($dependency->input_id_type eq 'ACCUMULATOR'){
        # accumulator conditions are exempt from the type comparison
        print STDERR "Skipping ".$name." is an accumulator\n" if($verbose);
      }elsif($dependency->input_id_type ne $goal_type){
        throw(join('',
                   $rule->goalAnalysis->logic_name, "'s type ", $goal_type,
                   " doesn't match condition ", $dependency->logic_name,
                   "'s type ", $dependency->input_id_type));
      }
    }
  }
}
# Check that the required pipeline configuration values are set.
#
# QUEUE_MANAGER, LIB_DIR, DATA_DIR and BIN_DIR are read from package
# variables that this sub does not declare itself; the messages name
# Config/BatchQueue.pm and Config/General.pm as the places to set them.
# One line is printed for each value that is false, so every missing
# setting is listed before the failure is raised.
#
# Throws  : if at least one of the four values is false
sub config_sanity_check {
  my ($self) = @_;
  # each entry pairs a setting's current value with its complaint
  my @required = (
    [$QUEUE_MANAGER, "Need to specify QUEUE_MANAGER in Config/BatchQueue.pm\n"],
    [$LIB_DIR,       "Need to specify LIB_DIR in Config/General.pm\n"],
    [$DATA_DIR,      "Need to specify DATA_DIR in Config/General.pm\n"],
    [$BIN_DIR,       "Need to specify BIN_DIR in Config/General.pm\n"],
  );
  my $sane = 1;
  foreach my $setting (@required){
    my ($value, $complaint) = @$setting;
    if(!$value){
      print($complaint);
      $sane = 0;
    }
  }
  if(!$sane){
    throw("The pipeline config isn't sane");
  }
}
1;