# Raw content of RuleCreation
#RuleCreation is a package for creating ensembl-pipeline rules on the
#basis of a config file, or for writing those rules to a config file once
#they have been retrieved from a database. It has the same basis as
#AnalysisCreation and is based on an idea from Glenn Proctor
# Cared for by ensembl
#
# Copyright ensembl
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
RuleCreation
=head1 SYNOPSIS

This class will parse config files to produce rule objects and store
them, and will take rule tables and write a config file. A config file
looks like this:

  [RepeatMask]
  condition=SubmitContig
  [Pmatch]
  condition=SubmitChromosome
  [Pmatch_Wait]
  condition=Pmatch
  [BestPmatch]
  condition=Pmatch_Wait
  condition=SubmitGenome
  #comment lines can be made if they start with a # symbol

=head1 DESCRIPTION
=head1 CONTACT
Post general queries to B<ensembl-dev@ebi.ac.uk>
=head1 APPENDIX

The class itself doesn't need to be instantiated, but either the
script which uses it should be in the same directory as it, or the
directory which contains it should be in your PERL5LIB.

The rule_setup script, which should be found in this directory, can
perform both functions for you if you have the appropriate database
and config files.

An example config file can be found in this directory, called
example_rule.conf

=cut
package RuleCreation;
use strict;
use warnings;
# Rule objects are constructed in create_rules()
use Bio::EnsEMBL::Pipeline::Rule;
# throw() is used for fatal errors in the subs below and verbose() is
# called once at load time; warning() and info() are imported as well
use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning info);
require Exporter;
# This is a function library rather than a class: all five public subs
# are exported into the caller's namespace by default
our @ISA = qw(Exporter);
our @EXPORT = qw(parse_files write_into_db read_db write_file
create_rules);
# Presumably sets the reporting threshold of the Ensembl exception
# module to WARNING for the whole process -- TODO confirm against
# Bio::EnsEMBL::Utils::Exception
verbose('WARNING');
=head2 parse_files

  Arg [1..n] : list of rule config filenames
  Function   : parse one or more rule config files into a hash of
               goal => conditions. A stanza starts with a [goal]
               header line and is followed by condition=logic_name
               lines. Blank lines and lines starting with # are
               ignored, keys other than "condition" are reported with
               a warning and skipped
  Returntype : hashref keyed on goal name, each value an arrayref of
               condition names in the order they appear in the file
  Exceptions : if no filenames are given
               if a file doesn't exist or can't be opened
               if a key/value pair appears before the first header
               if the same header appears more than once
               if a header has no conditions
  Caller     :
  Example    : my $rule_hash = parse_files($file);

=cut

sub parse_files {
  # Take every argument rather than only the first one, so that the
  # documented "array of filenames" really works and an empty argument
  # list is detected below
  my @files = @_;

  if (@files == 0) {
    throw("Can't parse files if we don't have any files");
  }

  my %config;

  # read each file
  foreach my $file (@files) {
    if (! -e $file) {
      throw("rule file $file not found\n");
    }

    # goal of the stanza currently being read, empty before the first one
    my $header = "";

    open(my $fh, '<', $file) or throw("Couldn't open file $file");

    while (my $line = <$fh>) {
      chomp($line);

      # Comment or blank line (including whitespace-only lines)
      next if ($line =~ /^\s*$/ || $line =~ /^\#/);

      # [HEADER], $1 is the header name without the []. Trailing
      # whitespace (e.g. the \r of a DOS line ending) is tolerated so a
      # header is never silently skipped and its conditions attached to
      # the previous stanza
      if ($line =~ /^\[(.*)\]\s*$/) {
        $header = $1;
        if (exists $config{$header}) {
          throw("Can't have two different rules with the same goal ".
                "header");
        }
        $config{$header} = [];
        next;
      }

      # key=value, $1 is the key and $2 the value with surrounding
      # whitespace removed
      if ($line =~ /^([^=\s]+)\s*=\s*(\S.*?)\s*$/) {
        my $key   = lc($1); # keys are compared in lowercase
        my $value = $2;     # values have their case preserved

        if (length($header) == 0) {
          throw("Found key/value pair $key/$value outside stanza");
        }

        if ($key eq 'condition') {
          push(@{$config{$header}}, $value);
        } else {
          warn("Don't recognise ".$key." so doing nothing with ".
               $value);
        }
      }
    }

    close($fh);
  }

  # A goal needs at least one condition. The array has to be
  # dereferenced here: comparing the reference itself with 0 is never
  # true, so the check would never fire
  foreach my $goal (sort keys(%config)) {
    if (@{$config{$goal}} == 0) {
      throw("Can't create a rule for ".$goal.
            " without any conditions");
    }
  }

  return \%config;
}
=head2 create_rules

  Arg [1]   : Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor
  Arg [2]   : hashref for hash keyed on goal analysis logic_name
              containing arrayrefs of condition logic_names
  Function  : create one rule per goal on the basis of the hash. Goals
              are processed in sorted order and each rule receives its
              conditions in the order given, with duplicates dropped.
              Nothing is written to the database
  Returntype: array ref of Bio::EnsEMBL::Pipeline::Rule's
  Exceptions: throws if not passed a dbadaptor or it is of the wrong
              type or if not passed a rule hash or it is empty
              throws if a goal or a condition has no analysis of that
              name in the database
              throws if a goal is listed as one of its own conditions
  Example   : my @rules = @{create_rules($db, $rule_hash)};

=cut

sub create_rules {
  my ($db, $config) = @_;

  if (!$db || !$db->isa('Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor')) {
    # spell out undef so building the message doesn't itself warn
    throw("Must define a db and ".(defined $db ? $db : 'undef').
          " must be a ".
          "Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor");
  }
  if (!$config || keys(%$config) == 0) {
    throw("Not going to create any rules as rule hash ".
          (defined $config ? $config : 'undef')." is empty ");
  }

  my @rules;
  my $analysis_adaptor = $db->get_AnalysisAdaptor;

  # Sorted so that the returned rules, and therefore anything written
  # from them, come out in the same order on every run
  foreach my $goal (sort keys(%$config)) {
    my $analysis = $analysis_adaptor->fetch_by_logic_name($goal);
    if (!$analysis) {
      throw("Can't have rule with goal ".$goal." if no analysis ".
            "of that name exists");
    }

    # Validate each distinct condition once, remembering the order in
    # which the conditions were listed
    my @conditions;
    my %checked;
    foreach my $condition (@{$config->{$goal}}) {
      next if $checked{$condition}++;
      throw("$condition is not found in the database\n")
        unless $analysis_adaptor->fetch_by_logic_name($condition);
      push(@conditions, $condition);
    }

    throw("Sorry, you can't insert a rule if the goal is the same as ".
          "one of the conditions") if exists $checked{$goal};

    my $rule = Bio::EnsEMBL::Pipeline::Rule->new
      (
       -goalanalysis => $analysis
      );
    foreach my $condition (@conditions) {
      $rule->add_condition($condition);
    }
    push(@rules, $rule);
  }

  return \@rules;
}
=head2 write_into_db

  Arg [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor which can supply a
              RuleAdaptor
  Arg [2]   : ref to an array of rule objects
  Function  : store each rule whose goal does not already have a rule
              in the database; rules whose goal is already present are
              skipped with a warning
  Returntype: N/A
  Exceptions: if the dbadaptor is missing or the wrong type of object
              if no rules are passed in
  Caller    :
  Example   : write_into_db($db, \@rules);

=cut

sub write_into_db {
  my ($db, $rules) = @_;

  throw("need a Pipeline::DBAdaptor not ".$db)
    unless ($db && $db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor'));
  throw("Need rules to write to the database")
    unless ($rules && @$rules != 0);

  my $adaptor = $db->get_RuleAdaptor;

  # Index the rules already in the database by the name of their goal
  my %stored_rule_for;
  foreach my $stored ($adaptor->fetch_all) {
    $stored_rule_for{$stored->goalAnalysis->logic_name} = $stored;
  }

  foreach my $candidate (@$rules) {
    my $goal_name = $candidate->goalAnalysis->logic_name;
    my $clash     = $stored_rule_for{$goal_name};
    if ($clash) {
      # never overwrite an existing rule, just report it
      warn("Not storing rule ".$goal_name.
           " rule with same goal already exists ".
           $clash->dbID);
    } else {
      $adaptor->store($candidate);
    }
  }
}
=head2 read_db

  Arg [1]   : Bio::EnsEMBL::DBSQL::DBAdaptor which can supply a
              RuleAdaptor
  Function  : read all the rule objects from the database
  Returntype: array ref of rule objects
  Exceptions: if db is missing or isn't the correct type
  Caller    :
  Example   : my $rules = read_db($db);

=cut

sub read_db {
  my $db = shift;

  # Guard against a missing argument first, as write_into_db and
  # create_rules do, so the caller gets the intended exception rather
  # than a method call on undef
  if (!$db || !$db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) {
    throw("need a DBAdaptor not ".(defined $db ? $db : 'undef'));
  }

  my $rule_adaptor = $db->get_RuleAdaptor;
  # fetch_all is called in list context and the result returned by
  # reference
  my @rules = $rule_adaptor->fetch_all;
  return \@rules;
}
=head2 write_file

  Arg [1]   : filename, created or overwritten
  Arg [2]   : arrayref of rule objects
  Function  : write a config file for the objects given, one
              [goal logic_name] header per rule followed by a
              condition= line for each of its conditions
  Returntype: N/A
  Exceptions: if the file can't be opened for writing
              if the file can't be closed cleanly
  Caller    :
  Example   : write_file($file, $rules);

=cut

sub write_file {
  my ($file, $rules) = @_;

  # Lexical handle and three-arg open: the filename can never be taken
  # for a mode, and no package-wide FH handle is left open behind us
  open(my $fh, '>', $file) or throw("couldn't open $file to write to");

  foreach my $rule (@$rules) {
    print {$fh} "[".$rule->goalAnalysis->logic_name."]\n";
    foreach my $condition (@{$rule->list_conditions}) {
      print {$fh} "condition=".$condition."\n";
    }
  }

  # Buffered output only reaches the disk, and write errors only show
  # up, when the handle is closed, so close it here and check the result
  close($fh) or throw("couldn't finish writing $file: $!");

  return;
}