Raw content of Bio::EnsEMBL::Analysis::Tools::Utilities
=head1 NAME
Bio::EnsEMBL::Analysis::Tools::Utilities
- base class which exports utility methods which don't take Bio::XX objects
=head1 SYNOPSIS
use Bio::EnsEMBL::Analysis::Tools::Utilities qw(shuffle);
or
use Bio::EnsEMBL::Analysis::Tools:Utilities
to get all methods
=head1 DESCRIPTION
This is a class which exports Utility methods for genebuilding and
other gene manupulation purposes.
=head1 CONTACT
please send any questions to ensembl-dev@ebi.ac.uk
=head1 METHODS
the rest of the documention details the exported static
class methods
=cut
package Bio::EnsEMBL::Analysis::Tools::Utilities;
use strict;
use warnings;
use Exporter;
use Bio::EnsEMBL::Analysis::Tools::Stashes qw( package_stash ) ; # needed for read_config()
use Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning
stack_trace_dump);
use vars qw (@ISA @EXPORT);
@ISA = qw(Exporter);
@EXPORT = qw( shuffle
parse_config
create_file_name
write_seqfile
merge_config_details
get_input_arg
get_db_adaptor_by_string read_config
import_var
get_database_connection_parameters_by_string ) ;
=head2 merge_config_details
Arg [0] : Array of Hashreferences
Arg [1] : Bio::EnsEMBL::Analysis::Runnable
Function : This func. merges the Configurations out differnt configuration-files into one Hash
Returntype: Hashref.
Exceptions: throws as this method should be implemented by any child
Example : merge_database_configs ($DATABASES, $EXONERATE2GENES, $TRANSCRIPT_COALESCER) ;
=cut
sub merge_config_details {
my ($self, @config_hashes )= @_ ;
my %result ;
# loop through all hrefs which are passed as input
foreach my $config_file ( @config_hashes ) {
my %file = %$config_file ;
foreach my $db_class ( keys %file ) {
# process Exonerate2Genes.pm config (has section --> OUTDB)
if ( exists ${$file{$db_class}}{OUTDB} ) {
if ( ref(${$file{$db_class}}{OUTDB}) !~m/HASH/) {
# section in Exonerate2Genes is not a HREF which defines details for
# database-connection - it's a hash-key pointing to Databases.pm
my $href = get_database_connection_parameters_by_string(${$file{$db_class}}{OUTDB}) ;
unless ( $href ) {
print " $db_class parameters are not defined in Databases.pm - skipping\n";
next ;
} else {
#print "Used database : $$href{'-dbname'}\n" ;
$result{$db_class}{db} = $href ;
}
}else {
if ( defined ${$file{$db_class}}{OUTDB}
&& length(${$file{$db_class}}{OUTDB}{'-dbname'}) > 0 ) {
# don't process undefiend OUT-DB's and
# don't process defined OUT-DB's which have no name
$result{$db_class}{db} = ${$file{$db_class}}{OUTDB} ;
}else {
next ;
}
}
}
# process /Conf/Databases.pm
if (defined ( ${$file{$db_class}}{'-dbname'}) && length ( ${$file{$db_class}}{'-dbname'}) > 0 ) {
# we process Databases.pm // parameteres for db-connection are ok
$result{$db_class}{db} = \%{$file{$db_class}} ;
} elsif (defined ( ${$file{$db_class}}{'-dbname'}) && length ( ${$file{$db_class}}{'-dbname'}) == 0 ) {
next ;
}
# add / process data from other configs in format TranscriptCoalescer.pm
# and attach data to main config hash
for my $key (keys %{$file{$db_class}}) {
$result{$db_class}{$key} = $file{$db_class}{$key};
}
}
}
return \%result ;
}
=head2 shuffle
Arg [1] : Reference to Array
Function : randomizes the order of an array
Returntype: arrayref
Exceptions: none
Example :
=cut
sub shuffle {
my $tref = shift ;
my $i = @$tref ;
while ($i--) {
my $j = int rand ($i+1);
@$tref[$i,$j] = @$tref[$j,$i];
}
return $tref ;
}
=head2 get_input_arg
Function : waits for input from STDIN and returns '1' if input =~m/y/i
and '0' if input matches /n/i.
Returntype: 1 or 0
Exceptions: none
=cut
sub get_input_arg {
while (defined (my $line=)){
chomp($line) ;
if ( $line=~m/y/i){
return 1 ;
}elsif( $line =~m/n/i){
return 0 ;
}
print "Wrong input - only answer 'y' or 'n'\n" ;
}
}
sub parse_config{
my ($obj, $var_hash, $label) = @_;
throw("Can't parse the ".$var_hash." hash for object ".$obj." if we are give no label") if(!$label);
my $DEFAULT_ENTRY_KEY = 'DEFAULT';
if(!$var_hash || ref($var_hash) ne 'HASH'){
my $err = "Must pass read_and_check_config a hashref with the config ".
"in ";
$err .= " not a ".$var_hash if($var_hash);
$err .= " Utilities::read_and_and_check_config";
throw($err);
}
my %check;
foreach my $k (keys %$var_hash) {
my $uc_key = uc($k);
if (exists $check{$uc_key}) {
throw("You have two entries in your config with the same name (ignoring case)\n");
}
$check{$uc_key} = $k;
}
# replace entries in config has with lower case versions.
foreach my $k (keys %check) {
my $old_k = $check{$k};
my $entry = $var_hash->{$old_k};
delete $var_hash->{$old_k};
$var_hash->{$k} = $entry;
}
if (not exists($var_hash->{$DEFAULT_ENTRY_KEY})) {
throw("You must define a $DEFAULT_ENTRY_KEY entry in your config");
}
my $default_entry = $var_hash->{$DEFAULT_ENTRY_KEY};
# the following will fail if there are config variables that
# do not have a corresponding method here
foreach my $config_var (keys %{$default_entry}) {
if ($obj->can($config_var)) {
$obj->$config_var($default_entry->{$config_var});
} else {
throw("no method defined in Utilities for config variable '$config_var'");
}
}
#########################################################
# read values of config variables for this logic name into
# instance variable, set by method
#########################################################
my $uc_logic = uc($label);
if (exists $var_hash->{$uc_logic}) {
# entry contains more specific values for the variables
my $entry = $var_hash->{$uc_logic};
foreach my $config_var (keys %{$entry}) {
if ($obj->can($config_var)) {
$obj->$config_var($entry->{$config_var});
} else {
throw("no method defined in Utilities for config variable '$config_var'");
}
}
}else{
throw("Your logic_name ".$uc_logic." doesn't appear in your config file hash - using default settings\n".
$var_hash);
}
}
=head2 create_file_name
Arg [1] : Bio::EnsEMBL::Analysis::Runnable
Arg [2] : string, stem of filename
Arg [3] : string, extension of filename
Arg [4] : directory file should live in
Function : create a filename containing the PID and a random number
with the specified directory, stem and extension
Returntype: string, filename
Exceptions: throw if directory specifed doesnt exist
Example : my $queryfile = $self->create_filename('seq', 'fa');
=cut
sub create_file_name{
my ($stem, $ext, $dir) = @_;
if(!$dir){
$dir = '/tmp';
}
$stem = '' if(!$stem);
$ext = '' if(!$ext);
throw($dir." doesn't exist SequenceUtils::create_filename")
unless(-d $dir);
my $num = int(rand(100000));
my $file = $dir."/".$stem.".".$$.".".$num.".".$ext;
while(-e $file){
$num = int(rand(100000));
$file = $dir."/".$stem.".".$$.".".$num.".".$ext;
}
return $file;
}
=head2 write_seq_file
Arg [1] : Bio::Seq
Arg [2] : string, filename
Function : This uses Bio::SeqIO to dump a sequence to a fasta file
Returntype: string, filename
Exceptions: throw if failed to write sequence
Example :
=cut
sub write_seqfile{
my ($seq, $filename, $format) = @_;
$format = 'fasta' if(!$format);
my @seqs;
if(ref($seq) eq "ARRAY"){
@seqs = @$seq;
throw("Seqs need to be Bio::PrimarySeqI object not a ".$seqs[0])
unless($seqs[0]->isa('Bio::PrimarySeqI'));
}else{
throw("Need a Bio::PrimarySeqI object not a ".$seq)
if(!$seq || !$seq->isa('Bio::PrimarySeqI'));
@seqs = ($seq);
}
$filename = create_file_name('seq', 'fa', '/tmp')
if(!$filename);
my $seqout = Bio::SeqIO->new(
-file => ">".$filename,
-format => $format,
);
foreach my $seq(@seqs){
eval{
$seqout->write_seq($seq);
};
if($@){
throw("FAILED to write $seq to $filename SequenceUtils:write_seq_file $@");
}
}
return $filename;
}
=head2 get_db_adaptor_by_string
Arg [1] : String
Arg [2] : verbose-flag
Arg [3] : return a pipeline db adaptor flag
Function : Returns a Bio::EnsEMBL::DBSQL::DBAdaptor for a given string.
or a Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor if requested
Requires proper configuration of
Bio::EnsEMBL::Analysis::Config::Databases
Returntype: Bio::EnsEMBL:DBSQL::DBAdaptor or Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor
Exceptions: throw if string can't be found in Databases.pm
=cut
sub get_db_adaptor_by_string {
my ($string, $verbose, $use_pipeline_adaptor) = @_ ;
#print "Fetching ".$string."\n";
require "Bio/EnsEMBL/Analysis/Config/Databases.pm" ;
no strict ;
Bio::EnsEMBL::Analysis::Config::Databases->import("DATABASES");
Bio::EnsEMBL::Analysis::Config::Databases->import("DNA_DBNAME");
unless ( ${$DATABASES}{$string} ) {
print "WARNING : Database parameters undefined for - skipping \n" ;
return undef ;
}
if ( length(${$DATABASES}{$string}{'-dbname'}) == 0 ) {
print "WARNING : You haven't defined a database-name in the Databases.pm config-file for $string\n" ;
return undef ;
}
my $db;
my $dnadb;
if ( $use_pipeline_adaptor ) {
$db = new Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor( %{ ${$DATABASES}{$string} } ) ;
$dnadb = new Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor( %{ ${$DATABASES}{$DNA_DBNAME} } ) ;
} else {
$db = new Bio::EnsEMBL::DBSQL::DBAdaptor( %{ ${$DATABASES}{$string} } ) ;
$dnadb = new Bio::EnsEMBL::DBSQL::DBAdaptor( %{ ${$DATABASES}{$DNA_DBNAME} } ) ;
}
#print "Got ".$db."\n";
if ( $verbose ) {
my %tmp = %{${$DATABASES}{$string}} ;
print STDERR "Database : $tmp{'-dbname'} @ $tmp{'-host'} : $tmp{'-port'} AS $tmp{'-user'} - $tmp{'-pass'}\n" ;
}
if($string ne $DNA_DBNAME ){
if (length($DNA_DBNAME) ne 0 ){
$db->dnadb($dnadb);
}else{
warning("You haven't defined a DNA_DBNAME in Config/Databases.pm");
}
}
use strict ;
return $db;
}
=head2 get_database_connection_parameters_by_string
Arg [1] : String
Function : Returns a Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor for a given string.
Requires proper configuration of
Bio::EnsEMBL::Analysis::Config::Databases
Returntype: Hashref
Exceptions: throw if string can't be found in Databases.pm
=cut
sub get_database_connection_parameters_by_string {
my ($string) = @_ ;
require "Bio/EnsEMBL/Analysis/Config/Databases.pm" ;
no strict ;
Bio::EnsEMBL::Analysis::Config::Databases->import("DATABASES");
Bio::EnsEMBL::Analysis::Config::Databases->import("DNA_DBNAME");
unless ( ${$DATABASES}{$string} ) {
print "WARNING : Database parameters undefined - skipping \n" ;
return undef ;
}
if ( length(${$DATABASES}{$string}{'-dbname'}) == 0 ) {
print "You haven't defined a database-name in the Databases.pm config-file for $string\n" ;
return undef ;
}
return ${$DATABASES}{$string} ;
}
=head2 read_config
Arg [1] : String
Arg [2] : optional Array-reference
Function : reads a configuration file in the ensembl perl module format
in runtime and, accesses the variables ( either only the variables
listend in $aref or all variables ) and returns a hash-reference to this.
Returntype:
Exceptions:
Requires : use Bio::EnsEMBL::Analysis::Tools::Stashes qw( package_stash ) ;
=cut
sub read_config {
my ($module_name , $aref ) = @_ ;
(my $module_path = $module_name )=~s/::/\//g;
require "$module_path.pm" ;
# get the names of the variables
unless ($aref) {
my ($config_href, $varname ) = @{package_stash("$module_name")};
map { $module_name->import($_) } keys %$config_href ;
return $config_href;
}
# import only variables specified in $aref
no strict ;
map { $module_name->import($_) } @$aref ;
my %import ;
map {$import{$_} = ${$_}} @$aref ;
use strict ;
return \%import;
}
=head2 import_var
Arg [0] : Array of Hashreferences
Arg [1] : optional Array-ref
Function : gets a hash-reference and adds them to the namespace of the module - they can
be accesed in the package by using 'no strict;'
Returntype: Hashref.
Examples : import_var ($href) ;
import_var(read_config("Bio::EnsEMBL::Analysis::Config::Databases"));
=cut
sub import_var {
my ($callpack) = caller(0); # Name of the calling package
# my $pack = shift; # Need to move package off @_
my $vars_to_import = shift ;
# $vars_to_import = $pack unless $vars_to_import ;
# Get list of variables supplied, or else all
my @vars = @_ ? @_ : keys(%{$vars_to_import});
return unless @vars;
# Predeclare global variables in calling package
eval "package $callpack; use vars qw("
. join(' ', map { '$'.$_ } @vars) . ")";
die $@ if $@;
foreach (@vars) {
if (defined ${$vars_to_import}{ $_ }) {
no strict 'refs';
# Exporter does a similar job to the following
# statement, but for function names, not
# scalar variables:
*{"${callpack}::$_"} = \${$vars_to_import}{ $_ };
} else {
die "Error: Config: $_ not known\n";
}
}
}
1;