Bio::EnsEMBL::Analysis::Tools Utilities
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::Tools::Utilities
- base class which exports utility methods which don't take Bio::XX objects
Package variables
Globals (from "use vars" definitions)
@EXPORT
Included modules
Bio::EnsEMBL::Analysis::Tools::Stashes qw ( package_stash )
Bio::EnsEMBL::DBSQL::DBAdaptor
Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor
Bio::EnsEMBL::Utils::Exception qw ( verbose throw warning stack_trace_dump )
Exporter
Inherit
Exporter
Synopsis
  use Bio::EnsEMBL::Analysis::Tools::Utilities qw(shuffle); 
or use Bio::EnsEMBL::Analysis::Tools:Utilities to get all methods
Description
This is a class which exports Utility methods for genebuilding and
other gene manupulation purposes.
Methods
create_file_nameDescriptionCode
get_database_connection_parameters_by_stringDescriptionCode
get_db_adaptor_by_stringDescriptionCode
get_input_argDescriptionCode
import_varDescriptionCode
merge_config_detailsDescriptionCode
parse_config
No description
Code
read_configDescriptionCode
shuffleDescriptionCode
write_seqfile
No description
Code
Methods description
create_file_namecode    nextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable
Arg [2] : string, stem of filename
Arg [3] : string, extension of filename
Arg [4] : directory file should live in
Function : create a filename containing the PID and a random number
with the specified directory, stem and extension
Returntype: string, filename
Exceptions: throw if directory specifed doesnt exist
Example : my $queryfile = $self->create_filename('seq', 'fa');
get_database_connection_parameters_by_stringcodeprevnextTop
  Arg [1]   : String 
Function : Returns a Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor for a given string.
Requires proper configuration of
Bio::EnsEMBL::Analysis::Config::Databases

Returntype: Hashref
Exceptions: throw if string can't be found in Databases.pm
get_db_adaptor_by_stringcodeprevnextTop
  Arg [1]   : String 
Arg [2] : verbose-flag
Arg [3] : return a pipeline db adaptor flag
Function : Returns a Bio::EnsEMBL::DBSQL::DBAdaptor for a given string.
or a Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor if requested
Requires proper configuration of
Bio::EnsEMBL::Analysis::Config::Databases

Returntype: Bio::EnsEMBL:DBSQL::DBAdaptor or Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor
Exceptions: throw if string can't be found in Databases.pm
get_input_arg codeprevnextTop
  Function  : waits for input from STDIN and returns '1' if input =~m/y/i         
and '0' if input matches /n/i.
Returntype: 1 or 0
Exceptions: none
import_var codeprevnextTop
  Arg [0]   : Array of Hashreferences
Arg [1] : optional Array-ref
Function : gets a hash-reference and adds them to the namespace of the module - they can
be accesed in the package by using 'no strict;'
Returntype: Hashref.
Examples : import_var ($href) ;
import_var(read_config("Bio::EnsEMBL::Analysis::Config::Databases"));
merge_config_detailscodeprevnextTop
  Arg [0]   : Array of Hashreferences
Arg [1] : Bio::EnsEMBL::Analysis::Runnable
Function : This func. merges the Configurations out differnt configuration-files into one Hash
Returntype: Hashref.
Exceptions: throws as this method should be implemented by any child
Example : merge_database_configs ($DATABASES, $EXONERATE2GENES, $TRANSCRIPT_COALESCER) ;
read_config codeprevnextTop
  Arg [1]   : String 
Arg [2] : optional Array-reference
Function : reads a configuration file in the ensembl perl module format
in runtime and, accesses the variables ( either only the variables
listend in $aref or all variables ) and returns a hash-reference to this.

Returntype:
Exceptions:
Requires : use Bio::EnsEMBL::Analysis::Tools::Stashes qw( package_stash ) ;
shuffle codeprevnextTop
  Arg [1]   : Reference to Array
Function : randomizes the order of an array
Returntype: arrayref
Exceptions: none
Example :
Methods code
create_file_namedescriptionprevnextTop
sub create_file_name {
  my ($stem, $ext, $dir) = @_;
  if(!$dir){
    $dir = '/tmp';
  }
  $stem = '' if(!$stem);
  $ext = '' if(!$ext);
  throw($dir." doesn't exist SequenceUtils::create_filename") 
    unless(-d $dir);
  my $num = int(rand(100000));
  my $file = $dir."/".$stem.".".$$.".".$num.".".$ext;
  while(-e $file){
    $num = int(rand(100000));
    $file = $dir."/".$stem.".".$$.".".$num.".".$ext;
  }
  return $file;
}
get_database_connection_parameters_by_stringdescriptionprevnextTop
sub get_database_connection_parameters_by_string {
   my ($string) = @_ ;


   require "Bio/EnsEMBL/Analysis/Config/Databases.pm" ;
   no strict ;
   Bio::EnsEMBL::Analysis::Config::Databases->import("DATABASES");
   Bio::EnsEMBL::Analysis::Config::Databases->import("DNA_DBNAME");

   unless ( ${$DATABASES}{$string} ) {  
     print "WARNING : Database parameters undefined - skipping\n " ; 
     return undef ; 
   } 
   
   if ( length(${$DATABASES}{$string}{'-dbname'}) == 0 ) {  
     print "You haven't defined a database-name in the Databases.pm config-file for $string\n" ; 
     return undef ; 
   }
   return ${$DATABASES}{$string} ;
}
get_db_adaptor_by_stringdescriptionprevnextTop
sub get_db_adaptor_by_string {
   my ($string, $verbose, $use_pipeline_adaptor) = @_ ;

   #print "Fetching ".$string."\n";
require "Bio/EnsEMBL/Analysis/Config/Databases.pm" ; no strict ; Bio::EnsEMBL::Analysis::Config::Databases->import("DATABASES"); Bio::EnsEMBL::Analysis::Config::Databases->import("DNA_DBNAME"); unless ( ${$DATABASES}{$string} ) { print "WARNING : Database parameters undefined for - skipping\n " ; return undef ; } if ( length(${$DATABASES}{$string}{'-dbname'}) == 0 ) { print "WARNING : You haven't defined a database-name in the Databases.pm config-file for $string\n" ; return undef ; } my $db; my $dnadb; if ( $use_pipeline_adaptor ) { $db = new Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor( %{ ${$DATABASES}{$string} } ) ; $dnadb = new Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor( %{ ${$DATABASES}{$DNA_DBNAME} } ) ; } else { $db = new Bio::EnsEMBL::DBSQL::DBAdaptor( %{ ${$DATABASES}{$string} } ) ; $dnadb = new Bio::EnsEMBL::DBSQL::DBAdaptor( %{ ${$DATABASES}{$DNA_DBNAME} } ) ; } #print "Got ".$db."\n";
if ( $verbose ) { my %tmp = %{${$DATABASES}{$string}} ; print STDERR "Database : $tmp{'-dbname'} @ $tmp{'-host'} : $tmp{'-port'} AS $tmp{'-user'} - $tmp{'-pass'}\n" ; } if($string ne $DNA_DBNAME ){ if (length($DNA_DBNAME) ne 0 ){ $db->dnadb($dnadb); }else{ warning("You haven't defined a DNA_DBNAME in Config/Databases.pm"); } } use strict ; return $db;
}
get_input_argdescriptionprevnextTop
sub get_input_arg {
   while (defined (my $line=<STDIN>)){ 
   chomp($line) ;
   if ( $line=~m/y/i){
      return 1 ;
   }elsif( $line =~m/n/i){
     return 0 ;
   }
   print "Wrong input - only answer 'y' or 'n'\n" ; 
  }
}
import_vardescriptionprevnextTop
sub import_var {
    my ($callpack) = caller(0); # Name of the calling package
# my $pack = shift; # Need to move package off @_
my $vars_to_import = shift ; # $vars_to_import = $pack unless $vars_to_import ;
# Get list of variables supplied, or else all
my @vars = @_ ? @_ : keys(%{$vars_to_import}); return unless @vars; # Predeclare global variables in calling package
eval "package $callpack; use vars qw(" . join(' ', map { '$'.$_ } @vars) . ")"; die $@ if $@; foreach (@vars) { if (defined ${$vars_to_import}{ $_ }) { no strict 'refs'; # Exporter does a similar job to the following
# statement, but for function names, not
# scalar variables:
*{"${callpack}::$_"} =\$ {$vars_to_import}{ $_ }; } else { die "Error: Config: $_ not known\n"; } } } 1;
}
merge_config_detailsdescriptionprevnextTop
sub merge_config_details {
  my ($self,  @config_hashes )= @_ ;

  my %result ;

  # loop through all hrefs which are passed as input 
foreach my $config_file ( @config_hashes ) { my %file = %$config_file ; foreach my $db_class ( keys %file ) { # process Exonerate2Genes.pm config (has section --> OUTDB)
if ( exists ${$file{$db_class}}{OUTDB} ) { if ( ref(${$file{$db_class}}{OUTDB}) !~m/HASH/) { # section in Exonerate2Genes is not a HREF which defines details for
# database-connection - it's a hash-key pointing to Databases.pm
my $href = get_database_connection_parameters_by_string(${$file{$db_class}}{OUTDB}) ; unless ( $href ) { print " $db_class parameters are not defined in Databases.pm - skipping\n"; next ; } else { #print "Used database : $$href{'-dbname'}\n" ;
$result{$db_class}{db} = $href ; } }else { if ( defined ${$file{$db_class}}{OUTDB} && length(${$file{$db_class}}{OUTDB}{'-dbname'}) > 0 ) { # don't process undefiend OUT-DB's and
# don't process defined OUT-DB's which have no name
$result{$db_class}{db} = ${$file{$db_class}}{OUTDB} ; }else { next ; } } } # process /Conf/Databases.pm
if (defined ( ${$file{$db_class}}{'-dbname'}) && length ( ${$file{$db_class}}{'-dbname'}) > 0 ) { # we process Databases.pm // parameteres for db-connection are ok
$result{$db_class}{db} =\% {$file{$db_class}} ; } elsif (defined ( ${$file{$db_class}}{'-dbname'}) && length ( ${$file{$db_class}}{'-dbname'}) == 0 ) { next ; } # add / process data from other configs in format TranscriptCoalescer.pm
# and attach data to main config hash
for my $key (keys %{$file{$db_class}}) { $result{$db_class}{$key} = $file{$db_class}{$key}; } } } return\% result ;
}
parse_configdescriptionprevnextTop
sub parse_config {
  my ($obj, $var_hash, $label) = @_;
  throw("Can't parse the ".$var_hash." hash for object ".$obj." if we are give no label") if(!$label);
  my $DEFAULT_ENTRY_KEY = 'DEFAULT';
  if(!$var_hash || ref($var_hash) ne 'HASH'){
    my $err = "Must pass read_and_check_config a hashref with the config ".
      "in ";
    $err .= " not a ".$var_hash if($var_hash);
    $err .= " Utilities::read_and_and_check_config";
    throw($err);
  }

  my %check;
  foreach my $k (keys %$var_hash) {
    my $uc_key = uc($k);
    if (exists $check{$uc_key}) {
      throw("You have two entries in your config with the same name (ignoring case)\n");
    }
    $check{$uc_key} = $k;
  }
  # replace entries in config has with lower case versions. 
foreach my $k (keys %check) { my $old_k = $check{$k}; my $entry = $var_hash->{$old_k}; delete $var_hash->{$old_k}; $var_hash->{$k} = $entry; } if (not exists($var_hash->{$DEFAULT_ENTRY_KEY})) { throw("You must define a $DEFAULT_ENTRY_KEY entry in your config"); } my $default_entry = $var_hash->{$DEFAULT_ENTRY_KEY}; # the following will fail if there are config variables that
# do not have a corresponding method here
foreach my $config_var (keys %{$default_entry}) { if ($obj->can($config_var)) { $obj->$config_var($default_entry->{$config_var}); } else { throw("no method defined in Utilities for config variable '$config_var'"); } } #########################################################
# read values of config variables for this logic name into
# instance variable, set by method
#########################################################
my $uc_logic = uc($label); if (exists $var_hash->{$uc_logic}) { # entry contains more specific values for the variables
my $entry = $var_hash->{$uc_logic}; foreach my $config_var (keys %{$entry}) { if ($obj->can($config_var)) { $obj->$config_var($entry->{$config_var}); } else { throw("no method defined in Utilities for config variable '$config_var'"); } } }else{ throw("Your logic_name ".$uc_logic." doesn't appear in your config file hash - using default settings\n". $var_hash); }
}
read_configdescriptionprevnextTop
sub read_config {
   my ($module_name , $aref ) = @_ ;

   (my $module_path = $module_name )=~s/::/\//g; 
require "$module_path.pm" ; # get the names of the variables
unless ($aref) { my ($config_href, $varname ) = @{package_stash("$module_name")}; map { $module_name->import($_) } keys %$config_href ; return $config_href; } # import only variables specified in $aref
no strict ; map { $module_name->import($_) } @$aref ; my %import ; map {$import{$_} = ${$_}} @$aref ; use strict ; return\% import;
}
shuffledescriptionprevnextTop
sub shuffle {
  my $tref = shift ;
  my $i = @$tref ;
  while ($i--) {
     my $j = int rand ($i+1);
     @$tref[$i,$j] = @$tref[$j,$i];
  }
  return $tref ;
}
write_seqfiledescriptionprevnextTop
sub write_seqfile {
  my ($seq, $filename, $format) = @_;
  $format = 'fasta' if(!$format);
  my @seqs;
  if(ref($seq) eq "ARRAY"){
    @seqs = @$seq;
    throw("Seqs need to be Bio::PrimarySeqI object not a ".$seqs[0])
      unless($seqs[0]->isa('Bio::PrimarySeqI'));
  }else{
    throw("Need a Bio::PrimarySeqI object not a ".$seq)
      if(!$seq || !$seq->isa('Bio::PrimarySeqI'));
    @seqs = ($seq);
  }
  $filename = create_file_name('seq', 'fa', '/tmp') 
    if(!$filename);
  my $seqout = Bio::SeqIO->new(
                               -file => ">".$filename,
                               -format => $format,
                              );
  foreach my $seq(@seqs){
    eval{
      $seqout->write_seq($seq);
    };
    if($@){
      throw("FAILED to write $seq to $filename SequenceUtils:write_seq_file $@");
    }
  }
  return $filename;
}
General documentation
CONTACTTop
please send any questions to ensembl-dev@ebi.ac.uk
write_seq_fileTop
  Arg [1]   : Bio::Seq
Arg [2] : string, filename
Function : This uses Bio::SeqIO to dump a sequence to a fasta file
Returntype: string, filename
Exceptions: throw if failed to write sequence
Example :