Bio::EnsEMBL::Analysis::Tools
Utilities
Toolbar
Summary
Bio::EnsEMBL::Analysis::Tools::Utilities
- base class which exports utility methods which don't take Bio::XX objects
Package variables
Globals (from "use vars" definitions)
@EXPORT
Included modules
Inherit
Exporter
Synopsis
use Bio::EnsEMBL::Analysis::Tools::Utilities qw(shuffle);
or
use Bio::EnsEMBL::Analysis::Tools::Utilities;
to get all methods
Description
This is a class which exports Utility methods for genebuilding and
other gene manipulation purposes.
Methods
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::Runnable Arg [2] : string, stem of filename Arg [3] : string, extension of filename Arg [4] : directory file should live in Function : create a filename containing the PID and a random number with the specified directory, stem and extension Returntype: string, filename Exceptions: throw if directory specified doesn't exist Example : my $queryfile = $self->create_filename('seq', 'fa'); |
Arg [1] : String Function : Returns the database connection parameters (a hash reference) configured for a given string. Requires proper configuration of Bio::EnsEMBL::Analysis::Config::Databases Returntype: Hashref Exceptions: throw if string can't be found in Databases.pm |
Arg [1] : String Arg [2] : verbose-flag Arg [3] : return a pipeline db adaptor flag Function : Returns a Bio::EnsEMBL::DBSQL::DBAdaptor for a given string, or a Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor if requested. Requires proper configuration of Bio::EnsEMBL::Analysis::Config::Databases Returntype: Bio::EnsEMBL::DBSQL::DBAdaptor or Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor Exceptions: throw if string can't be found in Databases.pm |
Function : waits for input from STDIN and returns '1' if input =~m/y/i and '0' if input matches /n/i. Returntype: 1 or 0 Exceptions: none |
Arg [0] : Array of Hashreferences Arg [1] : optional Array-ref Function : gets a hash-reference and adds them to the namespace of the module - they can be accessed in the package by using 'no strict;' Returntype: Hashref. Examples : import_var ($href) ; import_var(read_config("Bio::EnsEMBL::Analysis::Config::Databases")); |
Arg [0] : Array of Hashreferences Arg [1] : Bio::EnsEMBL::Analysis::Runnable Function : This func. merges the configurations out of different configuration-files into one Hash Returntype: Hashref. Exceptions: throws as this method should be implemented by any child Example : merge_database_configs ($DATABASES, $EXONERATE2GENES, $TRANSCRIPT_COALESCER) ; |
Arg [1] : String Arg [2] : optional Array-reference Function : reads a configuration file in the ensembl perl module format at runtime, accesses the variables (either only the variables listed in $aref or all variables) and returns a hash-reference to them. Returntype: Exceptions: Requires : use Bio::EnsEMBL::Analysis::Tools::Stashes qw( package_stash ) ; |
Arg [1] : Reference to Array Function : randomizes the order of an array Returntype: arrayref Exceptions: none Example : |
Methods code
# create_file_name
#
#   Arg [1]   : string, stem of the filename (optional, defaults to '')
#   Arg [2]   : string, extension of the filename (optional, defaults to '')
#   Arg [3]   : string, directory the file should live in (optional,
#               defaults to '/tmp')
#   Function  : builds a path of the form DIR/STEM.PID.RANDOM.EXT that did
#               not exist when it was checked. The file itself is NOT
#               created here.
#   Returntype: string, filename
#   Exceptions: throws if the directory does not exist
#   Example   : my $queryfile = create_file_name('seq', 'fa');
sub create_file_name {
    my ($stem, $ext, $dir) = @_;

    # Only fall back to the defaults when a value is really missing, so that
    # a stem, extension or directory of "0" is kept as given.
    $dir  = '/tmp' if (!defined $dir || $dir eq '');
    $stem = ''     if (!defined $stem);
    $ext  = ''     if (!defined $ext);

    throw($dir . " doesn't exist Utilities::create_file_name")
        unless (-d $dir);

    # Keep drawing random numbers until the candidate path is unused.
    my $file;
    do {
        my $num = int(rand(100000));
        $file = $dir . "/" . $stem . "." . $$ . "." . $num . "." . $ext;
    } while (-e $file);

    return $file;
}
# get_database_connection_parameters_by_string
#
#   Arg [1]   : string, key to look up in the DATABASES configuration hash
#   Function  : loads Bio::EnsEMBL::Analysis::Config::Databases at runtime,
#               imports DATABASES and DNA_DBNAME from it and looks up the
#               entry stored under the given key
#   Returntype: the entry stored under the key (used by callers as a hashref
#               of connection parameters), or undef after printing a message
#               when the entry is missing or its '-dbname' is empty
#   Exceptions: none raised here
sub get_database_connection_parameters_by_string {
    my $db_key = shift;

    require "Bio/EnsEMBL/Analysis/Config/Databases.pm";

    # $DATABASES is not declared in this sub; presumably import() places it
    # in this package, which is why strict has to be off from here on.
    no strict;
    foreach my $symbol ("DATABASES", "DNA_DBNAME") {
        Bio::EnsEMBL::Analysis::Config::Databases->import($symbol);
    }

    my $connection = $DATABASES->{$db_key};

    if (!$connection) {
        print "WARNING : Database parameters undefined - skipping\n ";
        return undef;
    }

    if (length($connection->{'-dbname'}) == 0) {
        print "You haven't defined a database-name in the Databases.pm config-file for $db_key\n";
        return undef;
    }

    return $connection;
}
# get_db_adaptor_by_string
#
#   Arg [1]   : string, key to look up in the DATABASES configuration hash
#   Arg [2]   : verbose flag; when true the connection details are printed
#               to STDERR
#   Arg [3]   : flag; when true a Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor is
#               created instead of a Bio::EnsEMBL::DBSQL::DBAdaptor
#   Function  : loads Bio::EnsEMBL::Analysis::Config::Databases at runtime,
#               creates an adaptor from the parameters stored under the key
#               and, unless the key is DNA_DBNAME itself, attaches an adaptor
#               for the DNA_DBNAME entry via dnadb()
#   Returntype: the adaptor object, or undef after printing a warning when
#               the entry is missing or its '-dbname' is empty
#   Exceptions: none raised here; warns if DNA_DBNAME is not configured
sub get_db_adaptor_by_string {
    my ($string, $verbose, $use_pipeline_adaptor) = @_;

    require "Bio/EnsEMBL/Analysis/Config/Databases.pm";

    # $DATABASES / $DNA_DBNAME are not declared in this sub; presumably
    # import() places them in this package, hence strict is switched off.
    # 'no strict' is lexically scoped, so it ends with this sub.
    no strict;
    Bio::EnsEMBL::Analysis::Config::Databases->import("DATABASES");
    Bio::EnsEMBL::Analysis::Config::Databases->import("DNA_DBNAME");

    my $params = ${$DATABASES}{$string};

    unless ($params) {
        print "WARNING : Database parameters undefined for $string - skipping\n ";
        return undef;
    }

    unless (defined $params->{'-dbname'} && length($params->{'-dbname'}) != 0) {
        print "WARNING : You haven't defined a database-name in the Databases.pm config-file for $string\n";
        return undef;
    }

    my $adaptor_class = $use_pipeline_adaptor
        ? 'Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor'
        : 'Bio::EnsEMBL::DBSQL::DBAdaptor';

    my $db = $adaptor_class->new(%{$params});

    if ($verbose) {
        # NOTE(review): this prints the database password to STDERR.
        my %tmp = %{$params};
        print STDERR "Database : $tmp{'-dbname'} @ $tmp{'-host'} : $tmp{'-port'} AS $tmp{'-user'} - $tmp{'-pass'}\n";
    }

    # length() of an undefined value is undef on perl >= 5.12 and
    # "undef ne 0" is true, so the name is checked with defined() plus a
    # numeric comparison instead of a string comparison.
    my $dna_dbname_set = (defined $DNA_DBNAME && length($DNA_DBNAME) != 0);

    if (!defined $DNA_DBNAME || $string ne $DNA_DBNAME) {
        if ($dna_dbname_set) {
            # The DNA adaptor is only built when it is going to be attached.
            my $dnadb = $adaptor_class->new(%{ ${$DATABASES}{$DNA_DBNAME} });
            $db->dnadb($dnadb);
        }
        else {
            warning("You haven't defined a DNA_DBNAME in Config/Databases.pm");
        }
    }

    return $db;
}
# get_input_arg
#
#   Function  : reads lines from STDIN until one contains a 'y' or an 'n'
#               (case-insensitive). 'y' is tested first, so a line that
#               contains both letters counts as yes. Any other line prints a
#               reminder and the next line is read.
#   Returntype: 1 for yes, 0 for no; no explicit value when STDIN is
#               exhausted before an answer is seen
#   Exceptions: none
sub get_input_arg {
    while (defined(my $answer = <STDIN>)) {
        chomp $answer;

        return 1 if $answer =~ m/y/i;
        return 0 if $answer =~ m/n/i;

        print "Wrong input - only answer 'y' or 'n'\n";
    }
}
# import_var
#
#   Arg [1]   : hashref mapping variable names to values
#   Arg [2..] : optional list of names to import; defaults to every key of
#               the hashref
#   Function  : declares a scalar package variable for each name in the
#               calling package (via 'use vars') and aliases it to the
#               matching hash element, so the caller can read $NAME directly
#   Returntype: nothing meaningful
#   Exceptions: dies if the declaration fails to compile, or if a requested
#               name has no defined value in the hashref
sub import_var {
    my ($caller_pkg) = caller(0);
    my $vars_to_import = shift;

    my @names = @_;
    @names = keys(%{$vars_to_import}) unless @names;
    return unless @names;

    # Declare the scalars in the caller's package.
    my $declaration = "package $caller_pkg; use vars qw("
        . join(' ', map { '$' . $_ } @names) . ")";
    eval $declaration;
    die $@ if $@;

    foreach my $name (@names) {
        unless (defined $vars_to_import->{$name}) {
            die "Error: Config: $name not known\n";
        }

        # Alias the caller's scalar to the hash element itself (not a copy).
        no strict 'refs';
        *{"${caller_pkg}::${name}"} = \$vars_to_import->{$name};
    }
}
1; } |
# merge_config_details
#
#   Arg [1]   : first argument is taken as $self and is not used in the body
#   Arg [2..] : hashrefs of configuration, each mapping a section name to a
#               hashref of settings
#   Function  : merges the sections of all given configurations into one
#               hash. For every section kept, all of its keys are copied and
#               a 'db' entry is added as follows:
#                 - OUTDB given as a non-hash value: looked up with
#                   get_database_connection_parameters_by_string
#                 - OUTDB given as a hash with a non-empty '-dbname': used
#                   as is
#                 - the section itself has a non-empty '-dbname': the
#                   section itself
#               Sections whose database cannot be resolved, or whose
#               '-dbname' is an empty string, are skipped.
#   Returntype: hashref
#   Exceptions: none raised here
sub merge_config_details {
    my ($self, @config_hashes) = @_;

    my %merged;

    foreach my $config (@config_hashes) {
        my %sections = %$config;

      SECTION:
        foreach my $name (keys %sections) {
            my $section = $sections{$name};

            if (exists $section->{OUTDB}) {
                my $outdb = $section->{OUTDB};

                if (ref($outdb) =~ m/HASH/) {
                    # Connection parameters were given inline.
                    next SECTION
                        unless (defined $outdb && length($outdb->{'-dbname'}) > 0);
                    $merged{$name}{db} = $outdb;
                }
                else {
                    # OUTDB names an entry of Databases.pm.
                    my $params = get_database_connection_parameters_by_string($outdb);
                    if (!$params) {
                        print " $name parameters are not defined in Databases.pm - skipping\n";
                        next SECTION;
                    }
                    $merged{$name}{db} = $params;
                }
            }

            # The section may itself be a set of connection parameters.
            my $dbname = $section->{'-dbname'};
            if (defined $dbname) {
                next SECTION if (length($dbname) == 0);
                $merged{$name}{db} = $section;
            }

            foreach my $setting (keys %{$section}) {
                $merged{$name}{$setting} = $section->{$setting};
            }
        }
    }

    return \%merged;
}
# parse_config
#
#   Arg [1]   : object that has a setter method for every config variable
#   Arg [2]   : hashref of config entries, keyed by entry name; must contain
#               a DEFAULT entry (names are compared ignoring case)
#   Arg [3]   : string, label of the entry to apply on top of the defaults
#   Function  : upper-cases the entry names of the hash IN PLACE, then calls
#               $obj->VARIABLE(value) for every variable of the DEFAULT
#               entry and afterwards for every variable of the entry whose
#               name matches the upper-cased label
#   Returntype: nothing meaningful
#   Exceptions: throws if no label is given, if the config is not a hashref,
#               if two entry names differ only by case, if there is no
#               DEFAULT entry, if the object lacks a method for a variable,
#               or if there is no entry for the label
sub parse_config {
    my ($obj, $var_hash, $label) = @_;

    unless ($label) {
        throw("Can't parse the ".$var_hash." hash for object ".$obj." if we are give no label");
    }

    my $DEFAULT_ENTRY_KEY = 'DEFAULT';

    unless ($var_hash && ref($var_hash) eq 'HASH') {
        my $err = "Must pass read_and_check_config a hashref with the config "
                . "in ";
        if ($var_hash) {
            $err .= " not a ".$var_hash;
        }
        $err .= " Utilities::read_and_and_check_config";
        throw($err);
    }

    # Map each upper-cased entry name to the name actually used in the hash,
    # refusing names that collide once case is ignored.
    my %original_key_for;
    for my $key (keys %$var_hash) {
        my $upper = uc($key);
        if (exists $original_key_for{$upper}) {
            throw("You have two entries in your config with the same name (ignoring case)\n");
        }
        $original_key_for{$upper} = $key;
    }

    # Re-key the caller's hash so every entry name is upper case.
    for my $upper (keys %original_key_for) {
        $var_hash->{$upper} = delete $var_hash->{ $original_key_for{$upper} };
    }

    unless (exists($var_hash->{$DEFAULT_ENTRY_KEY})) {
        throw("You must define a $DEFAULT_ENTRY_KEY entry in your config");
    }

    # Pushes every variable of one config entry into the object.
    my $apply_entry = sub {
        my ($settings) = @_;
        for my $setting (keys %{$settings}) {
            if ($obj->can($setting)) {
                $obj->$setting($settings->{$setting});
            }
            else {
                throw("no method defined in Utilities for config variable '$setting'");
            }
        }
    };

    # Defaults first, so the labelled entry can override them.
    $apply_entry->($var_hash->{$DEFAULT_ENTRY_KEY});

    my $uc_logic = uc($label);
    if (exists $var_hash->{$uc_logic}) {
        $apply_entry->($var_hash->{$uc_logic});
    }
    else {
        throw("Your logic_name ".$uc_logic." doesn't appear in your config file hash - using default settings\n".
              $var_hash);
    }
}
# read_config
#
#   Arg [1]   : string, name of the configuration module (Some::Module)
#   Arg [2]   : optional arrayref of variable names to read
#   Function  : requires the module at runtime. Without Arg [2] the variable
#               hash delivered by package_stash() is used and every one of
#               its keys is imported from the module. With Arg [2] only the
#               listed names are imported and their values are collected
#               into a new hash.
#   Returntype: hashref of variable name => value
#   Exceptions: none raised here; require dies if the module can't be loaded
#   Requires  : package_stash (Bio::EnsEMBL::Analysis::Tools::Stashes)
sub read_config {
    my ($module_name, $aref) = @_;

    (my $module_path = $module_name) =~ s{::}{/}g;
    require "$module_path.pm";

    if (!$aref) {
        my ($config_href) = @{ package_stash("$module_name") };
        foreach my $name (keys %$config_href) {
            $module_name->import($name);
        }
        return $config_href;
    }

    # The imported values are read back through symbolic references to
    # package variables of the same name, which strict would forbid.
    no strict;

    foreach my $name (@$aref) {
        $module_name->import($name);
    }

    my %import;
    foreach my $name (@$aref) {
        $import{$name} = ${$name};
    }

    return \%import;
}
# shuffle
#
#   Arg [1]   : reference to an array
#   Function  : randomizes the order of the array IN PLACE by walking from
#               the last index down to the first and swapping each element
#               with one at a random index not above it
#   Returntype: the same arrayref that was passed in
#   Exceptions: none
sub shuffle {
    my $tref = shift;

    my $count = @$tref;
    for my $i (reverse 0 .. $count - 1) {
        my $j = int rand($i + 1);
        ($tref->[$i], $tref->[$j]) = ($tref->[$j], $tref->[$i]);
    }

    return $tref;
}
write_seqfile | description | prev | next | Top |
# write_seqfile
#
#   Arg [1]   : Bio::PrimarySeqI object, or arrayref of such objects
#   Arg [2]   : string, filename (optional; a name under /tmp is generated
#               with create_file_name when omitted)
#   Arg [3]   : string, Bio::SeqIO format (optional, defaults to 'fasta')
#   Function  : uses Bio::SeqIO to write the sequence(s) to the file
#   Returntype: string, filename
#   Exceptions: throws if the input is not a Bio::PrimarySeqI object or a
#               non-empty array of them, or if writing a sequence fails
#   Example   : my $file = write_seqfile($seq, '/tmp/query.fa');
sub write_seqfile {
    my ($seq, $filename, $format) = @_;

    $format = 'fasta' if (!$format);

    # True only for blessed references that implement Bio::PrimarySeqI; the
    # eval keeps an unblessed reference from dying inside ->isa.
    my $is_primary_seq = sub {
        my ($candidate) = @_;
        return (ref($candidate) && eval { $candidate->isa('Bio::PrimarySeqI') });
    };

    my @seqs;
    if (ref($seq) eq "ARRAY") {
        @seqs = @$seq;
        throw("Seqs need to be Bio::PrimarySeqI object not an empty array")
            unless (@seqs);

        # Every element is checked up front, so a bad entry further down the
        # list is reported before anything is written.
        foreach my $entry (@seqs) {
            throw("Seqs need to be Bio::PrimarySeqI object not a "
                  . (defined $entry ? $entry : 'undef'))
                unless ($is_primary_seq->($entry));
        }
    }
    else {
        throw("Need a Bio::PrimarySeqI object not a "
              . (defined $seq ? $seq : 'undef'))
            unless ($is_primary_seq->($seq));
        @seqs = ($seq);
    }

    $filename = create_file_name('seq', 'fa', '/tmp')
        if (!$filename);

    my $seqout = Bio::SeqIO->new(
        -file   => ">" . $filename,
        -format => $format,
    );

    foreach my $entry (@seqs) {
        eval {
            $seqout->write_seq($entry);
        };
        if ($@) {
            throw("FAILED to write $entry to $filename SequenceUtils:write_seq_file $@");
        }
    }

    return $filename;
}
General documentation
Arg [1] : Bio::Seq
Arg [2] : string, filename
Function : This uses Bio::SeqIO to dump a sequence to a fasta file
Returntype: string, filename
Exceptions: throw if failed to write sequence
Example :