# Raw content of Bio::EnsEMBL::Analysis::Config::Blast
# Ensembl module for Bio::EnsEMBL::Analysis::Config::Blast
#
# Copyright (c) 2004 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Analysis::Config::Blast
=head1 SYNOPSIS
use Bio::EnsEMBL::Analysis::Config::Blast;
use Bio::EnsEMBL::Analysis::Config::Blast qw(BLAST_CONFIG);
=head1 DESCRIPTION
This is a module needed to provide configuration for the
blast RunnableDBs. It informs the various blast RunnableDBs which
Parser and Filter objects they should instantiate, and also of any
constructor arguments which should go to the Blast Runnable. Note that any
Blast constructor arguments will be overridden by the same key in
the parameters column of the analysis table.
BLAST_CONFIG is a hash of hashes which contains analysis-specific
settings and is keyed on logic_name.
The important values are the logic_name, which should be the same as the
equivalent column in the analysis table,
and BLAST_PARSER, which should be the Perl path to an object that parses the
blast report.
The BLAST_PARSER object should fit the standard interface, which is to
have a method called parse_file which accepts a filename as an argument and
returns a set of results. All other constructor arguments are optional, but
the two parser objects which currently exist both need a regex, a query type
and a database type. The types needed for the different flavours of blast
can be found below, and the two parser objects, BPliteWrapper and
FilterBPlite, both live in Bio/EnsEMBL/Analysis/Tools.
BLAST_FILTER should be a Perl path to a filter object. The filter module
isn't obligatory; if none is specified then the blast results won't be
filtered after parsing (note that some parsers may do filtering). The only
filter object which currently exists is
Bio::EnsEMBL::Analysis::Tools::FeatureFilter. All filter objects must
have a filter_results method, but aside from that there are no other
requirements. Any constructor args should be specified in the FILTER_PARAMS
hash. FeatureFilter can take min_score, max_pvalue, coverage, prune and
hardprune.
BLAST_PARAMS holds any constructor parameters for whichever blast module
you are using. The sequence ID format sometimes changes in the Uniprot,
Unigene etc. databases used in BLAST, so modify the regex if necessary. Note
that all constructor parameters will be overridden by values taken from the
parameters column of the analysis table.
BLAST_AB_INITIO_LOGICNAME is for the BlastGenscanPep/DNA RunnableDBs and
specifies which ab initio predictions to use when running the blast; it
currently defaults to Genscan.
This example file contains a default setting, which is what we use for our
BlastGenscanPep against Swall.
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Config::Blast;
use strict;
use vars qw(%Config);
# PARSER_PARAMS-table for Blast-Configuration (below):
#
# Analysis query_type database_type
#-------------------------------------
# blastp pep pep ("standard" Uniprot-run using module BlastGenscanPep.pm)
# blastn dna dna
# blastx dna pep (i.e.Uniprot-wublastx with module Blast.pm)
# tblastn pep dna
# tblastx dna dna
#
#
# ######## Example-configuration for a 'standard' Uniprot run which uses
# ######## the program wublastp and the module BlastGenscanPep.pm
#
# Uniprot =>
# {
# BLAST_PARSER => 'Bio::EnsEMBL::Analysis::Tools::FilterBPlite',
# PARSER_PARAMS => {
# -regex => '(^\w+\W\d+)', # Modify if seq ID format has changed in Uniprot DB
# -query_type => 'pep', # see PARSER_PARAMS-table
# -database_type => 'pep', # see PARSER_PARAMS-table
# -threshold_type => 'PVALUE',
# -threshold => 0.01,
# },
# BLAST_FILTER => 'Bio::EnsEMBL::Analysis::Tools::FeatureFilter',
# FILTER_PARAMS => { },
# BLAST_PARAMS => {
# -unknown_error_string => 'FAILED',
# -type => 'wu',
# },
# },
# Blast configuration data.
#
# BLAST_CONFIG is a hash keyed on analysis logic_name. Each entry names the
# parser and (optional) filter classes plus the constructor arguments for the
# parser, the filter and the Blast runnable.
%Config = (

    BLAST_CONFIG => {

        # Entry named DEFAULT: BPliteWrapper parser, no filter object.
        DEFAULT => {
            BLAST_PARSER  => 'Bio::EnsEMBL::Analysis::Tools::BPliteWrapper',
            PARSER_PARAMS => {
                -regex         => '^(\w+)',
                -query_type    => undef,
                -database_type => undef,
            },
            BLAST_FILTER  => undef,
            FILTER_PARAMS => {},
            BLAST_PARAMS  => {
                -unknown_error_string => 'FAILED',
                -type                 => 'wu',
            },
        },

        # Peptide query against a peptide database.
        Uniprot => {
            BLAST_PARSER  => 'Bio::EnsEMBL::Analysis::Tools::FilterBPlite',
            PARSER_PARAMS => {
                -regex          => '^(\w+\W\d+)',
                -query_type     => 'pep',
                -database_type  => 'pep',
                -threshold_type => 'PVALUE',
                -threshold      => 0.01,
            },
            BLAST_FILTER  => 'Bio::EnsEMBL::Analysis::Tools::FeatureFilter',
            FILTER_PARAMS => {
                -min_score => 200,
                -prune     => 1,
            },
            BLAST_PARAMS => {
                -unknown_error_string => 'FAILED',
                -type                 => 'wu',
            },
        },

        # Peptide query against a DNA database.
        Vertrna => {
            BLAST_PARSER  => 'Bio::EnsEMBL::Analysis::Tools::FilterBPlite',
            PARSER_PARAMS => {
                -regex          => '^(\w+\W\d+)',
                -query_type     => 'pep',
                -database_type  => 'dna',
                -threshold_type => 'PVALUE',
                -threshold      => 0.001,
            },
            BLAST_FILTER  => 'Bio::EnsEMBL::Analysis::Tools::FeatureFilter',
            FILTER_PARAMS => {
                -prune => 1,
            },
            BLAST_PARAMS => {
                -unknown_error_string => 'FAILED',
                -type                 => 'wu',
            },
        },

        # Peptide query against a DNA database; the regex captures the
        # value following "/ug=".
        Unigene => {
            BLAST_PARSER  => 'Bio::EnsEMBL::Analysis::Tools::FilterBPlite',
            PARSER_PARAMS => {
                -regex          => '\/ug\=([\w\.]+)',
                -query_type     => 'pep',
                -database_type  => 'dna',
                -threshold_type => 'PVALUE',
                -threshold      => 0.001,
            },
            BLAST_FILTER  => 'Bio::EnsEMBL::Analysis::Tools::FeatureFilter',
            FILTER_PARAMS => {
                -prune => 1,
            },
            BLAST_PARAMS => {
                -unknown_error_string => 'FAILED',
                -type                 => 'wu',
            },
        },

    },    # close BLAST_CONFIG

    # logic_name of the ab initio predictions (see the POD above).
    BLAST_AB_INITIO_LOGICNAME => 'Genscan',

);    # close %Config
# import - export configuration entries into the calling package.
#
# Invoked implicitly by "use Bio::EnsEMBL::Analysis::Config::Blast ...".
# Each requested top-level key of %Config becomes a package scalar of the
# same name in the caller (e.g. $BLAST_CONFIG), aliased to the value held
# in %Config.
#
# Arguments: the package name (discarded), followed by an optional list of
#            %Config keys. With no list, every key of %Config is exported.
# Returns:   nothing useful; returns early if there is nothing to export.
# Dies:      with "Error: Config: NAME not known\n" if a requested key has
#            no defined value in %Config, or with the eval error if the
#            predeclaration fails.
sub import {
    my ($callpack) = caller(0);    # Name of the calling package
    shift;                         # Discard our own package name from @_

    # Get list of variables supplied, or else all
    my @vars = @_ ? @_ : keys(%Config);
    return unless @vars;

    # Check every name BEFORE it is interpolated into the string eval
    # below, so that unknown or malformed names are rejected up front
    # instead of being compiled as code in the caller's package.
    foreach my $name (@vars) {
        die "Error: Config: $name not known\n"
            unless defined $Config{$name};
    }

    # Predeclare global variables in calling package
    eval "package $callpack; use vars qw("
        . join(' ', map { '$' . $_ } @vars) . ")";
    die $@ if $@;

    foreach my $name (@vars) {
        no strict 'refs';

        # Exporter does a similar job to the following statement, but for
        # function names, not scalar variables: the caller's scalar slot
        # is pointed at the value stored in %Config.
        *{"${callpack}::$name"} = \$Config{$name};
    }
}
1;