Bio::EnsEMBL::Analysis::RunnableDB
FilterGenes
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes;
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::FilterGenes qw ( FILTER_CONFIG_BY_LOGIC )
Inherit
Synopsis
my $filtergenes = Bio::EnsEMBL::Analysis::RunnableDB::Exonerate2Genes->new(
-db => $refdb,
-analysis => $analysis_obj,
-input_id => $slice_name
);
$filtergenes->fetch_input();
$filtergenes->run();
$filtergenes->output();
$filtergenes->write_output(); #writes to DB
Description
This object wraps a genefilter object as defined in its configuration file
The filter object must implement a method called filter_genes which expects
an arrayref of genes as its arguments and returns 2 arrayref of genes, the
first the accepted gene objects, the 2nd the rejected gene objects
These genes are then relabelled as described in the Config file and stored in
the output database
Methods
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Function : gets the genes on the basis of the GENE_SET hash in the configuration Returntype: arrayref of Bio::EnsEMBL::Gene Exceptions: n/a Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Function : fetch genes and sequence for the analysis Returntype: n/a Exceptions: Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Arg [2] : a filter object with the method filter_genes Function : returns said filter object which is created on the basis of the config is one doesn't already exist Returntype: filter object Exceptions: throws if object can't carry out the needed method Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Arg [2] : arrayref of Bio::EnsEMBL::Gene Function : stores the arrayref Returntype: arrayref of Bio::EnsEMBL::Gene Exceptions: Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Function : This simply checks the configuration is correct Returntype: Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Exceptions: Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Function : calls the superclass method then does some sanity checking of the config Returntype: n/a Exceptions: throws if there are problems with the config Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Arg [2] : arrayref of Bio::EnsEMBL::Gene Function : stores the arrayref Returntype: arrayref of Bio::EnsEMBL::Gene Exceptions: Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Function : run the defined filter object storing the output in the object Returntype: n/a Exceptions: Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes Function : store the output and filtered out genes in the database, relabelling the biotype as appropriate Returntype: n/a Exceptions: warns if fails to store a gene, throws if all stores fail Example : |
Methods code
FILTER_OBJECT | description | prev | next | Top |
sub FILTER_OBJECT
{ my ($self, $arg) = @_;
if($arg){
$self->{FILTER_OBJECT} = $arg;
}
return $self->{FILTER_OBJECT}; } |
sub FILTER_PARAMS
{ my ($self, $arg) = @_;
if($arg){
$self->{FILTER_PARAMS} = $arg;
}
return $self->{FILTER_PARAMS}; } |
sub GENE_SET
{ my ($self, $arg) = @_;
if($arg){
$self->{GENE_SET} = $arg;
}
return $self->{GENE_SET}; } |
sub OUTPUT_DB
{ my ($self, $arg) = @_;
if($arg){
$self->{OUTPUT_DB} = $arg;
}
return $self->{OUTPUT_DB}; } |
sub RELABEL_KEPT
{ my ($self, $arg) = @_;
if($arg){
$self->{RELABEL_KEPT} = $arg;
}
return $self->{RELABEL_KEPT}; } |
sub RELABEL_REMOVED
{ my ($self, $arg) = @_;
if($arg){
$self->{RELABEL_REMOVED} = $arg;
}
return $self->{RELABEL_REMOVED}; } |
sub REMOVE_STABLE_IDS
{ my ($self, $arg) = @_;
if(defined $arg){
$self->{REMOVE_STABLE_IDS} = $arg;
}
return $self->{REMOVE_STABLE_IDS}; } |
sub REMOVE_XREFS
{ my ($self, $arg) = @_;
if(defined $arg){
$self->{REMOVE_XREFS} = $arg;
}
return $self->{REMOVE_XREFS}; } |
sub fetch_genes
{ my ($self) = @_;
my $geneset_hash = $self->GENE_SET;
my %slice_hash;
my @to_filter;
foreach my $key(keys(%$geneset_hash)){
my $hash = $geneset_hash->{$key};
my $dbname = $hash->{dbname};
my $biotype_list = $hash->{biotypes};
my %biotype_hash;
foreach my $biotype (@$biotype_list){
$biotype_hash{$biotype} = 1;
}
logger_info("Fetching ".$key." geneset from ".$dbname);
if(!$slice_hash{$dbname}){
my $db = $self->get_dbadaptor($dbname);
my $slice = $self->fetch_sequence($self->input_id, $db);
$slice_hash{$dbname} = $slice;
}
my $slice = $slice_hash{$dbname};
my $genes = $slice->get_all_Genes;
GENE:foreach my $gene(@$genes){
next GENE if(keys(%biotype_hash) && !$biotype_hash{$gene->biotype});
push(@to_filter,$gene);
}
}
$self->genes(\@to_filter);
return\@ to_filter; } |
sub fetch_input
{ my ($self) = @_;
$self->fetch_genes;
my $slice = $self->fetch_sequence($self->input_id, $self->db);
$self->query($slice); } |
sub filter_object
{ my ($self, $arg) = @_;
if($arg){
$self->{filter_object} = $arg;
}
if(!$self->{filter_object}){
$self->require_module($self->FILTER_OBJECT);
my %params = %{$self->FILTER_PARAMS};
my $filter = $self->FILTER_OBJECT->new(
%params
);
$self->{filter_object} = $filter;
}
throw($self->{filter_object}." must have a method called filter_genes")
unless(!$self->{filter_object} ||
$self->{filter_object}->can("filter_genes"));
return $self->{filter_object}; } |
sub genes
{ my ($self, $arg) = @_;
if($arg){
$self->{genes} = $arg;
}
return $self->{genes}; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->read_and_check_config($FILTER_CONFIG_BY_LOGIC);
return $self; } |
sub read_and_check_config
{ my $self = shift;
$self->SUPER::read_and_check_config($FILTER_CONFIG_BY_LOGIC);
my $logic = $self->analysis->logic_name;
foreach my $config_var (qw(GENE_SET
OUTPUT_DB
FILTER_OBJECT)) {
throw("You must define $config_var in config for logic '$logic'")
if not defined $self->$config_var;
}
my $gene_hash = $self->GENE_SET;
my $count = 0;
foreach my $label(keys(%$gene_hash)){
$count++;
my $values_hash = $gene_hash->{$label};
if(!$values_hash->{dbname} || !$values_hash->{biotypes}){
throw("The GENE_SET hash must be a hash of hashes, each individual ".
"hash cotaining 2 key dbname pointing to the database desired ".
"from Databases.pm and biotypes pointing to an array ref of ".
"biotypes you want fetched from said databases ".$label." hash ".
"is missing one or both of these");
}
}
if(!$count){
throw("There GENE_SET hash must contain keys pointing to other hashes".
" This hash ".$gene_hash." doesn't");
} } |
sub removed
{ my ($self, $arg) = @_;
if($arg){
$self->{removed} = $arg;
}
return $self->{removed}; } |
sub run
{ my ($self) = @_;
my $filter_object = $self->filter_object;
my $genes = $self->genes;
my ($kept, $removed) = $filter_object->filter_genes($genes);
$self->output($kept);
$self->removed($removed); } |
sub write_output
{ my ($self) = @_;
my $gene_adaptor = $self->get_dbadaptor($self->OUTPUT_DB)->get_GeneAdaptor;
my $store_count = 0;
my $total = @{$self->output};
logger_info("Storing ".@{$self->output}." genes in ".$self->OUTPUT_DB);
foreach my $output(@{$self->output}){
$output->biotype($self->RELABEL_KEPT) if($self->RELABEL_KEPT);
empty_Gene($output, $self->REMOVE_STABLE_IDS, $self->REMOVE_XREFS);
eval{
$gene_adaptor->store($output);
};
if($@){
warning("Failed to store gene ".$@." FilterGenes:write_output");
}else{
$store_count++;
}
}
if($self->RELABEL_REMOVED){
$total += @{$self->removed};
foreach my $removed(@{$self->removed}){
$removed->biotype($self->RELABEL_REMOVED);
empty_Gene($removed, 1, 1);
eval{
$gene_adaptor->store($removed);
};
if($@){
warning("Failed to store relabled removed gene ".$@.
" FilterGenes:write_output");
}else{
$store_count++;
}
}
}
if($store_count == 0){
throw("Failed to store any genes in FilterGenes:write_output");
} } |
General documentation
Post general queries to ensembl-dev@ebi.ac.uk
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a '_'
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Arg [2] : Here are a variety of things, it is mostly strings but there are
some hash and arrayrefs
Function : just to store the defined config variable
Returntype: what ever is passed in in Arg[2]
Exceptions: n/a
Example :