# Raw content of Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
=pod
=head1 NAME
Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes - runs a configurable gene filter over a set of genes
=head1 SYNOPSIS
my $filtergenes = Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes->new(
-db => $refdb,
-analysis => $analysis_obj,
-input_id => $slice_name
);
$filtergenes->fetch_input();
$filtergenes->run();
$filtergenes->output();
$filtergenes->write_output(); #writes to DB
=head1 DESCRIPTION
This object wraps a gene filter object as defined in its configuration file.
The filter object must implement a method called filter_genes, which expects
an arrayref of genes as its argument and returns two arrayrefs of genes: the
first holds the accepted gene objects, the second the rejected gene objects.
These genes are then relabelled as described in the config file and stored in
the output database.
=head1 CONTACT
Post general queries to B<ensembl-dev@ebi.ac.uk>
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a '_'
=cut
package Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes;
use strict;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild;
use Bio::EnsEMBL::Analysis::Tools::Logger qw(logger_info);
use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils qw(coord_string id);
use Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::GeneUtils
qw(empty_Gene);
use Bio::EnsEMBL::Analysis::Config::GeneBuild::FilterGenes
qw(FILTER_CONFIG_BY_LOGIC);
use vars qw(@ISA);
@ISA = qw (Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild);
=head2 new
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Function : This simply checks the configuration is correct
Returntype: Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Exceptions:
Example :
=cut
# Constructor.  Delegates object creation to the parent class and then
# loads and validates the FilterGenes configuration before handing the
# object back to the caller.
sub new {
    my ($class, @ctor_args) = @_;

    my $self = $class->SUPER::new(@ctor_args);

    # Fail at construction time if the configuration is incomplete.
    $self->read_and_check_config($FILTER_CONFIG_BY_LOGIC);

    return $self;
}
=head2 fetch_input
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Function : fetch genes and sequence for the analysis
Returntype: n/a
Exceptions:
Example :
=cut
# Gather everything run() needs: the genes to filter (stored on the object
# by fetch_genes) and the slice named by the input id, which is recorded
# through query().
sub fetch_input {
    my $self = shift;

    $self->fetch_genes;

    my $query_slice = $self->fetch_sequence($self->input_id, $self->db);
    $self->query($query_slice);
}
=head2 run
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Function : run the defined filter object storing the output
in the object
Returntype: n/a
Exceptions:
Example :
=cut
# Hand the fetched genes to the filter object and remember both result
# lists: accepted genes go to output(), rejected genes go to removed().
sub run {
    my $self = shift;

    my $filter     = $self->filter_object;
    my $candidates = $self->genes;

    my ($accepted, $rejected) = $filter->filter_genes($candidates);

    $self->output($accepted);
    $self->removed($rejected);
}
=head2 write_output
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Function : store the output and filtered out genes in the
database, relabelling the biotype as appropriate
Returntype: n/a
Exceptions: warns if fails to store a gene, throws if all stores
fail
Example :
=cut
# Store the kept genes, and optionally the rejected genes, in OUTPUT_DB.
#
# Kept genes are relabelled with RELABEL_KEPT when that is set and are
# passed through empty_Gene with the REMOVE_STABLE_IDS / REMOVE_XREFS
# settings.  Rejected genes are only stored when RELABEL_REMOVED is set;
# they are relabelled with it and passed through empty_Gene with both
# flags switched on.
#
# Every store is attempted independently: a failure is reported through
# warning() and the remaining genes are still tried.  An exception is
# thrown only when there was at least one gene to store and none of them
# could be stored; having nothing to store is not treated as an error.
sub write_output {
    my ($self) = @_;

    my $gene_adaptor = $self->get_dbadaptor($self->OUTPUT_DB)->get_GeneAdaptor;

    # Fall back to an empty list so a missing output list cannot crash
    # the dereferences below.
    my $kept_genes  = $self->output || [];
    my $total       = scalar @$kept_genes;
    my $store_count = 0;

    # Attempt a single store and report whether it worked.  The trailing 1
    # inside the eval makes its return value the success signal.
    my $try_store = sub {
        my ($gene, $description) = @_;
        return 1 if eval { $gene_adaptor->store($gene); 1 };
        warning("Failed to store ".$description." ".$@.
                " FilterGenes:write_output");
        return 0;
    };

    logger_info("Storing ".$total." genes in ".$self->OUTPUT_DB);
    foreach my $gene (@$kept_genes) {
        $gene->biotype($self->RELABEL_KEPT) if ($self->RELABEL_KEPT);
        empty_Gene($gene, $self->REMOVE_STABLE_IDS, $self->REMOVE_XREFS);
        $store_count++ if $try_store->($gene, "gene");
    }

    if ($self->RELABEL_REMOVED) {
        # removed() returns undef when no rejected list has been stored.
        my $removed_genes = $self->removed || [];
        $total += scalar @$removed_genes;
        foreach my $gene (@$removed_genes) {
            $gene->biotype($self->RELABEL_REMOVED);
            empty_Gene($gene, 1, 1);
            $store_count++ if $try_store->($gene, "relabled removed gene");
        }
    }

    # Only complain when something should have been written and nothing was.
    if ($total > 0 && $store_count == 0) {
        throw("Failed to store any genes in FilterGenes:write_output");
    }
}
=head2 fetch_genes
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Function : gets the genes on the basis of the GENE_SET hash in the
configuration
Returntype: arrayref of Bio::EnsEMBL::Gene
Exceptions: n/a
Example :
=cut
# Collect the genes to be filtered.  For every entry in the GENE_SET config
# hash the slice named by the input id is fetched from that entry's
# database (one fetch per database name), and its genes are kept when their
# biotype is listed for the entry.  An empty biotype list keeps every gene.
# The combined list is stored through genes() and also returned.
sub fetch_genes {
    my $self = shift;

    my $gene_sets = $self->GENE_SET;
    my %slice_for_db;
    my @selected;

    for my $set_name (keys %$gene_sets) {
        my $set    = $gene_sets->{$set_name};
        my $dbname = $set->{dbname};

        # Lookup table of the biotypes wanted from this set.
        my $biotype_list = $set->{biotypes};
        my %wanted;
        $wanted{$_} = 1 for @$biotype_list;

        logger_info("Fetching ".$set_name." geneset from ".$dbname);

        # Reuse the slice if this database was already visited.
        unless ($slice_for_db{$dbname}) {
            my $adaptor = $self->get_dbadaptor($dbname);
            $slice_for_db{$dbname} =
                $self->fetch_sequence($self->input_id, $adaptor);
        }

        my $all_genes = $slice_for_db{$dbname}->get_all_Genes;
        if (%wanted) {
            push @selected, grep { $wanted{ $_->biotype } } @$all_genes;
        }
        else {
            push @selected, @$all_genes;
        }
    }

    $self->genes(\@selected);
    return \@selected;
}
=head2 genes
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Arg [2] : arrayref of Bio::EnsEMBL::Gene
Function : stores the arrayref
Returntype: arrayref of Bio::EnsEMBL::Gene
Exceptions:
Example :
=cut
# Accessor for the list of genes to filter.  A true argument replaces the
# stored value; a false or missing argument leaves it untouched.  Always
# returns the currently stored value.
sub genes {
    my $self     = shift;
    my $new_list = shift;

    $self->{genes} = $new_list if $new_list;

    return $self->{genes};
}
=head2 removed
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Arg [2] : arrayref of Bio::EnsEMBL::Gene
Function : stores the arrayref
Returntype: arrayref of Bio::EnsEMBL::Gene
Exceptions:
Example :
=cut
# Accessor for the list of genes the filter rejected.  A true argument
# replaces the stored value; a false or missing argument leaves it
# untouched.  Always returns the currently stored value.
sub removed {
    my $self     = shift;
    my $new_list = shift;

    $self->{removed} = $new_list if $new_list;

    return $self->{removed};
}
=head2 filter_object
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Arg [2] : a filter object with the method filter_genes
Function : returns said filter object, which is created on the
basis of the config if one doesn't already exist
Returntype: filter object
Exceptions: throws if object can't carry out the needed method
Example :
=cut
# Accessor for the filter object.
#
# A true argument is stored as the filter.  If no filter is stored yet, one
# is built from the config: the class named by FILTER_OBJECT is loaded with
# require_module and instantiated with the key/value pairs in FILTER_PARAMS.
# FILTER_PARAMS is not a mandatory config key, so a missing value is treated
# as "no parameters" rather than being dereferenced blindly.
#
# Throws if the stored filter cannot filter_genes.  Returns the filter.
sub filter_object {
    my ($self, $arg) = @_;

    if ($arg) {
        $self->{filter_object} = $arg;
    }

    if (!$self->{filter_object}) {
        my $filter_class = $self->FILTER_OBJECT;
        $self->require_module($filter_class);

        # Default to an empty parameter list when FILTER_PARAMS is unset.
        my %params = %{ $self->FILTER_PARAMS || {} };
        $self->{filter_object} = $filter_class->new(%params);
    }

    my $filter = $self->{filter_object};
    if ($filter && !$filter->can("filter_genes")) {
        throw($filter." must have a method called filter_genes");
    }
    return $filter;
}
=head2 read_and_check_config
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Function : calls the superclass method then does some
sanity checking of the config
Returntype: n/a
Exceptions: throws if there are problems with the config
Example :
=cut
# Load the FilterGenes configuration through the parent class and validate
# it.  Throws when GENE_SET, OUTPUT_DB or FILTER_OBJECT is undefined, when
# GENE_SET is not a non-empty hash ref, or when any GENE_SET entry is not a
# hash ref holding a dbname and an array ref of biotypes.
sub read_and_check_config {
    my $self = shift;

    $self->SUPER::read_and_check_config($FILTER_CONFIG_BY_LOGIC);
    my $logic = $self->analysis->logic_name;

    ##########
    # CHECKS
    ##########
    foreach my $config_var (qw(GENE_SET OUTPUT_DB FILTER_OBJECT)) {
        throw("You must define $config_var in config for logic '$logic'")
            if not defined $self->$config_var;
    }

    # Check the container before looking inside it, so a malformed value is
    # reported with the message below instead of a strict-refs failure.
    my $gene_hash = $self->GENE_SET;
    if (ref($gene_hash) ne 'HASH' || !keys(%$gene_hash)) {
        throw("There GENE_SET hash must contain keys pointing to other hashes".
              " This hash ".$gene_hash." doesn't");
    }

    foreach my $label (keys(%$gene_hash)) {
        my $values_hash = $gene_hash->{$label};

        # fetch_genes dereferences biotypes as an array, so insist on an
        # array ref here rather than merely a true value.
        if (ref($values_hash) ne 'HASH'
            || !$values_hash->{dbname}
            || ref($values_hash->{biotypes}) ne 'ARRAY') {
            throw("The GENE_SET hash must be a hash of hashes, each individual ".
                  "hash cotaining 2 key dbname pointing to the database desired ".
                  "from Databases.pm and biotypes pointing to an array ref of ".
                  "biotypes you want fetched from said databases ".$label." hash ".
                  "is missing one or both of these");
        }
    }
}
=head2 CONFIG methods
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::FilterGenes
Arg [2] : Here are a variety of things, it is mostly strings but there are
some hash and arrayrefs
Function : just to store the defined config variable
Returntype: whatever is passed in as Arg [2]
Exceptions: n/a
Example :
=cut
# Config accessor for GENE_SET.  A true argument replaces the stored value;
# the stored value is always returned.
sub GENE_SET {
    my $self  = shift;
    my $value = shift;

    $self->{GENE_SET} = $value if $value;

    return $self->{GENE_SET};
}
# Config accessor for OUTPUT_DB.  A true argument replaces the stored value;
# the stored value is always returned.
sub OUTPUT_DB {
    my $self  = shift;
    my $value = shift;

    $self->{OUTPUT_DB} = $value if $value;

    return $self->{OUTPUT_DB};
}
# Config accessor for RELABEL_REMOVED.  A true argument replaces the stored
# value; the stored value is always returned.
sub RELABEL_REMOVED {
    my $self  = shift;
    my $value = shift;

    $self->{RELABEL_REMOVED} = $value if $value;

    return $self->{RELABEL_REMOVED};
}
# Config accessor for RELABEL_KEPT.  A true argument replaces the stored
# value; the stored value is always returned.
sub RELABEL_KEPT {
    my $self  = shift;
    my $value = shift;

    $self->{RELABEL_KEPT} = $value if $value;

    return $self->{RELABEL_KEPT};
}
# Config accessor for FILTER_OBJECT.  A true argument replaces the stored
# value; the stored value is always returned.
sub FILTER_OBJECT {
    my $self  = shift;
    my $value = shift;

    $self->{FILTER_OBJECT} = $value if $value;

    return $self->{FILTER_OBJECT};
}
# Config accessor for FILTER_PARAMS.  A true argument replaces the stored
# value; the stored value is always returned.
sub FILTER_PARAMS {
    my $self  = shift;
    my $value = shift;

    $self->{FILTER_PARAMS} = $value if $value;

    return $self->{FILTER_PARAMS};
}
# Config accessor for the REMOVE_STABLE_IDS flag.  Any defined argument,
# including 0, replaces the stored value; the stored value is always
# returned.
sub REMOVE_STABLE_IDS {
    my $self  = shift;
    my $value = shift;

    $self->{REMOVE_STABLE_IDS} = $value if defined $value;

    return $self->{REMOVE_STABLE_IDS};
}
# Config accessor for the REMOVE_XREFS flag.  Any defined argument,
# including 0, replaces the stored value; the stored value is always
# returned.
sub REMOVE_XREFS {
    my $self  = shift;
    my $value = shift;

    $self->{REMOVE_XREFS} = $value if defined $value;

    return $self->{REMOVE_XREFS};
}