# Raw content of Bio::EnsEMBL::Analysis::RunnableDB::miRNA
## Ensembl module for Bio::EnsEMBL::Analysis::RunnableDB::miRNA
#
# Copyright (c) 2004 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Analysis::RunnableDB::miRNA
=head1 SYNOPSIS
my $runnableDB = Bio::EnsEMBL::Analysis::RunnableDB::miRNA->new(
-db => $db_adaptor,
-input_id => 'analysis logic name',
-analysis => $analysis,
);
$runnableDB->fetch_input();
$runnableDB->run();
$runnableDB->write_output();
=head1 DESCRIPTION
RunnableDB to provide database access for miRNA detection.
Runs as an accumulator job on miRNA blast hits found by Bio::EnsEMBL::RunnableDB::BlastmiRNA
Takes an analysis logic name as an input id and uses it to fetch all dna align features associated with
that analysis.
It then groups the dna align features by miRNA family. Families with > 50
members are trimmed to their 50 best hits (lowest p-value), as there is a high
probability that such large families are hitting repetitive sequences.
Creates and runs the miRNA runnable.
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::RunnableDB::miRNA;
use strict;
use warnings;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Analysis::Config::Databases;
use Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild ;
use Bio::EnsEMBL::Analysis::RunnableDB;
use Bio::EnsEMBL::Analysis::Runnable::miRNA;
use Bio::EnsEMBL::Pipeline::DBSQL::FlagAdaptor;
# Parent classes, in method-resolution order: RunnableDB (and its ancestors)
# is searched before BaseGeneBuild.
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild);
=head2 fetch_input

  Title      : fetch_input
  Usage      : $miRNA->fetch_input();
  Function   : Attaches the dna database to $self->db and stores the gene
             : database (GENEBUILD_DB from
             : Bio::EnsEMBL::Analysis::Config::Databases) via gene_db.
             : Checks that an analysis with the logic name given in the
             : input id exists, then fetches the flags for $self->analysis
             : and the dna align feature each matching flag points to.
             : The features are grouped with family() and handed to a new
             : Bio::EnsEMBL::Analysis::Runnable::miRNA.
  Returns    : The return value of $self->runnable($runnable)
  Exceptions : Throws if no analysis matches the input id or if no dna
             : align features are retrieved
  Args       : None

=cut

sub fetch_input{
  my ($self) = @_;

  # dna database
  my $dna_db = $self->get_dbadaptor($DNA_DBNAME);

  # if you want to write the final genes into the pipeline database need
  # to catch it first and store the $self->db as the genes->db otherwise the
  # registry will cause problems
  if ( $$DATABASES{'GENEBUILD_DB'}{'-dbname'} eq $self->db->dbc->dbname &&
       $$DATABASES{'GENEBUILD_DB'}{'-port'}   == $self->db->dbc->port   &&
       $$DATABASES{'GENEBUILD_DB'}{'-host'}   eq $self->db->dbc->host ){
    $self->gene_db($self->db);
  } else {
    my $genes_db = $self->get_dbadaptor("GENEBUILD_DB");
    $self->gene_db($genes_db);
  }
  $self->db->dnadb($dna_db);

  # The input id is only used to confirm that the upstream analysis exists.
  # Report the logic name that was actually looked up; $@ carries nothing
  # relevant at this point.
  my $aa       = $self->db->get_AnalysisAdaptor;
  my $analysis = $aa->fetch_by_logic_name($self->input_id);
  $self->throw("Analysis ".$self->input_id." not found\n") unless $analysis;

  my $dafa = $self->db->get_DnaAlignFeatureAdaptor;
  my $fa   = Bio::EnsEMBL::Pipeline::DBSQL::FlagAdaptor->new($self->db);
  my @flags;
  my @dafs;

  print "Fetching features\n";
  eval{
    @flags = @{$fa->fetch_by_analysis($self->analysis)};
  };
  # Still best effort (an empty flag list leads to the throw below), but say
  # why the list is empty rather than hiding the underlying error.
  if ($@){
    warning("miRNA: failed to fetch flags for this analysis: $@");
  }

  foreach my $flag (@flags){
    next unless $flag->goalAnalysis->logic_name eq $self->analysis->logic_name;
    my $daf = $dafa->fetch_by_dbID($flag->ensembl_id);
    # A flag can point at a feature that no longer exists; an undefined entry
    # here would make family() die when it calls hseqname on it.
    unless ($daf){
      warning("miRNA: no dna align feature with dbID ".$flag->ensembl_id.", skipping");
      next;
    }
    push @dafs, $daf;
  }
  $self->throw("No dna align features found ") unless (scalar(@dafs) >=1);
  print scalar(@dafs)." dafs found\n";

  my %families = %{$self->family(\@dafs)};
  # empty the array
  @dafs = ();

  my $runnable = Bio::EnsEMBL::Analysis::Runnable::miRNA->new
    (
     -queries  => \%families,
     -analysis => $self->analysis,
    );
  return $self->runnable($runnable);
}
=head2 family

  Title      : family
  Usage      : my %families = %{$self->family(\@dafs)};
             : my %families = %{$self->family(\@dafs, 20)};
  Function   : Groups dafs by family (their hseqname). A family with more
             : members than the limit is not discarded: it is cut down to
             : the hits with the lowest p-values, as very large families
             : are likely to be hitting repeats.
  Returns    : Hash reference, hseqname => array ref of features
  Exceptions : None
  Args       : Array ref of Bio::EnsEMBL::DnaDnaAlignFeatures
             : (optional) maximum number of members kept per family, a
             : positive integer; defaults to 50

=cut

sub family{
  my ($self, $dafs_ref, $max_members) = @_;
  $max_members = 50 unless defined $max_members;

  # Bucket the features by the name of the sequence they hit.
  my %families;
  foreach my $daf (@$dafs_ref){
    push @{$families{$daf->hseqname}}, $daf;
  }

  my %filtered_fam;
  foreach my $name (keys %families){
    my $members = $families{$name};
    if (scalar(@$members) > $max_members){
      # take top scoring hits: lowest p-value first
      my @by_p_value = sort { $a->p_value <=> $b->p_value } @$members;
      $members = [ @by_p_value[0 .. $max_members - 1] ];
    }
    # Families within the limit are kept as they are, in their input order.
    $filtered_fam{$name} = $members;
  }
  return \%filtered_fam;
}
=head2 write_output

  Args       : none
  Description: Writes each gene in $self->output into the database held by
             : gene_db, then stores the accompanying attributes on every
             : transcript of that gene and updates the transcript and gene
             : records. Each element of $self->output is a hash ref with a
             : 'gene' entry and an 'attrib' entry (array ref of attributes).
  Exceptions : Throws if gene or transcript attribute fail to write to the database
  Returntype : scalar (1)

=cut

sub write_output{
  my ($self) = @_;
  my $adaptor = $self->gene_db->get_GeneAdaptor;
  my $aa      = $self->gene_db->get_AttributeAdaptor;

  foreach my $gene_hash (@{$self->output}){
    my $gene       = $gene_hash->{'gene'};
    my @attributes = @{$gene_hash->{'attrib'}};

    $gene->analysis($self->analysis);
    $gene->status('PREDICTED');
    foreach my $trans (@{$gene->get_all_Transcripts}){
      $trans->analysis($self->analysis);
      $trans->status('PREDICTED');
    }
    $gene->slice($self->query) if(!$gene->slice);
    $self->feature_factory->validate($gene);

    # Announce the target before storing and outside the eval, so a problem
    # building this message can never be reported as a failed store. The
    # database name is read through dbc, as fetch_input does.
    print STDERR "Attempting to store in ".$adaptor->db->dbc->dbname."\n";
    eval{
      $adaptor->store($gene);
    };
    if($@){
      $self->throw("miRNA:store failed, failed to write ".$gene." to ".
                   "the database $@");
    }

    foreach my $trans (@{$gene->get_all_Transcripts}){
      eval{
        $aa->store_on_Transcript($trans->dbID,\@attributes);
        $self->gene_db->get_TranscriptAdaptor->update($trans);
        $self->gene_db->get_GeneAdaptor->update($gene);
      };
      if($@){
        # Give the attribute count a label; a bare number in the message
        # is not readable.
        $self->throw("miRNA:store failed, failed to write ".scalar(@attributes).
                     " attributes on transcript ".$trans." in the database $@");
      }
    }
  }
  return 1;
}
#########################################################
# Containers
=head2 gene_db

  Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor
  Description: Getter/setter for the database adaptor the genes are written
             : to. Called without an argument it returns the stored adaptor.
  Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor, or undef if none has been set
  Exceptions : Throws if the argument is not a Bio::EnsEMBL::DBSQL::DBAdaptor
  Caller     : general

=cut

sub gene_db {
  my $self    = shift;
  my $gene_db = shift;

  # Plain getter when nothing (or a false value) is passed in.
  return $self->{'_gene_db'} unless $gene_db;

  if (!$gene_db->isa("Bio::EnsEMBL::DBSQL::DBAdaptor")){
    $self->throw("gene db is not a Bio::EnsEMBL::DBSQL::DBAdaptor, it is a $gene_db");
  }
  $self->{'_gene_db'} = $gene_db;
  return $self->{'_gene_db'};
}
1;