Bio::EnsEMBL::Analysis::RunnableDB
Funcgen
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::Funcgen
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
Description
This module is the base class for the Funcgen RunnableDBs. It acts as an
interface between the functional genomics database and the Funcgen Runnables,
both fetching input data and writing results back to the database.
Methods
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB Arg [2] : Bio::EnsEMBL::DBSQL::DBAdaptor Function : container for dbadaptor Returntype: Bio::EnsEMBL::DBSQL::DBAdaptor Exceptions: throws if not passed a Bio::EnsEMBL::DBSQL::DBAdaptor object Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB Arg [2] : Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor Function : container for dbadaptor Returntype: Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor Exceptions: throws if not passed a Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor object Example : |
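Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB Function : fetch and set ResultSets of interest Returntype: array ref of the selected ResultSets Exceptions: throws if the selected result sets differ in feature type or cell type, or if RESULT_SET_REGEXP matches no result set Example : |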
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::Funcgen Description : fetch data out of database and create runnable Returns : 1 Exceptions : none Example : |
Arg [1] : file Description : check whether a file is gzipped or ASCII Returns : boolean Exceptions : none Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB Arg [2] : Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor Arg [3] : Bio::EnsEMBL::Analysis Function : create a Bio::EnsEMBL::Analysis::RunnableDB object Returntype: Bio::EnsEMBL::Analysis::RunnableDB Exceptions: throws if not passed either a dbadaptor, input id or an analysis object Example : $rdb = $perl_path->new( -analysis => $self->analysis, -input_id => $self->input_id, -db => $self->adaptor->db ); |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::Chipotle Function : set analysis and slice on each feature Returntype: 1 Exceptions: none Example : |
Methods code
ANALYSIS_WORK_DIR | description | prev | next | Top |
# Get/set the ANALYSIS_WORK_DIR configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub ANALYSIS_WORK_DIR {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_ANALYSIS_WORK_DIR';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
# Get/set the DNADB configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub DNADB {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_DNADB';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
1; } |
# Get/set the EFGDB configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub EFGDB {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_EFGDB';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
# Get/set the MODULE configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub MODULE {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_MODULE';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
# Get/set the NORM_METHOD configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub NORM_METHOD {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_NORM_METHOD';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
# Get/set the PARAMETERS configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub PARAMETERS {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_PARAMETERS';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
# Get/set the PROGRAM configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub PROGRAM {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_PROGRAM';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
# Get/set the PROGRAM_FILE configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub PROGRAM_FILE {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_PROGRAM_FILE';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
# Get/set the RESULT_SET_REGEXP configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub RESULT_SET_REGEXP {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_RESULT_SET_REGEXP';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
# Get/set the result sets this RunnableDB works on.
#
# Arg [1]    : (optional) value to store; a false value is ignored
# Returntype : the stored value
# Exceptions : throws if nothing (true) is stored after the optional update
sub ResultSets {
    my ($self, $sets) = @_;

    if ($sets) {
        $self->{'ResultSets'} = $sets;
    }

    unless ($self->{'ResultSets'}) {
        throw("No ResultSets in RunnableDB.");
    }

    return $self->{'ResultSets'};
}
# Get/set the SCORE_FACTOR configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub SCORE_FACTOR {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_SCORE_FACTOR';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
} |
# Get/set the VERSION configuration value.
#
# Arg [1]    : (optional) value to store; an undefined value is ignored
# Returntype : the stored value, or undef if none has been stored
sub VERSION {
    my ( $self, $value ) = @_;
    my $slot = '_CONFIG_VERSION';

    $self->{$slot} = $value if defined $value;

    # Always hand back exactly one value, even in list context.
    return ( exists $self->{$slot} ) ? $self->{$slot} : undef;
}
# Get/set the cell type.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Funcgen::CellType; a false value is ignored
# Returntype : the stored cell type (undef if none has been stored)
# Exceptions : throws if a true argument is not a Bio::EnsEMBL::Funcgen::CellType
sub cell_type {
    my ($self, $ct) = @_;

    # Pure getter when nothing (true) was passed.
    return $self->{'cell_type'} unless $ct;

    if (!$ct->isa('Bio::EnsEMBL::Funcgen::CellType')) {
        throw("Must pass a Bio::EnsEMBL::Funcgen::CellType not a ".$ct);
    }

    $self->{'cell_type'} = $ct;
    return $self->{'cell_type'};
}
# Synchronise the efg analysis object with the analysis table of the
# functional genomics database.
#
# The configured MODULE, PROGRAM, PROGRAM_FILE, VERSION and PARAMETERS are
# copied onto the efg analysis, which is then stored if no analysis with the
# same logic name exists, or used to update the stored one when compare()
# returns a true value. Finally the analysis is re-fetched by logic name and
# kept as the efg analysis.
sub check_Analysis {
    my ($self) = @_;

    # Copy the configuration onto the analysis object.
    my $configured = $self->efg_analysis;
    $configured->module($self->MODULE);
    $configured->program($self->PROGRAM);
    $configured->program_file($self->PROGRAM_FILE);
    $configured->program_version($self->VERSION);
    $configured->parameters($self->PARAMETERS);
    $configured->displayable(1);

    my $logic_name = $configured->logic_name;
    my $aa         = $self->efgdb->get_AnalysisAdaptor;
    my $stored     = $aa->fetch_by_logic_name($logic_name);

    if (!defined $stored) {
        warn("Storing new analysis with logic name $logic_name.");
        $aa->store($self->efg_analysis);
    }
    elsif ($self->efg_analysis->compare($stored)) {
        warn('Analysis with logic name\' '.$logic_name.'\' already '.
             'exists, but has different options! Updating analysis ...');

        # Reuse the stored dbID so that update() hits the existing row.
        $self->efg_analysis->dbID($stored->dbID);
        $self->efg_analysis->adaptor($self->efgdb->get_AnalysisAdaptor);
        $aa->update($self->efg_analysis);
    }
    else {
        warn('Analysis with logic name\' '.$logic_name.'\' already '.
             'exists.');
    }

    # Continue with the object as it now exists in the database.
    $self->efg_analysis($aa->fetch_by_logic_name($logic_name));
}
# Parse the input id ("<experiment name>:<rest>") and set the experiment and
# the query slices.
#
# With input id type 'Slice' the part after the experiment name is used as a
# slice name; with type 'Array' all toplevel slices are used.
#
# Exceptions : throws if the experiment or the slice(s) cannot be fetched,
#              or if the input id type is neither 'Slice' nor 'Array'
sub check_InputId {
    print "Analysis::RunnableDB::Funcgen::check_InputId\n";
    my ($self) = @_;

    my ($ename, @slice_parts) = split(':', $self->input_id);

    my $e = $self->efgdb->get_ExperimentAdaptor->fetch_by_name($ename);
    # Check the fetched object itself: the accessor ignores undef and would
    # return a previously stored experiment, hiding a failed fetch.
    throw("Can't fetch experiment with name ".$ename)
        unless ($e);
    $self->experiment($e)
        or throw("Can't fetch experiment with name ".$ename);
    warn("EXPERIMENT:\t".$self->experiment->name);

    my $input_id_type = $self->analysis->input_id_type();
    my $sa = $self->efgdb->get_SliceAdaptor;

    my @slices = ();
    if ($input_id_type eq 'Slice') {
        my $slice_name = join(':', @slice_parts);
        # A failed fetch can come back as a single undef, which makes the
        # list non-empty; drop undefined entries before testing for success.
        @slices = grep { defined } ( $sa->fetch_by_name($slice_name) );
        throw("Can't fetch slice with name ".$slice_name)
            unless (@slices);
    }
    elsif ($input_id_type eq 'Array') {
        @slices = @{$sa->fetch_all('toplevel')};
        throw("Can't fetch toplevel slices")
            unless (@slices);
    }
    else {
        throw("Input_id type '$input_id_type' not implemented.");
    }

    $self->query(\@slices);
}
# Make sure a feature set and a data set exist for the selected result sets.
#
# The set name is "<efg analysis logic name>_<longest common prefix of the
# result set names>" with one trailing underscore removed from the prefix.
# A missing feature set / data set is created and stored. For an existing
# data set, result sets that are not yet among its supporting sets are added
# to the object.
#
# Exceptions : throws if storing a new feature set or data set fails
sub check_Sets {
    my ($self) = @_;

    my $rsets = $self->fetch_ResultSets();
    my @names = map { $_->name; } @{$self->ResultSets()};

    # Count how many names start with each prefix; a prefix shared by all
    # result sets has a count equal to the number of result sets.
    my %hash = ();
    foreach my $n (@names) {
        my $string = '';
        foreach my $c (split(//, $n)) {
            $string .= $c;
            $hash{$string}++;
        }
    }

    # Walk the sorted prefixes and keep the last one seen before the first
    # prefix that is not shared by every result set.
    my $lcp = '';
    foreach my $prefix (sort keys %hash) {
        last if ($hash{$prefix} < scalar(@$rsets));
        $lcp = $prefix;
    }
    $lcp =~ s/_$//;

    my $set_name = $self->efg_analysis->logic_name.'_'.$lcp;
    print 'Set name: ', $set_name, "\n";

    my $fsa  = $self->efgdb->get_FeatureSetAdaptor();
    my $fset = $fsa->fetch_by_name($set_name);

    if ( ! defined $fset ) {
        $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new
            (
             -analysis     => $self->efg_analysis,
             -feature_type => $self->feature_type,
             -cell_type    => $self->cell_type,
             -name         => $set_name,
             -type         => 'annotated'
            );
        warn("Storing new feature set\' $set_name\'");
        eval {
            ($fset) = @{$fsa->store($fset)};
        };
        # Report the exception caught by eval ($@), not the OS errno ($!).
        throw("Couldn't store feature set\' $set_name\': $@") if ($@);
    }
    else {
        warn("Feature set with name $set_name already exists.");
    }
    $self->feature_set($fset);

    my $dsa  = $self->efgdb->get_DataSetAdaptor;
    my $dset = $dsa->fetch_by_name($set_name);

    if ( ! defined $dset ) {
        $dset = Bio::EnsEMBL::Funcgen::DataSet->new
            (
             -SUPPORTING_SETS     => $self->ResultSets,
             -FEATURE_SET         => $fset,
             -DISPLAYABLE         => 1,
             -NAME                => $set_name,
             -SUPPORTING_SET_TYPE => 'result',
            );
        warn("Storing new data set\' $set_name\'");
        eval {
            ($dset) = @{$dsa->store($dset)}
        };
        # Report the exception caught by eval ($@), not the OS errno ($!).
        throw("Couldn't store data set\' $set_name\': $@") if ($@);
    }
    else {
        warn("Data set with name $set_name already exists.");

        # Add only those result sets that are not supporting sets yet.
        my %ssets_dbIDs = ();
        foreach my $sset (@{ $dset->get_supporting_sets() }) {
            $ssets_dbIDs{$sset->dbID} = '';
        }
        foreach my $rset (@{$self->ResultSets}) {
            $dset->add_supporting_sets([ $rset ])
                if (! exists $ssets_dbIDs{$rset->dbID});
        }
    }
    $self->data_set($dset);
}
# Get/set the data set.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Funcgen::DataSet; a false value is ignored
# Returntype : the stored data set (undef if none has been stored)
# Exceptions : throws if a true argument is not a Bio::EnsEMBL::Funcgen::DataSet
sub data_set {
    my ($self, $ds) = @_;

    # Pure getter when nothing (true) was passed.
    return $self->{'data_set'} unless $ds;

    if (!$ds->isa('Bio::EnsEMBL::Funcgen::DataSet')) {
        throw("Must pass a Bio::EnsEMBL::Funcgen::DataSet not a ".$ds);
    }

    $self->{'data_set'} = $ds;
    return $self->{'data_set'};
}
# Container for the DNA database adaptor.
#
# Arg [1]    : (optional) Bio::EnsEMBL::DBSQL::DBAdaptor; a false value is ignored
# Returntype : the stored adaptor (undef if none has been stored)
# Exceptions : throws if a true argument is not a Bio::EnsEMBL::DBSQL::DBAdaptor
sub dnadb {
    my ($self, $db) = @_;

    # Pure getter when nothing (true) was passed.
    return $self->{'dnadb'} unless $db;

    if (!$db->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) {
        throw("Must pass RunnableDB:db a Bio::EnsEMBL::DBSQL::DBAdaptor ".
              "not a ".$db);
    }

    $self->{'dnadb'} = $db;
    return $self->{'dnadb'};
}
# Get/set the analysis object used on the functional genomics side.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Analysis; a false value is ignored
# Returntype : the stored analysis (undef if none has been stored)
# Exceptions : throws if a true argument is not a Bio::EnsEMBL::Analysis
sub efg_analysis {
    my ($self, $a) = @_;

    # Pure getter when nothing (true) was passed.
    return $self->{'efg_analysis'} unless $a;

    if (!$a->isa('Bio::EnsEMBL::Analysis')) {
        throw("Must pass a Bio::EnsEMBL::Analysis not a ".$a);
    }

    $self->{'efg_analysis'} = $a;
    return $self->{'efg_analysis'};
}
# Container for the functional genomics database adaptor.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor; a false
#              value is ignored
# Returntype : the stored adaptor (undef if none has been stored)
# Exceptions : throws if a true argument is not a
#              Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor
sub efgdb {
    my ($self, $db) = @_;

    # Pure getter when nothing (true) was passed.
    return $self->{'efgdb'} unless $db;

    if (!$db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor')) {
        throw("Must pass RunnableDB:db a Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor ".
              "not a ".$db);
    }

    $self->{'efgdb'} = $db;
    return $self->{'efgdb'};
}
# Get/set the experiment.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Funcgen::Experiment; a false value is ignored
# Returntype : the stored experiment (undef if none has been stored)
# Exceptions : throws if a true argument is not a Bio::EnsEMBL::Funcgen::Experiment
sub experiment {
    my ($self, $e) = @_;

    # Pure getter when nothing (true) was passed.
    return $self->{'experiment'} unless $e;

    if (!$e->isa('Bio::EnsEMBL::Funcgen::Experiment')) {
        throw("Must pass a Bio::EnsEMBL::Funcgen::Experiment not a ".$e);
    }

    $self->{'experiment'} = $e;
    return $self->{'experiment'};
}
# Get/set the feature set.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Funcgen::FeatureSet; a false value is ignored
# Returntype : the stored feature set (undef if none has been stored)
# Exceptions : throws if a true argument is not a Bio::EnsEMBL::Funcgen::FeatureSet
sub feature_set {
    my ($self, $fs) = @_;

    # Pure getter when nothing (true) was passed.
    return $self->{'feature_set'} unless $fs;

    if (!$fs->isa('Bio::EnsEMBL::Funcgen::FeatureSet')) {
        throw("Must pass a Bio::EnsEMBL::Funcgen::FeatureSet not a ".$fs);
    }

    $self->{'feature_set'} = $fs;
    return $self->{'feature_set'};
}
# Get/set the feature type.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Funcgen::FeatureType; a false value is ignored
# Returntype : the stored feature type (undef if none has been stored)
# Exceptions : throws if a true argument is not a Bio::EnsEMBL::Funcgen::FeatureType
sub feature_type {
    my ($self, $ft) = @_;

    # Pure getter when nothing (true) was passed.
    return $self->{'feature_type'} unless $ft;

    if (!$ft->isa('Bio::EnsEMBL::Funcgen::FeatureType')) {
        throw("Must pass a Bio::EnsEMBL::Funcgen::FeatureType not a ".$ft);
    }

    $self->{'feature_type'} = $ft;
    return $self->{'feature_type'};
}
# Fetch the result sets of the experiment for the normalisation analysis and
# keep those whose name matches RESULT_SET_REGEXP.
#
# The feature type and cell type of the first selected result set are stored
# on the object if not set yet; every selected result set must agree with
# them. The selection is stored via ResultSets().
#
# Returntype : array ref of the selected result sets
# Exceptions : throws if the selected result sets differ in feature type or
#              cell type, or if no result set name matches
sub fetch_ResultSets {
    print "Analysis::RunnableDB::Funcgen::fetch_ResultSets\n";
    my $self = shift;

    my $rsa = $self->efgdb->get_ResultSetAdaptor;
    my $rsets = $rsa->fetch_all_by_Experiment_Analysis
        ($self->experiment, $self->norm_method);
    print "No. of available ResultSets: ", scalar(@$rsets), "\n";

    # Compile the filter once. A qr// object is exempt from Perl's rule that
    # an empty m// pattern re-uses the last successfully matched pattern, so
    # an empty or missing RESULT_SET_REGEXP reliably selects every result set.
    my $regex   = $self->RESULT_SET_REGEXP;
    my $name_re = defined $regex ? qr/$regex/ : qr/(?:)/;

    my @rsets = ();
    foreach my $rset (@{$rsets}) {
        next if ($rset->name() !~ $name_re);
        push(@rsets, $rset);

        # All selected result sets must share one feature type ...
        if (! defined $self->feature_type ) {
            $self->feature_type($rset->feature_type);
        }
        else {
            throw("replicates differ in feature types")
                if ($self->feature_type->dbID != $rset->feature_type->dbID);
        }

        # ... and one cell type.
        if ( ! defined $self->cell_type() ) {
            $self->cell_type($rset->cell_type);
        }
        else {
            throw("replicates differ in cell types")
                if ($self->cell_type->dbID != $rset->cell_type->dbID);
        }
    }

    if (!@rsets) {
        my $rset_list = join(' ', map { $_->name } @{$rsets});
        throw ("RESULT_SET_REGEXP doesn't match any the following result set:\n$rset_list");
    }

    $self->ResultSets(\@rsets);
    print "Selected result sets: ", join(', ', map { $_->name } @rsets),
        ' (in total ', scalar(@rsets), ")\n";

    return \@rsets;
}
} |
# Fetch the result features for every selected result set and create the
# runnable.
#
# For each result set the features of all query slices are collected as
# [seq_region_name, start, end, score * SCORE_FACTOR, counter] rows. The rows
# are cached in "<ANALYSIS_WORK_DIR>/cache/<query name>.<result set name>.dat"
# (tab separated); an existing cache file is read instead of querying the
# database. The per-result-set rows are handed to the runnable as
# -result_features.
#
# Returns    : 1
# Exceptions : throws if the cache directory or a cache file cannot be
#              created/opened, if the input id type is neither 'Slice' nor
#              'Array', or if result sets yield differing numbers of features
sub fetch_input {
    my ($self) = @_;
    print "Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::fetch_input\n";

    my %result_features = ();
    my $norf;    # number of result features of the previous result set

    my $cachedir = $self->ANALYSIS_WORK_DIR.'/cache';
    if ( ! -d $cachedir) {
        # List form: the path is passed to mkdir verbatim instead of being
        # re-parsed by a shell (spaces or metacharacters in the path).
        my $retval = system('mkdir', '-p', $cachedir);
        throw("Couldn't create cache directory $cachedir")
            unless ($retval == 0);
    }

    warn("INPUT_ID_TYPE: ". $self->analysis->input_id_type);
    my $input_id_type = $self->analysis->input_id_type;

    my $query_name;
    foreach my $rset (@{$self->ResultSets}) {
        print join(" ", $rset->dbID, $rset->name), "\n";

        if ($input_id_type eq 'Slice') {
            $query_name = $self->query->[0]->name();
        }
        elsif ($input_id_type eq 'Array') {
            my @echips = @{$rset->get_ExperimentalChips()};
            warn("WARNING: Result set '".$rset->name."' comprises more than one experimental chip, ".
                 "which will result in very large datafiles!")
                if (scalar @echips > 1);
            $query_name = 'ec_' . join (':', map { $_->unique_id } @echips);
        }
        else {
            throw("No idea how to deal with input_id type ".$input_id_type);
        }

        my $datfile = $cachedir.'/'.$query_name.'.'.$rset->name.'.dat';
        warn('datafile: '.$datfile);

        my @result_features = ();
        warn("SCORE_FACTOR: ".$self->SCORE_FACTOR);

        unless ( -e $datfile ) {
            # No cache yet: query the database slice by slice.
            foreach my $slice ( @{$self->query} ) {
                my $result_features = $rset->get_ResultFeatures_by_Slice($slice);
                print "No. of ResultFeatures_by_Slice:\t", scalar(@$result_features), "\n";

                if (scalar(@$result_features) == 0) {
                    warn("No result_features on slice ".$slice->name());
                    next;
                }

                my $ft_cnt = 1;
                foreach my $rft (sort {$a->start <=> $b->start} @{$result_features}) {
                    push (@result_features,
                          [
                           $slice->seq_region_name,
                           $rft->start,
                           $rft->end,
                           $rft->score*$self->SCORE_FACTOR,
                           $ft_cnt++,
                          ]
                         );
                }
            }

            # Write the cache file (three-argument open, lexical handle).
            open(my $rf_fh, '>', $datfile)
                or throw("Can't open file $datfile");
            foreach my $row (@result_features) {
                print {$rf_fh} join("\t", @$row), "\n";
            }
            close($rf_fh)
                or throw("Can't close file $datfile");
        }
        else {
            print "Using cached ResultFeatures:\t", $datfile, "\n";
            open(my $cache_fh, '<', $datfile)
                or throw("Can't open file cache $datfile");
            while (my $line = <$cache_fh>) {
                chomp $line;
                push (@result_features, [ split(/\t/, $line) ]);
            }
            close($cache_fh);
        }

        warn("No. of ResultFeatures_by_Slice:\t".scalar(@result_features));

        # Every result set is expected to yield the same number of features.
        throw ("No of result_features doesn't match")
            if (@result_features && $norf && $norf != scalar(@result_features));

        $result_features{$rset->name} = \@result_features;
        $norf = scalar(@result_features);
    }

    if (scalar(keys %result_features) == 0) {
        warn('No ResultFeatures on slice');
        return 1;
    }

    my %parameters_hash = %{$self->parameters_hash($self->efg_analysis->parameters)};
    $parameters_hash{-result_features} = \%result_features;

    # The runnable class name is derived from the analysis module name.
    my $runnable = 'Bio::EnsEMBL::Analysis::Runnable::Funcgen::'
        .$self->efg_analysis->module;
    $runnable = $runnable->new
        (
         -query    => $self->query,
         -program  => $self->efg_analysis->program_file,
         -analysis => $self->efg_analysis,
         -workdir  => $self->ANALYSIS_WORK_DIR,
         %parameters_hash
        );

    $self->runnable($runnable);

    return 1;
}
# Check whether a file is gzip compressed, using the external 'file' command.
#
# Arg [1]    : path of the file to check
# Returns    : 1 if 'file' reports "gzip compressed data", otherwise 0
# Exceptions : throws if the 'file' command cannot be started
sub is_gzip {
    my ($self, $file) = @_;

    warn "Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::is_gzip";

    # List-form pipe open: the file name reaches 'file' as a single argument
    # without passing through a shell, so spaces or metacharacters in the
    # name are safe. '-b' keeps the file name out of the output so the name
    # itself cannot produce a match; '--' protects names starting with '-'.
    open(my $file_cmd, '-|', 'file', '-L', '-b', '--', $file)
        or throw("Can't execute command 'file' on '$file'");
    my $retval = <$file_cmd>;
    close($file_cmd);

    # 'file' may have produced no output at all.
    return (defined $retval && $retval =~ m/gzip compressed data/) ? 1 : 0;
}
# Constructor: prints a trace line and delegates to the parent class
# constructor with the arguments unchanged.
#
# Returntype : whatever the parent constructor returns
sub new {
    print "Analysis::RunnableDB::Funcgen::new\n";

    my $class = shift;
    my @args  = @_;

    my $self = $class->SUPER::new(@args);

    return $self;
}
} |
# Get/set the analysis object describing the normalisation method.
#
# Arg [1]    : (optional) Bio::EnsEMBL::Analysis; a false value is ignored
# Returntype : the stored analysis (undef if none has been stored)
# Exceptions : throws if a true argument is not a Bio::EnsEMBL::Analysis
sub norm_method {
    my ($self, $m) = @_;

    if ($m) {
        # The message names the class that is actually checked for
        # (it used to say "Bio::EnsEMBL::Ananysis").
        throw("Must pass a Bio::EnsEMBL::Analysis not a ".$m)
            unless ($m->isa('Bio::EnsEMBL::Analysis'));
        $self->{'norm_method'} = $m;
    }

    return $self->{'norm_method'};
}
# Get/set the query slices.
#
# Arg [1]    : (optional) array ref of Bio::EnsEMBL::Slice objects; a false
#              value is ignored
# Returntype : the stored array ref (undef if none has been stored)
# Exceptions : throws if a true argument is not an array ref or contains an
#              element that is not a Bio::EnsEMBL::Slice
sub query {
    my ($self, $query) = @_;

    # Pure getter when nothing (true) was passed.
    return $self->{'query'} unless $query;

    unless (ref($query) eq 'ARRAY') {
        throw("Must pass RunnableDB:Funcgen:query a array ref not a ".
              ref($query));
    }

    # Every element has to be a slice.
    foreach my $slice (@$query) {
        throw($slice->name . " is not a Bio::EnsEMBL::Slice")
            unless ($slice->isa("Bio::EnsEMBL::Slice"));
    }

    $self->{'query'} = $query;
    return $self->{'query'};
}
# Read the configuration for this analysis' logic name and set up the
# database adaptors, the efg analysis, the experiment/query slices and the
# normalisation method.
#
# Arg [1]    : configuration passed on to parse_config()
# Exceptions : throws if the normalisation analysis cannot be fetched
#              (unless the input id type is "File")
sub read_and_check_config {
    print "Analysis::RunnableDB::Funcgen::read_and_check_config\n";
    my ($self, $config) = @_;

    warn("MODULE:\t ".$self->analysis->module);
    warn("LOGIC_NAME:\t".$self->analysis->logic_name);
    warn("INPUT_ID:\t".$self->input_id);
    warn("INPUT_ID_TYPE:\t".$self->analysis->input_id_type);

    warn("Reading config for '".$self->analysis->logic_name."'");
    parse_config($self, $config, $self->analysis->logic_name);

    # The DNA database is optional: only connect when one is configured, and
    # do not dereference the setting when it is absent.
    my $dnadb_config = $self->DNADB;
    if ($dnadb_config && $dnadb_config->{-dbname}) {
        $self->dnadb(Bio::EnsEMBL::DBSQL::DBAdaptor->new(%{ $dnadb_config }));
    }

    $self->efgdb(Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor->new
                 (
                  %{ $self->EFGDB },
                  -dnadb => $self->dnadb,
                 ));

    # Direct method call instead of the ambiguous indirect object syntax.
    my $efg_analysis = Bio::EnsEMBL::Analysis->new
        ( -logic_name => $self->analysis->logic_name );
    $self->efg_analysis($efg_analysis);

    $self->check_InputId();

    unless ($self->analysis->input_id_type eq "File") {
        my $m = $self->efgdb->get_AnalysisAdaptor->fetch_by_logic_name($self->NORM_METHOD);
        $self->norm_method($m)
            or throw("Can't fetch analysis object for norm method ".$self->NORM_METHOD);
    }

    # NOTE(review): this sets SCORE_FACTOR to 1 regardless of any value read
    # from the config above — confirm that this is intended.
    $self->SCORE_FACTOR(1);
}
# Store the runnable's output as annotated features of the feature set.
#
# Each output entry is expected to be an array ref starting with
# (seq_region_name, start, end, score). Nothing is stored when the query
# slices already contain annotated features of the feature set. With input id
# type 'Slice' and a query slice that does not start at 1 on strand 1, the
# features are transferred to the slice fetched for the whole seq region
# before storing.
#
# Returns    : 1
sub write_output {
    print "Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::write_output\n";
    my ($self) = @_;

    if (scalar(@{$self->output}) == 0) {
        # query() holds an array ref of slices, so list all their names
        # (calling ->name on the array ref itself would die).
        warn("No features to annotate on slice ".
             join(', ', map { $_->name } @{$self->query}).
             " for experiment ".$self->experiment->name()."!");
        return 1;
    }

    # Make sure the feature set and data set exist in the database.
    if (! defined $self->feature_set->dbID) {
        $self->efgdb->get_FeatureSetAdaptor->store($self->feature_set());
    }
    if (! defined $self->data_set->dbID) {
        $self->efgdb->get_DataSetAdaptor->store($self->data_set());
    }

    my $fset = $self->feature_set;

    # Collect features of this feature set already stored on the query slices.
    my @stored_af = ();
    foreach my $s (@{$self->query}) {
        push @stored_af,
            @{$self->efgdb->get_AnnotatedFeatureAdaptor->fetch_all_by_Slice_FeatureSets($s, [$fset])};
    }

    warn('No. of annotated features already stored: '.scalar(@stored_af));
    warn('No. of annotated features to be stored: '.scalar(@{$self->output}));

    if (@stored_af) {
        warn("NOT IMPORTING ".scalar(@{$self->output})." annotated features! Slice(s) ".
             join (', ', map {$_->name} @{$self->query})." already contains ".scalar(@stored_af).
             " annotated features of feature set '".$fset->name."' (dbId ".$fset->dbID.').');
        return 1;
    }

    my ($transfer, $slice, $tl_slice);
    my $input_id_type = $self->analysis->input_id_type;

    if ($input_id_type eq 'Slice' &&
        ($self->query->[0]->start != 1 || $self->query->[0]->strand != 1)) {
        # Features are created relative to the query slice; fetch the slice
        # of the whole seq region so that they can be transferred onto it.
        my $sa = $self->efgdb->get_SliceAdaptor;
        $tl_slice = $sa->fetch_by_region($self->query->[0]->coord_system->name(),
                                         $self->query->[0]->seq_region_name(),
                                         undef, undef, undef,
                                         $self->query->[0]->coord_system->version());
        $transfer = 1;
        warn('TRANSFER: 1');
    }

    # For 'Array' input, look slices up by seq region name.
    my %af_slice = ();
    if ($input_id_type eq 'Array') {
        foreach my $query_slice (@{$self->query}) {
            $af_slice{$query_slice->seq_region_name} = $query_slice;
        }
    }

    my @af = ();
    foreach my $ft (@{$self->output}) {
        my ($sr_name, $start, $end, $score) = @{$ft};

        $slice = $input_id_type eq 'Slice' ? $self->query->[0] : $af_slice{$sr_name};

        my $af = Bio::EnsEMBL::Funcgen::AnnotatedFeature->new
            (
             -slice         => $slice,
             -start         => $start,
             -end           => $end,
             -strand        => 0,
             -display_label => $self->efg_analysis->logic_name,
             -score         => $score,
             -feature_set   => $fset,
            );

        if ($transfer) {
            $af = $af->transfer($tl_slice);
        }

        push(@af, $af);
    }

    $self->efgdb->get_AnnotatedFeatureAdaptor->store(@af);

    return 1;
}
} |
General documentation
Stefan Graf, Ensembl Functional Genomics -