Bio::EnsEMBL::Analysis::RunnableDB::Funcgen
SWEmbl
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::SWEmbl
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $runnable = Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::SWEmbl->new
(
-db => $db,
-input_id => 'chromosome::20:1:100000:1',
-analysis => $analysis,
);
$runnable->fetch_input;
$runnable->run;
$runnable->write_output;
Description
This module provides an interface between the Ensembl Functional Genomics
database and the SWEmbl Runnable, which wraps the ChIP-Seq peak caller SWEmbl.
Methods
Methods description
Arg [1]     :
Arg [2]     :
Description : Instantiates new SWEmbl runnabledb
Returntype  : Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::SWEmbl object
Exceptions  :
Example     :
Methods code
check_InputId | description | prev | next | Top |
# check_InputId
#
#   Arg [1]     : none (uses $self->input_id)
#   Description : Takes the part of the input id before the first ':' as the
#                 experiment name, fetches that experiment from the efg
#                 database and, if it does not exist yet, creates and stores
#                 it (importing the experimental group from the environment
#                 variables EFG_GROUP, EFG_LOCATION and EFG_CONTACT when the
#                 group is unknown). Finally points $self->query at
#                 <ANALYSIS_WORK_DIR>/infiles/<input_id>.
#   Returntype  : whatever $self->query($infile) returns
#   Exceptions  : throws if the group cannot be imported or if the experiment
#                 cannot be set on $self
sub check_InputId
{
    warn("Analysis::RunnableDB::Funcgen::SWEmbl::check_InputId");

    my ($self) = @_;

    # Only the experiment name (first ':'-separated field) is needed here.
    my ($ename) = split(':', $self->input_id);

    my $ea = $self->efgdb->get_ExperimentAdaptor;
    my $e  = $ea->fetch_by_name($ename);

    if (! defined $e) {

        warn("Experiment NOT defined");

        # Today's date as year-month-day; only needed for a new experiment.
        my @date = (localtime)[5,4,3];
        $date[0] += 1900;
        $date[1]++;

        my $exp = Bio::EnsEMBL::Funcgen::Experiment->new
            (
             -NAME                => $ename,
             -GROUP               => "$ENV{EFG_GROUP}",
             -DATE                => join('-', @date),
             -PRIMARY_DESIGN_TYPE => 'binding_site_identification',
             -ADAPTOR             => $ea,
            );

        my ($g_dbid) = $self->efgdb->fetch_group_details($exp->group());

        if (! $g_dbid) {

            warn("Group specified does, not exist. Importing (group, location, contact)");

            # Bind the values instead of interpolating them into the SQL so
            # that quotes or other special characters in the environment
            # variables can neither break nor alter the statement.
            my $sql = "INSERT INTO experimental_group (name, location, contact) ".
                "values (?, ?, ?)";

            # An unset variable is stored as the empty string, which is what
            # string interpolation used to produce.
            my @values = map { defined $ENV{$_} ? $ENV{$_} : '' }
                qw(EFG_GROUP EFG_LOCATION EFG_CONTACT);

            eval {
                my $sth = $self->efgdb->dbc->prepare($sql);
                $sth->execute(@values) or die $sth->errstr;
                $sth->finish;
            };
            if (my $error = $@) {
                throw("Couldn't import group information. Double-check that the environment variables ".
                      "EFG_GROUP, EFG_LOCATION, and EFG_CONTACT are set. Error: $error");
            }
        }

        ($e) = @{$ea->store($exp)};
    }

    $self->experiment($e)
        or throw("Can't fetch experiment with name ".$ename);

    my $infile = $self->ANALYSIS_WORK_DIR.'/infiles/'.$self->input_id;
    warn('INFILE: '.$infile);

    return $self->query($infile);
}
# check_Sets
#
#   Arg [1]     : none (uses $self->analysis and $self->experiment)
#   Description : Makes sure the experimental set, feature set and data set
#                 named "<analysis logic_name>_<experiment name>" exist in
#                 the efg database, storing any that are missing. The
#                 experiment name is split on '_' into a cell type name and a
#                 feature type name, both of which must already exist.
#                 Sets $self->feature_type, $self->cell_type,
#                 $self->feature_set and $self->data_set.
#   Returntype  : whatever $self->data_set($dset) returns
#   Exceptions  : throws if the experiment name cannot be split into cell
#                 type and feature type, if either type is unknown, or if
#                 storing any of the sets fails
sub check_Sets
{
    warn("Analysis::RunnableDB::Funcgen::SWEmbl::check_Sets");

    my ($self) = @_;

    my $logic_name = $self->analysis->logic_name;
    my $exp_name   = $self->experiment->name();
    my $set_name   = $logic_name.'_'.$exp_name;
    warn("SetName: $set_name");

    # Experiment names are expected to look like <cell type>_<feature type>.
    my ($ct_name, $ft_name) = split(/_/, $exp_name);
    throw("Experiment name '$exp_name' is not of the form <cell type>_<feature type>")
        unless (defined $ct_name && defined $ft_name);

    my $feature_type = $self->efgdb->get_FeatureTypeAdaptor()->fetch_by_name($ft_name)
        or throw("Feature type '$ft_name' does not exist");
    $self->feature_type($feature_type);

    my $cell_type = $self->efgdb->get_CellTypeAdaptor()->fetch_by_name($ct_name)
        or throw("Cell type '$ct_name' does not exist");
    $self->cell_type($cell_type);

    ### experimental set
    my $esa  = $self->efgdb->get_ExperimentalSetAdaptor();
    my $eset = $esa->fetch_by_name($set_name);

    if (! defined $eset) {

        warn("ExperimentalSet NOT defined");

        $eset = Bio::EnsEMBL::Funcgen::ExperimentalSet->new
            (
             -name         => $set_name,
             -experiment   => $self->experiment(),
             -feature_type => $feature_type,
             -cell_type    => $cell_type,
             -vendor       => 'SOLEXA',
             -format       => 'SEQUENCING',
            );

        warn("Storing new experimental set\' $set_name\'");
        eval {
            ($eset) = @{$esa->store($eset)};
        };
        # Report the exception caught by eval ($@), not the unrelated errno ($!).
        throw("Coudn't store experimental set\' $set_name\': $@") if ($@);
    }

    ### feature set
    my $fsa  = $self->efgdb->get_FeatureSetAdaptor();
    my $fset = $fsa->fetch_by_name($set_name);

    if (! defined $fset) {

        $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new
            (
             -analysis     => $self->efg_analysis,
             -feature_type => $self->feature_type,
             -cell_type    => $self->cell_type,
             -name         => $set_name,
             -type         => 'annotated'
            );

        warn("Storing new feature set\' $set_name\'");
        eval {
            ($fset) = @{$fsa->store($fset)};
        };
        throw("Coudn't store feature set\' $set_name\': $@") if ($@);

    } else {

        warn("Feature set with name $set_name already exists.");

    }
    $self->feature_set($fset);

    ### data set
    my $dsa  = $self->efgdb->get_DataSetAdaptor;
    my $dset = $dsa->fetch_by_name($set_name);

    if (! defined $dset) {

        $dset = Bio::EnsEMBL::Funcgen::DataSet->new
            (
             -SUPPORTING_SETS     => [$eset],
             -FEATURE_SET         => $fset,
             -DISPLAYABLE         => 1,
             -NAME                => $set_name,
             -SUPPORTING_SET_TYPE => 'experimental',
            );

        warn("Storing new data set\' $set_name\'");
        eval {
            ($dset) = @{$dsa->store($dset)}
        };
        throw("Coudn't store data set\' $set_name\': $@") if ($@);

    } else {

        warn("Data set with name $set_name already exists.");

        # Add the experimental set as a supporting set unless the data set
        # already lists it.
        # NOTE(review): the modified data set is not stored again here --
        # confirm whether add_supporting_sets persists the change itself.
        my %ssets_dbIDs;
        foreach my $sset (@{$dset->get_supporting_sets()}) {
            $ssets_dbIDs{$sset->dbID} = 1;
        }
        $dset->add_supporting_sets([ $eset ]) if (! exists $ssets_dbIDs{$eset->dbID});

    }

    return $self->data_set($dset);
}
# fetch_input
#
#   Arg [1]     : none (uses $self->query as the gzipped input file)
#   Description : Sorts the gzipped input file (by column 1, then numerically
#                 by columns 2 and 3) into <ANALYSIS_WORK_DIR>/cache/<basename>
#                 unless a file of that name already exists there, points
#                 $self->query at the cached file and instantiates the
#                 Runnable named by the efg analysis' module.
#   Returntype  : 1
#   Exceptions  : throws if the cache directory cannot be created or if the
#                 sort pipeline cannot be started, fails, or its result
#                 cannot be moved into place
sub fetch_input
{
    my ($self) = @_;

    my $cachedir = $self->ANALYSIS_WORK_DIR.'/cache';
    if (! -d $cachedir) {
        # List form of system: the path is passed as-is, no shell involved.
        my $retval = system('mkdir', '-p', $cachedir);
        throw("Couldn't create cache directory $cachedir") unless ($retval == 0);
    }

    my $infile = $self->query;
    warn('infile: '.$infile);

    # The cached file carries the same base name as the input file.
    (my $cachefile = $infile) =~ s,.*/([^/]+)$,$1,;
    my $datfile = $cachedir.'/'.$cachefile;
    warn('datafile: '.$datfile);

    unless (-e $datfile) {

        # Write to a process-specific temporary file and rename it only on
        # success, so that a failed or interrupted sort never leaves a
        # partial file behind that a later run would accept as the cache.
        my $tmpfile = $datfile.'.'.$$.'.tmp';

        # Single-quote the paths for the shell that runs the pipeline.
        my @quoted;
        foreach my $path ($infile, $tmpfile) {
            (my $escaped = $path) =~ s/'/'\\''/g;
            push(@quoted, "'".$escaped."'");
        }

        my $sort = "gzip -dc $quoted[0]".
            " | sort -k1,1 -k2,2n -k3,3n | gzip -c > $quoted[1]";
        warn("Executing $sort");

        open(my $sort_fh, '-|', $sort)
            or throw("Can't open and sort gzipped file ".$infile);
        while (my $line = <$sort_fh>) {
            warn($line);
        }

        # close() on a pipe returns false when the command exited non-zero.
        # NOTE(review): with a plain sh this reflects the last command of the
        # pipeline only -- confirm whether stricter checking is needed.
        unless (close($sort_fh)) {
            my $status = $? >> 8;
            unlink($tmpfile);
            throw("Sorting gzipped file ".$infile." failed (exit status $status)");
        }

        unless (rename($tmpfile, $datfile)) {
            my $error = $!;
            unlink($tmpfile);
            throw("Can't move sorted file $tmpfile to $datfile: $error");
        }
    }

    $self->query($datfile);

    my %parameters_hash = %{$self->parameters_hash($self->efg_analysis->parameters)};

    # The Runnable class is selected by the efg analysis' module name.
    my $runnable = 'Bio::EnsEMBL::Analysis::Runnable::Funcgen::'
        .$self->efg_analysis->module;
    $runnable = $runnable->new
        (
         -query    => $self->query,
         -program  => $self->efg_analysis->program_file,
         -analysis => $self->efg_analysis,
         -workdir  => $self->ANALYSIS_WORK_DIR,
         %parameters_hash
        );

    $self->runnable($runnable);

    return 1;
}
# new
#
#   Arg [1..n]  : passed through unchanged to the parent class constructor
#   Description : Builds the object via the parent constructor, then reads
#                 and checks the configuration ($CONFIG) and runs the
#                 analysis and set checks.
#   Returntype  : the newly constructed object
sub new
{
    my $class = shift;

    print "Analysis::RunnableDB::Funcgen::SWEmbl::new\n";

    my $self = $class->SUPER::new(@_);

    $self->read_and_check_config($CONFIG);

    # Run the consistency checks in their fixed order.
    foreach my $check (qw(check_Analysis check_Sets)) {
        $self->$check();
    }

    return $self;
}
# query
#
#   Arg [1]     : (optional) path of the file to use as query
#   Description : Getter/setter for the query file. The stored path is
#                 checked for existence on every call, whether or not a new
#                 value was passed.
#   Returntype  : the stored path
#   Exceptions  : throws if the stored path does not exist
sub query
{
    my ($self, @args) = @_;

    if (@args) {
        $self->{'query'} = $args[0];
    }

    if (! -e $self->{'query'}) {
        throw("file ".$self->{'query'}. " doesn't exist");
    }

    return $self->{'query'};
}
# write_output
#
#   Arg [1]     : none (uses $self->output, a list of
#                 [seqid, start, end, score] array refs)
#   Description : Stores the output as annotated features of
#                 $self->feature_set. The feature set and data set are
#                 stored first if they have no dbID yet. Nothing is imported
#                 if there is no output, if the feature set already has
#                 annotated features, or if every output feature lies on a
#                 sequence whose name starts with 'M' (those are skipped).
#   Returntype  : 1
sub write_output
{
    print "Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::SWEmbl::write_output\n";

    my ($self) = @_;

    if (scalar(@{$self->output}) == 0) {
        warn("No features to annotate on slice ".$self->query.
             " for experiment ".$self->experiment->name()."!");
        return 1;
    }

    # Make sure the feature set and data set exist in the database.
    if (! defined $self->feature_set->dbID) {
        $self->efgdb->get_FeatureSetAdaptor->store($self->feature_set());
    }
    if (! defined $self->data_set->dbID) {
        $self->efgdb->get_DataSetAdaptor->store($self->data_set());
    }

    my $fset = $self->feature_set;
    my $afa  = $self->efgdb->get_AnnotatedFeatureAdaptor;

    my $stored = $afa->fetch_all_by_FeatureSets([$fset]);

    warn('No. of annotated features already stored: '.scalar(@$stored).' ('.$self->query.' '.$fset->name.')');
    warn('No. of annotated features to be stored: '.scalar(@{$self->output}).' ('.$self->query.')');

    # Never import on top of existing features of this feature set.
    if (@$stored) {
        warn("NOT IMPORTING ".scalar(@{$self->output})." annotated features! File ".
             $self->query." already has been processed; contains ".scalar(@$stored).
             " annotated features of feature set ".$fset->dbID.".");
        return 1;
    }

    my $sa = $self->efgdb->get_SliceAdaptor();
    my %slice;          # chromosome slices, fetched once per sequence name
    my @new_features;

    foreach my $ft (@{$self->output}) {

        my ($seqid, $start, $end, $score) = @{$ft};

        # Features on sequences whose name starts with 'M' are not imported.
        next if ($seqid =~ m/^M/);

        unless (exists $slice{"$seqid"}) {
            $slice{"$seqid"} = $sa->fetch_by_region('chromosome', $seqid);
        }

        push(@new_features, Bio::EnsEMBL::Funcgen::AnnotatedFeature->new
             (
              -slice         => $slice{"$seqid"},
              -start         => $start,
              -end           => $end,
              -strand        => 0,
              -display_label => $self->efg_analysis->logic_name,
              -score         => $score,
              -feature_set   => $fset,
             ));
    }

    # Every feature may have been skipped above; do not hand the adaptor an
    # empty list.
    # NOTE(review): the adaptor's store is believed to reject an empty list
    # -- confirm against the AnnotatedFeatureAdaptor in use.
    if (! @new_features) {
        warn("No annotated features left to store for ".$self->query.
             " after skipping sequences starting with 'M'.");
        return 1;
    }

    $afa->store(@new_features);

    return 1;
}
1; } |
General documentation
Stefan Graf, Ensembl Functional Genomics -
/