Bio::EnsEMBL::Analysis::RunnableDB::Funcgen SWEmbl
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::SWEmbl
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::Funcgen::SWEmbl
Bio::EnsEMBL::Analysis::Config::General
Bio::EnsEMBL::Analysis::Runnable::Funcgen::SWEmbl
Bio::EnsEMBL::Analysis::RunnableDB
Bio::EnsEMBL::Analysis::RunnableDB::Funcgen
Bio::EnsEMBL::Utils::Exception qw ( throw warning stack_trace_dump )
Data::Dumper
Inherit
Bio::EnsEMBL::Analysis::RunnableDB::Funcgen
Synopsis
  my $runnable = Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::SWEmbl->new
(
-db => $db,
-input_id => 'chromosome::20:1:100000:1',
-analysis => $analysis,
);
$runnable->fetch_input;
$runnable->run;
$runnable->write_output;
Description
This module provides an interface between the ensembl functional genomics
database and the Runnable SWEmbl which wraps the ChIP-Seq peak caller SWEmbl.
Methods
check_InputId
No description
Code
check_Sets
No description
Code
fetch_input
No description
Code
newDescriptionCode
query
No description
Code
write_output
No description
Code
Methods description
newcode    nextTop
  Arg [1]     : 
Arg [2] :
Description : Instantiates new SWEmbl runnabledb
Returntype : Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::SWEmbl object
Exceptions :
Example :
Methods code
check_InputIddescriptionprevnextTop
sub check_InputId {
    warn("Analysis::RunnableDB::Funcgen::SWEmbl::check_InputId");

    my ($self) = @_;
    my ($ename, $file) = split(':', $self->input_id);

    # set experiment
my $ea = $self->efgdb->get_ExperimentAdaptor; my $e = $ea->fetch_by_name($ename); my @date = (localtime)[5,4,3]; $date[0] += 1900; $date[1]++; #print Dumper @date;
if (! defined $e) { warn("Experiment NOT defined"); my $exp = Bio::EnsEMBL::Funcgen::Experiment->new ( -NAME => $ename, -GROUP => "$ENV{EFG_GROUP}", -DATE => join('-', @date), -PRIMARY_DESIGN_TYPE => 'binding_site_identification', -ADAPTOR => $ea, ); my ($g_dbid) = $self->efgdb->fetch_group_details($exp->group()); if (! $g_dbid) { warn("Group specified does, not exist. Importing (group, location, contact)"); my $sql = "INSERT INTO experimental_group (name, location, contact) ". "values ('$ENV{EFG_GROUP}','$ENV{EFG_LOCATION}','$ENV{EFG_CONTACT}')"; eval { $self->efgdb->dbc->do($sql); }; throw("Couldn't import group information. Double-check that the environment variables ". "EFG_GROUP, EFG_LOCATION, and EFG_CONTACT are set.") if ($@); } ($e) = @{$ea->store($exp)}; } $self->experiment($e) or throw("Can't fetch experiment with name ".$ename); #print Dumper $self->experiment;
my $infile = $self->ANALYSIS_WORK_DIR.'/infiles/'.$self->input_id; warn('INFILE: '.$infile); $self->query($infile);
}
check_SetsdescriptionprevnextTop
sub check_Sets {
    warn("Analysis::RunnableDB::Funcgen::SWEmbl::check_Experiment");

    my ($self) = @_;

    my $set_name = $self->analysis->logic_name.'_'.$self->experiment->name();
    warn("SetName: $set_name");

    my ($ct_name, $ft_name) = split(/_/, $self->experiment->name());

    my $feature_type = $self->efgdb->get_FeatureTypeAdaptor()->fetch_by_name($ft_name)
        or throw("Feature type '$ft_name' does not exist");
    $self->feature_type($feature_type);

    my $cell_type = $self->efgdb->get_CellTypeAdaptor()->fetch_by_name($ct_name)
        or throw("Cell type '$ct_name' does not exist");
    $self->cell_type($cell_type);

    my $esa = $self->efgdb->get_ExperimentalSetAdaptor();
    my $eset = $esa->fetch_by_name($set_name);
    
    if (! defined $eset){

        warn("ExperimentalSet NOT defined");

        $eset = Bio::EnsEMBL::Funcgen::ExperimentalSet->new
            (
             -name         => $set_name,
             -experiment   => $self->experiment(),
             -feature_type => $feature_type,
             -cell_type    => $cell_type,
             -vendor       => 'SOLEXA',
             -format       => 'SEQUENCING',
             #-analysis     => $self->feature_analysis,
); warn("Storing new experimental set\' $set_name\'"); eval { ($eset) = @{$esa->store($eset)}; }; throw("Coudn't store experimental set\' $set_name\': $!") if ($@); } my $fsa = $self->efgdb->get_FeatureSetAdaptor(); my $fset = $fsa->fetch_by_name($set_name); #print Dumper $fset;
if ( ! defined $fset ) { $fset = Bio::EnsEMBL::Funcgen::FeatureSet->new ( -analysis => $self->efg_analysis, -feature_type => $self->feature_type, -cell_type => $self->cell_type, -name => $set_name, -type => 'annotated' ); #print Dumper $fset;
warn("Storing new feature set\' $set_name\'"); eval { ($fset) = @{$fsa->store($fset)}; }; throw("Coudn't store feature set\' $set_name\': $!") if ($@); } else { warn("Feature set with name $set_name already exists."); } # save FeatureSet
$self->feature_set($fset); my $dsa = $self->efgdb->get_DataSetAdaptor; my $dset = $dsa->fetch_by_name($set_name); #print Dumper $dset;
if ( ! defined $dset ) { $dset = Bio::EnsEMBL::Funcgen::DataSet->new ( -SUPPORTING_SETS => [$eset], -FEATURE_SET => $fset, -DISPLAYABLE => 1, -NAME => $set_name, -SUPPORTING_SET_TYPE => 'experimental', ); #print Dumper $dset;
warn("Storing new data set\' $set_name\'"); eval { ($dset) = @{$dsa->store($dset)} }; throw("Coudn't store data set\' $set_name\': $!") if ($@); } else { warn("Data set with name $set_name already exists."); # need to check whether ExperimentalSets and supporting_sets are the same and
# possibly add ExperimentalSet to supporting_sets
my $ssets = $dset->get_supporting_sets(); my %ssets_dbIDs = (); map { $ssets_dbIDs{$_->dbID}='' } (@{$ssets}); $dset->add_supporting_sets([ $eset ]) if (! exists $ssets_dbIDs{$eset->dbID}); } # save DataSet
$self->data_set($dset);
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
    
    my ($self) = @_;
    #warn ("Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::SWEmbl::fetch_input");
# make sure the cache directory exists
my $cachedir = $self->ANALYSIS_WORK_DIR.'/cache'; if ( ! -d $cachedir) { my $retval = system("mkdir -p $cachedir"); throw("Couldn't create cache directory $cachedir") unless ($retval == 0); } warn('infile: '.$self->query); ### Checking for gzip and bed format has been moved to create_input_id stage!!!
#my $open = ($self->is_gzip($self->query))? 'gzip -dc' : 'cat';
#my $cmd = $open.' '.$self->query.' |';
#warn $cmd;
### bed file format ?
#open(INFILE, "$cmd")
# or throw("Can't open gzipped infile ".$self->query);
#while (<INFILE>) {
#
# my @line = split("\t");
# throw("Infile does not have 6 or more columns. We expect bed format: CHROM START END NAME SCORE STRAND.")
# if scalar @line < 6;
#
# throw ("1st column must contain name of seq_region (e.g. chr1 or 1)")
# unless ($line[0] =~ m/^(chr[MTXY\d]+)$/);
# throw ("2nd and 3rd column must contain start and end respectively")
# unless ($line[1] =~ m/^\d+$/ && $line[2] =~ m/^\d+$/);
# throw ("6th column must define strand (either '+' or '-')")
# unless ($line[5] =~ m/^[+-]$/);
# last;
#}
#close INFILE;
(my $cachefile = $self->query) =~ s,.*/([^/]+)$,$1,; my $datfile = $cachedir.'/'.$cachefile; warn('datafile: '.$datfile); unless (-e $datfile) { # dat file already dumped sorted; skipping dump
# make sure reads are dumped in sorted order
my $sort = "gzip -dc ".$self->query. " | sort -k1,1 -k2,2n -k3,3n | gzip -c > $datfile |"; warn("Executing $sort"); open(SORT, "$sort") or throw("Can't open and sort gzipped file ".$self->query); while (<SORT>) {warn($_)} close SORT; } $self->query($datfile); my %parameters_hash = %{$self->parameters_hash($self->efg_analysis->parameters)}; my $runnable = 'Bio::EnsEMBL::Analysis::Runnable::Funcgen::' .$self->efg_analysis->module; $runnable = $runnable->new ( -query => $self->query, -program => $self->efg_analysis->program_file, -analysis => $self->efg_analysis, -workdir => $self->ANALYSIS_WORK_DIR, %parameters_hash ); $self->runnable($runnable); return 1;
}
newdescriptionprevnextTop
sub new {
    print "Analysis::RunnableDB::Funcgen::SWEmbl::new\n";
    my ($class,@args) = @_;
    my $self = $class->SUPER::new(@args);

    $self->read_and_check_config($CONFIG);

    # make sure we have the correct analysis object
$self->check_Analysis(); # make sure we can store the correct feature_set, data_sets, and result_sets
$self->check_Sets(); return $self;
}
querydescriptionprevnextTop
sub query {
  my $self = shift;
  $self->{'query'} = shift if(@_);

  throw("file ".$self->{'query'}. " doesn't exist") 
      unless ( -e $self->{'query'});

  return $self->{'query'};
}
write_outputdescriptionprevnextTop
sub write_output {
    
    print "Bio::EnsEMBL::Analysis::RunnableDB::Funcgen::SWEmbl::write_output\n";
    my ($self) = @_;

    if (scalar(@{$self->output}) == 0) {
        warn("No features to annotate on slice ".$self->query.
             " for experiment ".$self->experiment->name()."!");
        return 1;
    }

    # store analysis, feature set and data set
# analysis was alredy been stored while checking config in read_and_check_config
#$self->adaptor('Analysis')->store($self->analysis());
if (! defined $self->feature_set->dbID) { $self->efgdb->get_FeatureSetAdaptor->store($self->feature_set()); } if (! defined $self->data_set->dbID) { $self->efgdb->get_DataSetAdaptor->store($self->data_set()); } ### annotated features
my $fset = $self->feature_set; my $fs_id = $fset->dbID(); my $af = $self->efgdb->get_AnnotatedFeatureAdaptor->fetch_all_by_FeatureSets([$fset]); warn('No. of annotated features already stored: '.scalar(@$af).' ('.$self->query.' '.$fset->name.')'); warn('No. of annotated features to be stored: '.scalar(@{$self->output}).' ('.$self->query.')'); if (@$af) { warn("NOT IMPORTING ".scalar(@{$self->output})." annotated features! File ". $self->query." already has been processed; contains ".scalar(@$af). " annotated features of feature set ".$fset->dbID."."); return 1; } else { my @af; my $sa = $self->efgdb->get_SliceAdaptor(); my %slice; foreach my $ft (@{$self->output}){ #print Dumper $ft;
my ($seqid, $start, $end, $score) = @{$ft}; #print Dumper ($seqid, $start, $end, $score);
# skip mito calls
next if ($seqid =~ m/^M/);
unless (exists $slice{"$seqid"}) { $slice{"$seqid"} = $sa->fetch_by_region('chromosome', $seqid); } my $af = Bio::EnsEMBL::Funcgen::AnnotatedFeature->new ( -slice => $slice{"$seqid"}, -start => $start, -end => $end, -strand => 0, -display_label => $self->efg_analysis->logic_name, -score => $score, -feature_set => $fset, ); push(@af, $af); } $self->efgdb->get_AnnotatedFeatureAdaptor->store(@af); } return 1; } 1;
}
General documentation
AUTHORTop
Stefan Graf, Ensembl Functional Genomics - /
CONTACTTop
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk