Raw content of Bio::EnsEMBL::Analysis::Runnable::Funcgen::Splitter
# Ensembl module for Bio::EnsEMBL::Analysis::Runnable::Funcgen::Splitter
#
# Copyright (c) 2007 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::Funcgen::Splitter
=head1 SYNOPSIS
my $runnable = Bio::EnsEMBL::Analysis::Runnable::Funcgen::Splitter->new
(
-analysis => $analysis,
-query => 'slice',
-program => 'program.pl',
);
$runnable->run;
my @features = @{$runnable->output};
=head1 DESCRIPTION
Splitter expects to run the command line version of the Splitter program
(http://zlab.bu.edu/splitter) and predicts features which can be stored in
the annotated_feature table in the eFG database
=head1 LICENCE
This code is distributed under an Apache style licence. Please see
/info/about/code_licence.html for details.
=head1 AUTHOR
Stefan Graf, Ensembl Functional Genomics -
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::Funcgen::Splitter;
use strict;
use warnings;
use Data::Dumper;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Analysis::Runnable::Funcgen;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable::Funcgen);
=head2 write_infile
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Splitter
Arg [2] : filename
Description :
Returntype :
Exceptions :
Example :
=cut
sub write_infile {
my ($self, $filename) = @_;
if (! $filename) {
$filename = $self->infile();
}
# everything needs to happen in a particular subdirectory of the workdir,
# filenames are set by the Splitter program
(my $workdir = $filename) =~ s/\.dat$//;
eval { system("mkdir -p $workdir") };
throw ("Couldn't create directory $workdir: $@") if ($@);
$self->workdir($workdir);
$filename =~ s,.+/,,;
# determine both number of result sets (replicates) and features
my $noa = scalar(keys %{$self->result_features});
warn('no. of result sets: ', $noa);
my $nof = scalar(@{(values %{$self->result_features})[0]});
warn('no. of result fts: ', $nof);
# dump features
open(F, "> $workdir/$filename")
or throw("Can't open file $workdir/$filename.");
#print Dumper $self->result_features;
for (my $i=0; $i<$nof; $i++) {
my $coord=0;
foreach my $rset (values %{$self->result_features}) {
printf F "chr%s\t%d\t%d\t", ${$rset}[$i][0], ${$rset}[$i][1], ${$rset}[$i][2] unless ($coord);
$coord=1;
print F "\t".${$rset}[$i][3];
}
print F "\n";
}
close F;
$self->infile($workdir.'/'.$filename);
return "$workdir/$filename";
}
=head2 run_analysis
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Splitter
Arg [2] : string, program name
Usage :
Description :
Returns :
Exceptions :
=cut
### step 1
# ./splitter.cgi "id=SplitterJob.$(gdate -I)" 'submit=Step by step' 'signalfile=samplesplittersignal' 'sorted=yes' 'norm=qnorm' 'combine=median' 'splitterstep=10' 'splitterratio=2' 'cutoff=2.5sd' 'maxgap=100' 'minrun=5'
### step 2
# ./splitter.cgi "id=SplitterJob.$(gdate -I)" 'submit=Step 2' 'sorted=yes' 'norm=none' 'combine=median'
### step 3
# ./splitter.cgi "id=SplitterJob.$(gdate -I)" 'submit=Step 3' 'last=pipeline.median' 'splitterfrom=0.0149' 'splitterto=0.4884' 'splitterstep=10' 'splitterratio=2' 'cutoff=2.5sd' 'custom=0.4884' '2.5sd=0.4884' '1pct=0.395' '5pct=0.25' '2sd=0.3937' 'maxgap=100' 'minrun=5'
### One shot (all in one)
# ./splitter.cgi "id=SplitterJob.TEST" 'submit=One shot' 'signalfile=samplesplittersignal' 'sorted=yes' 'norm=none' 'combine=median' 'splitterstep=10' 'splitterratio=2' 'cutoff=2.5sd' 'maxgap=100' 'minrun=5'
#available Splitter parameters (for details see http://zlab.bu.edu/splitter)
# -Input already sorted by genomic positions of probes
# 'sorted=yes|no'
# -Perform normalization between replicates
# 'norm=none|qnorm|rnorm'
# -Combine replicates
# 'combine=mean|median'
# -Signal cutoff for the combined intensities>=
# 'cutoff=splitter|5pct|1pct|2sd|2.5sd'
# 'splitterfrom='
# 'splitterto='
# 'splitterstep=10'
# 'splitterratio=2'
# -Determine how the probes are clustered
# Gap (Maxgap) <= n base pairs
# 'maxgap=100'
# Clustering (Minrun) >= n probes
# 'minrun=4'
sub run_analysis {
my ($self, $program) = @_;
if(!$program){
$program = $self->program;
}
throw($program." is not executable Splitter::run_analysis ")
unless($program && -x $program);
(my $id = $self->workdir) =~ s,^.+/,,;
my $command = $self->program." \'id=$id\' \'submit=One shot\' \'signalfile=".$self->infile."\' " .
$self->analysis->parameters;
warn("Running analysis " . $command . "\n");
eval { system( $command ) };
throw("FAILED to run $command: ", $@) if ($@);
opendir(DIR, $self->workdir())
or throw("Can't open workdir ".$self->workdir());
my @resultfiles = grep { /\.scored\.txt$/ } readdir DIR;
closedir(DIR);
throw("More than one resultfile in ".$self->workdir())
if (scalar(@resultfiles) > 1);
# set resultsfile
$self->resultsfile($self->workdir.'/'.$resultfiles[0]);
}
=head2 infile
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Splitter
Arg [2] : filename (string)
Description : will hold a given filename or if one is requested but none
defined it will use the create_filename method to create a filename
Returntype : string, filename
Exceptions : none
Example :
=cut
sub infile{
my ($self, $filename) = @_;
if($filename){
$self->{'infile'} = $filename;
}
if(!$self->{'infile'}){
$self->{'infile'} = $self->create_filename($self->analysis->logic_name, 'dat');
}
return $self->{'infile'};
}
=head2 parse_results
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Chipotle
Arg [2] : filename (string)
Decription : open and parse resultsfile
Returntype : none
Exceptions : throws
Example :
=cut
sub parse_results{
my ($self, $resultsfile) = @_;
if (! defined $resultsfile) {
$resultsfile = $self->resultsfile;
}
throw ('No resultsfile defined in instance!')
if(! defined $resultsfile);
throw("parse_results: results file ".$resultsfile." does not exist.")
if (! -e $resultsfile);
throw("parse_results: can't open file ".$resultsfile.": ". $!)
unless (open(F, $resultsfile));
my @output = ();
while () {
next unless (s/^chr//);
chomp;
my @ft = split;
my $coord = shift(@ft);
my @coord = split(/[:-]/, $coord);
push(@output, [ @coord, $ft[0] ]);
}
throw("No features to annotate on slice ".$self->query->seq_region_name."!") if (scalar(@output) == 0);
$self->output(\@output);
#print Dumper $self->output();
throw("parse_results: can't close file ".$resultsfile.".")
unless (close(F));
}
1;