Raw content of Bio::EnsEMBL::Analysis::Runnable::Funcgen::TileMap
# Ensembl module for Bio::EnsEMBL::Analysis::Runnable::Funcgen::TileMap
#
# Copyright (c) 2007 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::Funcgen::TileMap
=head1 SYNOPSIS
my $runnable = Bio::EnsEMBL::Analysis::Runnable::Funcgen::TileMap->new
(
-analysis => $analysis,
-query => 'slice',
-program => 'program.pl',
);
$runnable->run;
my @features = @{$runnable->output};
=head1 DESCRIPTION
TileMap expects to run the program TileMap (Ji and Wong (2005), PMID: 16046496)
and predicts features which can be stored in the annotated_feature table
in the eFG database
=head1 LICENCE
This code is distributed under an Apache style licence. Please see
/info/about/code_licence.html for details.
=head1 AUTHOR
Stefan Graf, Ensembl Functional Genomics -
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::Funcgen::TileMap;
use strict;
use warnings;
use Data::Dumper;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Analysis::Runnable::Funcgen;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable::Funcgen);
=head2 write_infile
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::TileMap
Arg [2] : filename
Description :
Returntype :
Exceptions :
Example :
=cut
sub write_infile {
my ($self, $filename) = @_;
if (! $filename) {
$filename = $self->infile();
}
# determine both number of result sets (replicates/arrays) and features
my $noa = scalar(keys %{$self->result_features});
warn("\tNo. of result sets: ". $noa);
my $nof = scalar(@{(values %{$self->result_features})[0]});
warn("\tNo. of result features: ". $nof);
# dump features
my $header = join("\t", 'chromosome', 'position', keys %{$self->result_features});
$header .= "\tDUMMY" if ($noa == 1);
open(F, ">".$filename)
or throw("Can't open file $filename.");
print F $header, "\n";
#print Dumper $self->result_features;
for (my $i=0; $i<$nof; $i++) {
my $coord=0;
foreach my $rset (values %{$self->result_features}) {
print F join("\t", ${$rset}[$i][0], ${$rset}[$i][1]) unless ($coord);
$coord=1;
#why do we have to invert the score here???
print F "\t".(-1*${$rset}[$i][3]);
}
print F "\t0" if ($noa == 1);
print F "\n";
}
close F;
warn("\tresult features written to ".$filename);
my $workdir = $self->workdir.'/'.$self->analysis->module();
#warn("\tworkdir: ".$workdir);
# write config and cmpinfo files from templates
(my $project = $filename) =~ s,.+/(.+)\.dat,$1,;
# program parameters
warn('ANALYSIS PARAMETERS: '.$self->analysis->parameters);
my %parameters = ();
map {
my ($key, $value) = split (/=/);
$parameters{$key} = $value;
} split(/;\s+/, $self->analysis->parameters);
#print Dumper %parameters;
my $config = $workdir.'/'.$project.'_args.txt';
#print Dumper $config;
$self->config_file($config);
my $template_file = $parameters{TEMPLATE_FILE};
open(IN, $template_file)
or throw("Can't open config file $template_file");
open(OUT, ">$config")
or throw("Can't open config file $config");
map {
s,^(O.1-.+=).+$,$1 $workdir,; #[Working directory]
s,^(O.2-.+=).+$,$1 $project,; #[Project Title]
s,^(I.2-.+=).+$,$1 $project.dat,; #[Raw data file]
s,^(I.3-.+=).+$,$1 2,; #[Range of test-statistics] (0: default; 1: [0,1], 2: (-inf, +inf))
s,^(II.1-.+=).+$,$1 0,; #[Apply local repeat filter?] (0:No; 1:Yes)
s,^(II.2-.+=).+$,$1 NULL,; #[*.refmask file]
s,^(III.2-.+=).+$,$1 $parameters{METHOD},; #[Method to combine neighboring probes] (0:HMM, 1:MA)
s,^(IV.1-.+=).+$,$1 $parameters{POSTPROB},; #[Posterior probability >]
s,^(IV.2-.+=).+$,$1 $parameters{MAXGAP},; #[Maximal gap allowed] (1000: default)
s,^(IV.4-.+=).+$,$1 0,; #[Provide your own selection statistics?] (0: No, use default; 1: Yes)
s,^(IV.5-.+=).+$,$1 NULL,; #[If Yes to IV.4, selection statistics file]
s,^(IV.10-.+=).+$,$1 $parameters{HYBLENGTH},; #[Expected hybridization length]
s,^(V.2-.+=).+$,$1 $parameters{MAXGAP},; #[Maximal gap allowed] (500: default)
print OUT;
} ;
close IN;
close OUT;
my $cmpinfo = $workdir.'/'.$project.'.cmpinfo';
warn("cmp info file: $cmpinfo");
open(CMP, ">$cmpinfo")
or throw("Can't open cmpinfo file $cmpinfo");
my $array_no = ($noa == 1)? '2' : $noa;
my $group_no = ($noa == 1)? '2' : 1;
my $groups = ($noa == 1)? '1 2' : '1 'x $noa;
warn("array_no: " . $array_no);
warn("group_no: " . $group_no);
warn("groups: " . $groups);
print CMP <2
##############################
# Preprocessing #
##############################
[Truncation lower bound] = -1000000000000.0
[Take log2 before calculation?] (1:yes; 0:no) = 0
##############################
# Simulation Setup #
##############################
[Monte Carlo draws for posterior prob.] = 0
##############################
# Common Variance Groups #
##############################
[Common variance groups] = 1
$groups
##############################
# Permutation Setup #
##############################
[Number of permutations] = 0
[Exchangeable groups] = 1
$groups
EOCMP
close OUT;
#system("cp $cmpinfo_template $cmpinfo");
my $ext;
if ($parameters{METHOD}) {
$ext = '_ma';
} else {
$ext = '_hmm';
}
# UCSC *.bed file to report significant regions.
# Regions are sorted according to their genomic locations.
my $results = $workdir.'/'.$project.$ext.'.bed';
# This .reg file is a tab-delimited file to report significant regions.
# Regions are ranked according to their significance levels.
#my $results = $workdir.'/'.$project.$ext.'.reg';
$self->resultsfile($results);
# set columns (fields) for output
my @fields = (0..2,4); # bed
#my @fields = (0..2,6); # reg
$self->output_fields(\@fields);
return $filename;
}
=head2 run_analysis
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::TileMap
Arg [2] : string, program name
Usage :
Description :
Returns :
Exceptions :
=cut
sub run_analysis {
my ($self, $program) = @_;
if(!$program){
$program = $self->program;
}
throw($program." is not executable TileMap::run_analysis ")
unless($program && -x $program);
my $command = $self->program . ' ' . $self->config_file();
warn("Running analysis " . $command . "\n");
system($command) == 0
or throw("FAILED to run $command: ".$?);
}
#=head2 infile
#
# Arg [1] : Bio::EnsEMBL::Analysis::Runnable::TileMap
# Arg [2] : filename (string)
# Description : will hold a given filename or if one is requested but none
# defined it will use the create_filename method to create a filename
# Returntype : string, filename
# Exceptions : none
# Example :
#
#=cut
#
#
#sub infile{
#
# my ($self, $filename) = @_;
#
# if($filename){
# $self->{'infile'} = $filename;
# }
# if(!$self->{'infile'}){
# $self->{'infile'} = $self->create_filename($self->analysis->logic_name, 'dat');
# }
#
# return $self->{'infile'};
#
#}
sub config_file {
my $self = shift;
$self->{'config_file'} = shift if(@_);
return $self->{'config_file'};
}
1;