# Raw content of Bio::EnsEMBL::Analysis::Runnable::FirstEF
# Ensembl module for Bio::EnsEMBL::Analysis::Runnable::FirstEF
#
# Copyright (c) 2004 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::FirstEF
=head1 SYNOPSIS
my $runnable = Bio::EnsEMBL::Analysis::Runnable::FirstEF->new
(
-query => $slice,
-program => 'firstef',
);
$runnable->run;
my @simple_features = @{$runnable->output};
=head1 DESCRIPTION
FirstEF runs the program firstef and turns its results into SimpleFeatures,
which can be stored in the simple_feature table in the core database
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::FirstEF;
use strict;
use warnings;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable);
=head2 new

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::FirstEF (the class)
  Arg [2]   : -PARAM_DIR, string, path to parameters directory
  Arg [3]   : -PARSE_SCRIPT, string, path to parse script
  Function  : create a Bio::EnsEMBL::Analysis::Runnable::FirstEF. All
              arguments are also handed to the parent constructor. The
              program name defaults to 'firstef' when none is set once
              the parent constructor has run.
  Returntype: Bio::EnsEMBL::Analysis::Runnable::FirstEF
  Exceptions: those raised by param_dir and parse_script when a
              directory or script is supplied
  Example   : my $runnable =
                Bio::EnsEMBL::Analysis::Runnable::FirstEF->new(
                  -query        => $slice,
                  -param_dir    => $param_dir,
                  -parse_script => $parse_script,
                );

=cut

sub new {
    my ($class, @args) = @_;

    my $self = $class->SUPER::new(@args);
    my ($param_dir, $parse_script) =
        rearrange([qw(PARAM_DIR PARSE_SCRIPT)], @args);

    # The default program is settled before the path setters run, because
    # those setters may throw.
    unless ($self->program) {
        $self->program('firstef');
    }

    if ($param_dir) {
        $self->param_dir($param_dir);
    }
    if ($parse_script) {
        $self->parse_script($parse_script);
    }

    return $self;
}
##container methods
=head2 parsed_output

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::FirstEF
  Arg [2]   : string, file path (optional)
  Function  : container for the filename of the parsed output. A name
              passed in replaces the stored one. If none is passed in and
              none is stored yet, one is generated with create_filename.
              Every name stored here is also added to the files to
              delete list.
  Returntype: string
  Exceptions: none
  Example   : my $file = $runnable->parsed_output;

=cut

sub parsed_output {
    my ($self, $file) = @_;

    # A new name is stored either because the caller gave one or because
    # nothing has been stored so far.
    if ($file || !$self->{'parsed_output'}) {
        my $path = $file || $self->create_filename('first_parse');
        $self->files_to_delete($path);
        $self->{'parsed_output'} = $path;
    }

    return $self->{'parsed_output'};
}
=head2 listfile

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::FirstEF
  Arg [2]   : string, filename (optional)
  Function  : container for the listfile required by firstef. A name
              passed in is stored and added to the files to delete list.
              When no name is stored, or the stored name does not exist
              on disk, prepare_listfile is called with the stored name
              and whatever it returns is stored instead.
  Returntype: string, filename
  Exceptions: those raised by prepare_listfile
  Example   : my $listfile = $runnable->listfile;

=cut

sub listfile {
    my ($self, $listfile) = @_;

    if ($listfile) {
        $self->{'listfile'} = $listfile;
        $self->files_to_delete($listfile);
    }

    my $current = $self->{'listfile'};
    unless ($current && -e $current) {
        $self->{'listfile'} = $self->prepare_listfile($current);
    }

    return $self->{'listfile'};
}
=head2 parse_script

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::FirstEF
  Arg [2]   : string, path to script (optional)
  Function  : container for the path to the parse script. A path passed
              in is stored only after it is found to exist.
  Returntype: string
  Exceptions: throws if the script passed in doesn't exist
  Example   : $runnable->parse_script($path_to_script);

=cut

sub parse_script {
    my ($self, $script) = @_;

    # Plain read access: nothing to validate or store.
    return $self->{'parse_script'} unless $script;

    if (!-e $script) {
        throw($script." does not exist can't use FirstEF:parse_script");
    }
    $self->{'parse_script'} = $script;

    return $self->{'parse_script'};
}
=head2 param_dir

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::FirstEF
  Arg [2]   : string, directory path (optional)
  Function  : stores the parameters directory, adding a trailing '/' when
              the path lacks one, and checks that every parameter file in
              the list below exists in it. Without an argument the stored
              value is returned and nothing is checked.
  Returntype: string
  Exceptions: throws if any of the listed files don't exist; the missing
              paths are printed to STDERR first
  Example   : $runnable->param_dir('/path/to/firstef/parameters');

=cut

sub param_dir {
    my $self = shift;

    # Plain read access: skip all checking.
    return $self->{'param_dir'} unless @_;

    my $dir = shift;
    $dir .= '/' unless $dir =~ /\/$/;
    $self->{'param_dir'} = $dir;

    # Files that must be present in the directory.
    my @known_param_files = qw(
        donor.3mer_wts_GChighDown
        donor.3mer_wts_GClowDown
        donor.6mer_wts_GChighDown
        donor.6mer_wts_GChighUp
        donor.6mer_wts_GClowDown
        donor.6mer_wts_GClowUp
        donor.decisiontree
        donor.decisiontree.orig
        donor.qdamodel.GChigh
        donor.qdamodel.GClow
        exon.qdamodel.CpGpoor_GChigh
        exon.qdamodel.CpGpoor_GClow
        exon.qdamodel.CpGrich_GChigh
        exon.qdamodel.CpGrich_GClow
        promoter.5mer_wts_CpGpoor_430.510
        promoter.5mer_wts_CpGpoor_490.570
        promoter.5mer_wts_CpGrich_430.510
        promoter.5mer_wts_CpGrich_490.570
        promoter.6mer_wts_CpGpoor_1.250
        promoter.6mer_wts_CpGpoor_1.450
        promoter.6mer_wts_CpGpoor_200.450
        promoter.6mer_wts_CpGrich_1.250
        promoter.6mer_wts_CpGrich_1.450
        promoter.6mer_wts_CpGrich_200.450
        promoter.qdamodel.CpGpoor
        promoter.qdamodel.CpGrich
    );

    # The separator is added again here, so reported paths contain '//'.
    my @missing_files = grep { !-e($_) }
                        map  { $dir . "/$_" } @known_param_files;

    if (@missing_files) {
        print STDERR join("\n", @missing_files);
        throw("The above parameter files are missing.");
    }

    return $self->{'param_dir'};
}
=head2 prepare_listfile

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::FirstEF
  Arg [2]   : string, filename (optional)
  Function  : creates a filename or uses the one passed in. If the name
              was generated here, or the named file does not exist yet,
              the file is opened, the string "queryfilename -1500" is
              written to it and it is closed. The file, plus its _domain
              and _domain_comp companions, are added to the files to
              delete list, and resultsfile is set to the name with _out
              appended (also added to the delete list). A file passed in
              which already exists is left untouched.
  Returntype: string, filename
  Exceptions: throws if fails to open or close file
  Example   : my $listfile = $runnable->prepare_listfile;

=cut

sub prepare_listfile {
    my ($self, $listfile) = @_;

    my $generated = !$listfile;
    if ($generated) {
        $listfile = $self->create_filename('firstef_listfile_', '',
                                           $self->workdir);
    }

    # A caller-supplied file that is already on disk counts as prepared.
    # A supplied name with no file behind it used to be returned unwritten,
    # which left firstef pointing at a missing list file.
    return $listfile unless $generated || !-e $listfile;

    open(my $list_fh, '>', $listfile)
        or throw("FAILED to open $listfile ".
                 "FirstEF:prepare_listfile");
    print {$list_fh} $self->queryfile." -1500\n";
    close($list_fh)
        or throw("FAILED to close $listfile ".
                 "FirstEF:prepare_listfile");

    # Register the list file and the files named after it for cleanup.
    $self->files_to_delete($listfile);
    $self->files_to_delete($listfile."_domain");
    $self->files_to_delete($listfile."_domain_comp");
    $self->resultsfile($listfile."_out");
    $self->files_to_delete($self->resultsfile);

    return $listfile;
}
=head2 run_analysis

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::FirstEF
  Arg [2]   : string, program name (optional, defaults to $self->program)
  Function  : constructs the commandline from the program name, the
              listfile and the parameters directory, prints it to STDOUT
              and runs it
  Returntype: none documented
  Exceptions: throws if system doesn't return a 0
  Example   : $runnable->run_analysis;

=cut

sub run_analysis {
    my ($self, $program) = @_;

    $program = $self->program unless $program;

    # listfile is fetched before param_dir, as in the command string order.
    my $list_path  = $self->listfile;
    my $parameters = $self->param_dir;
    my $command    = "$program 1500 $list_path $parameters 0 0.4 0.4 0.5";

    print "Running analysis $command\n";
    my $status = system($command);
    $status == 0 or throw("FAILED to run ".$command);
}
=head2 parse_results

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::FirstEF
  Arg [2]   : string, results filename (optional, defaults to
              $self->resultsfile)
  Function  : parses results, initially using an external script (run by
              first_parse) and then using a simple regex over that
              script's output to produce simple features. Lines
              mentioning "direct strand" or "complementary strand" set
              the strand (1 or -1) given to the features that follow.
              The features are handed to $self->output.
  Returntype: none
  Exceptions: throws if the first parse output file doesn't exist, or if
              it fails to open or close
  Example   : $runnable->parse_results;

=cut

sub parse_results {
    my ($self, $results) = @_;

    if (!$results) {
        $results = $self->resultsfile;
    }
    my $ff = $self->feature_factory;
    my @output;

    my $output = $self->first_parse($results);
    throw("FAILED to run ".$self->parse_script." ".$output." doesn't exist")
        unless (-e $output);

    open(my $parsed_fh, '<', $output) or throw("FAILED to open ".$output);

    # Stays undefined until a strand line has been seen.
    my $strand;
  LINE:
    while (my $line = <$parsed_fh>) {
        chomp $line;
        $strand = 1  if ($line =~ /direct strand/);
        $strand = -1 if ($line =~ /complementary strand/);
        if ($line =~ /\d+\s+\S+\s+\S+([^\.]+)\.\.(\d+)\s+(\S+)\s+\S+\s+\S+\s+(\d+)/) {
            # Copy the captures straight away so later code cannot clobber them.
            my ($from, $to, $score, $rank) = ($1, $2, $3, $4);
            # Coordinates may be listed high..low, so order them numerically.
            my ($start, $end) = sort { $a <=> $b } ($from * 1, $to * 1);
            my $sf = $ff->create_simple_feature($start, $end, $strand, $score,
                                                "rank = $rank", '',
                                                $self->query);
            push(@output, $sf);
        }
    }
    close($parsed_fh) or throw("FAILED to close ".$output);

    $self->output(\@output);
}
=head2 first_parse

  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::FirstEF
  Arg [2]   : string, a results filename (optional, defaults to
              $self->resultsfile)
  Function  : runs the parsing script across the initial firstef output,
              giving it the results filename and the parsed_output
              filename as arguments; the commandline is printed to STDOUT
  Returntype: string, filename of the parsed output
  Exceptions: throws if script doesn't return 0
  Example   : my $parsed_file = $runnable->first_parse($resultsfile);

=cut

sub first_parse {
    my ($self, $results) = @_;

    $results = $self->resultsfile unless $results;

    my $command = join(' ',
                       scalar($self->parse_script),
                       $results,
                       scalar($self->parsed_output));

    print "Running analysis $command\n";
    my $status = system($command);
    $status == 0 or throw("FAILED to run ".$command);

    return $self->parsed_output;
}
1;