# Raw content of Bio::EnsEMBL::Analysis::Runnable::Dust
# Ensembl module for Bio::EnsEMBL::Analysis::Runnable::Dust
#
# Copyright (c) 2004 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::Dust
=head1 SYNOPSIS
my $dust = Bio::EnsEMBL::Analysis::Runnable::Dust->
new(
-query => $slice,
-program => 'tcdust',
);
$dust->run;
my @repeats = @{$dust->output};
=head1 DESCRIPTION
Dust is a wrapper for the tcdust program which runs the dust algorithm
to identify and mask simple repeats
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::Dust;
use strict;
use warnings;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable);
=head2 new
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Dust
Arg [2] : int, masking threshold
Arg [3] : int, word size
Arg [4] : int, window size
Function : create a new Bio::EnsEMBL::Analysis::Runnable::Dust
Returntype: Bio::EnsEMBL::Analysis::Runnable::Dust
Exceptions:
Example :
=cut
sub new {
  # Constructor: builds the object through the parent class, then pulls
  # the Dust-specific options out of the same argument list.
  my $class = shift;
  my @args  = @_;

  my $self = $class->SUPER::new(@args);

  my ($threshold, $word, $window) =
    rearrange([qw(MASKING_THRESHOLD WORD_SIZE WINDOW_SIZE)], @args);

  # Default program name, applied only when the parent constructor did
  # not leave one set.
  unless ($self->program) {
    $self->program('tcdust');
  }

  # Optional parameters are stored only when a true value was supplied.
  if ($threshold) {
    $self->level($threshold);
  }
  if ($word) {
    $self->word_size($word);
  }
  if ($window) {
    $self->window_size($window);
  }

  return $self;
}
#container methods
=head2 containers
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Dust
Arg [2] : int for specified value (for more info see tcdust)
Function : container for specified variable. This pod refers to the
three methods below: level, window_size and word_size. These are simple
containers which do no more than hold and return a given value; undef
is returned if nothing has been set
Returntype: int
Exceptions:
Example :
=cut
sub level {
  # Get/set accessor for the masking level. When called with an argument
  # the value is stored first; the stored value is always returned.
  # NOTE(review): the value lives under the hash key 'min_length', which
  # does not match the accessor name; the key is kept unchanged here.
  my ($self, @new_value) = @_;
  if (@new_value) {
    $self->{'min_length'} = $new_value[0];
  }
  return $self->{'min_length'};
}
sub window_size {
  # Get/set accessor for the window size. When called with an argument
  # the value is stored first; the stored value is always returned.
  # NOTE(review): the value lives under the hash key 'min_gc_content',
  # which does not match the accessor name; the key is kept unchanged here.
  my ($self, @new_value) = @_;
  if (@new_value) {
    $self->{'min_gc_content'} = $new_value[0];
  }
  return $self->{'min_gc_content'};
}
sub word_size {
  # Get/set accessor for the word size. When called with an argument
  # the value is stored first; the stored value is always returned.
  # NOTE(review): the value lives under the hash key 'min_oe', which
  # does not match the accessor name; the key is kept unchanged here.
  my ($self, @new_value) = @_;
  if (@new_value) {
    $self->{'min_oe'} = $new_value[0];
  }
  return $self->{'min_oe'};
}
#utility methods
=head2 run_analysis
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Dust
Arg [2] : string, program name
Function : constructs a commandline and runs the program passed
in. The generic method in Runnable isn't used as Dust doesn't
fit that generic method
Returntype: none
Exceptions: throws if the program is not executable, or if the run
failed because system doesn't return 0
Example :
=cut
sub run_analysis{
  # Builds the dust command line and runs it through the shell, with the
  # program's standard output redirected into the results file.
  #
  # Arguments : $program - optional path of the executable to run; falls
  #             back to $self->program when not given
  # Returns   : nothing meaningful
  # Throws    : if the program is missing or not executable, or if
  #             system() returns a non-zero status
  my ($self, $program) = @_;
  if(!$program){
    $program = $self->program;
  }
  # Build the message from a defined value so that a missing program
  # yields the exception rather than an "uninitialized" warning first.
  my $program_name = defined($program) ? $program : '';
  throw($program_name." is not executable Dust::run_analysis ")
    unless($program && -x $program);

  # Run the program that was just validated. Previously the command was
  # rebuilt from $self->program, so an explicitly passed program was
  # checked but then silently ignored.
  my $command = $program;
  # Optional parameters are only added to the command line when set.
  $command .= " -l ".$self->level if($self->level);
  $command .= " -k ".$self->word_size if($self->word_size);
  $command .= " -w ".$self->window_size if($self->window_size);
  # The single-string form of system() is required here because the
  # output redirection is performed by the shell.
  $command .= " -x ".$self->queryfile." > ".$self->resultsfile;
  print "Running analysis ".$command."\n";
  system($command) == 0 or throw("FAILED to run ".$command);
}
=head2 parse_results
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Dust
Arg [2] : string, filename
Function : open and parse the results file into repeat
features
Returntype: none
Exceptions: throws on failure to open or close output file
Example :
=cut
sub parse_results{
  # Reads the dust results file and turns every "start..end" range found
  # in it into a repeat feature; the collected features are handed to
  # $self->output as an array reference.
  #
  # Arguments : $results - optional filename; falls back to
  #             $self->resultsfile when not given
  # Returns   : nothing meaningful
  # Throws    : on failure to open or close the results file
  my ($self, $results) = @_;
  if(!$results){
    $results = $self->resultsfile;
  }
  my $ff = $self->feature_factory;
  my @output;
  # Three-argument open with a lexical handle: the filename can never be
  # interpreted as an open mode, and no global bareword handle is used.
  open(my $dust_fh, '<', $results) or throw("FAILED to open ".$results);
  # Read the file line by line. The loop condition had lost its readline
  # (a bare "while()" never reads and never terminates).
 LINE:
  while(my $line = <$dust_fh>){
    chomp $line;
    # Lines beginning with '>' are skipped.
    next LINE if($line =~ /^>/);
    if ($line =~ /(\d+)\.\.(\d+)/) {
      my ($start, $end) = ($1, $2);
      # Both coordinates are shifted up by one.
      # NOTE(review): presumably the program reports 0-based positions
      # and features are 1-based - confirm against the tcdust output.
      $start++;
      $end++;
      my $rc = $ff->create_repeat_consensus('dust', 'dust', 'simple', 'N');
      my $rf = $ff->create_repeat_feature($start, $end, 0, 0, $start,
                                          $end, $rc, $self->query->name,
                                          $self->query);
      push(@output, $rf);
    }
  }
  $self->output(\@output);
  close($dust_fh) or throw("FAILED to close ".$results);
}