Raw content of Bio::EnsEMBL::Analysis::Runnable::TRF
# Ensembl module for Bio::EnsEMBL::Analysis::Runnable::TRF
#
# Copyright (c) 2004 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::TRF
=head1 SYNOPSIS
my $runnable = Bio::EnsEMBL::Analysis::Runnable::TRF->
new(
-query => $slice,
-program => 'trf',
);
$runnable->run;
my @repeats = @{$runnable->output};
=head1 DESCRIPTION
TRF expects to run the program trf
and produce RepeatFeatures which can be stored in the repeat_feature
and repeat_consensus tables in the core database
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::TRF;
use strict;
use warnings;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable);
=head2 new
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::TRF
Arg [2] : int, match
Arg [3] : int, mismatch
Arg [4] : int, delta
Arg [5] : int, match probablilty
Arg [6] : int, indel probablilty
Arg [7] : int, minimun score
Arg [8] : int, maximun period size
Function : contruct a new Bio::EnsEMBL::Analysis::Runnable::TRF
runnable
Returntype: Bio::EnsEMBL::Analysis::Runnable::TRF
Exceptions: none
Example :
=cut
sub new {
my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($match, $mismatch, $delta, $pm, $pi, $minscore,
$maxperiod) = rearrange(['MATCH', 'MISMATCH', 'DELTA', 'PM', 'PI',
'MINSCORE', 'MAX_PERIOD'], @args);
######################
#SETTING THE DEFAULTS#
######################
$match = 2 unless(defined $match);
$mismatch = 5 unless(defined $mismatch);
$delta = 7 unless(defined $delta);
$pm = 80 unless(defined $pm);
$pi = 10 unless(defined $pi);
$minscore = 40 unless(defined $minscore);
$maxperiod = 500 unless(defined $maxperiod);
######################
$self->program('trf') if(!$self->program);
$self->match ($match) if defined $match;
$self->mismatch ($mismatch) if defined $mismatch;
$self->delta ($delta) if defined $delta;
$self->pm ($pm) if defined $pm;
$self->pi ($pi) if defined $pi;
$self->minscore ($minscore) if defined $minscore;
$self->maxperiod ($maxperiod) if defined $maxperiod;
return $self;
}
# Please use: trf File Match Mismatch Delta PM PI Minscore MaxPeriod
# [options]
# Where: (all scores are positive)
# File = sequences input file
# Match = matching weight
# Mismatch = mismatching penalty
# Delta = indel penalty
# PM = match probability (whole number)
# PI = indel probability (whole number)
# Minscore = minimum alignment score to report
# MaxPeriod = maximum period size to report
# [options] = one or more of the following :
# -m masked sequence file
# -f flanking sequence
# -h avoid html output
=head2 containers
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::CPG
Arg [2] : int,
Function : container for specified variable. This pod refers to the
7 methods below match, mismatch, delta, pm, pi, minscore and maxperiod.
These are simple containers which dont do more than hold and return an
given value nothing is defined
Returntype: int
Exceptions:
Example :
=cut
sub match {
my ($self, $arg) = @_;
$self->{'_match'} = $arg if defined $arg;
return $self->{'_match'};
}
sub mismatch {
my ($self, $arg) = @_;
$self->{'_mismatch'} = $arg if defined $arg;
return $self->{'_mismatch'};
}
sub delta {
my ($self, $arg) = @_;
$self->{'_delta'} = $arg if defined $arg;
return $self->{'_delta'};
}
sub pm {
my ($self, $arg) = @_;
$self->{'_pm'} = $arg if defined $arg;
return $self->{'_pm'};
}
sub pi {
my ($self, $arg) = @_;
$self->{'_pi'} = $arg if defined $arg;
return $self->{'_pi'};
}
sub minscore {
my ($self, $arg) = @_;
$self->{'_minscore'} = $arg if defined $arg;
return $self->{'_minscore'};
}
sub maxperiod {
my ($self, $arg) = @_;
$self->{'_maxperiod'} = $arg if defined $arg;
return $self->{'_maxperiod'};
}
=head2 run_analysis
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::TRF
Arg [2] : string, program name
Function : create and open a commandline for the program trf
Returntype: none
Exceptions: throws if the program in not executable or if the results
file doesnt exist
Example :
=cut
sub run_analysis{
my ($self, $program) = @_;
if(!$program){
$program = $self->program;
}
my @arg_list = ($self->match, $self->mismatch, $self->delta, $self->pm,
$self->pi, $self->minscore, $self->maxperiod);
$self->options(join(' ', @arg_list));
throw($program." is not executable TRF::run_analysis ")
unless($program && -x $program);
# Use this command if you are using TRF version 4.0.0
my $command = $program." ".$self->queryfile." ".$self->options." -d -h";
# Use this command if your TRF program is previous to the 4.0.0 version
# my $command = $program." ".$self->queryfile." ".$self->options." -d";
print "Running analysis ".$command."\n";
my $exit = system($command);
foreach my $file(glob $self->queryfile."*"){
$self->files_to_delete($file);
}
$self->resultsfile($self->queryfile.".".join('.', @arg_list).".dat");
if(!-e $self->resultsfile){
throw("FAILED to run TRF on ".$self->queryfile." ".
$self->resultsfile." has not been produced ".
"TRF:run_analysis");
}
}
=head2 parse_results
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::TRF
Arg [2] : string, results filename
Function : parse the specifed file and produce RepeatFeatures
Returntype: nine
Exceptions: throws if fails to open or close the results file
Example :
=cut
sub parse_results{
my ($self, $results) = @_;
if(!$results){
$results = $self->resultsfile;
}
my $ff = $self->feature_factory;
open(OUT, "<".$results) or throw("FAILED to open ".$results.
"TRF:parse_results");
my @output;
LINE:while(){
my $seqname;
if (/Sequence: (\w+)/) {
$seqname = $1;
}
next LINE unless (/^\d/); # ignore introductory lines
my ($start, $end, $period_size,$copy_number, $consensus_size,
$percent_matches, $percent_indels, $score, $A, $C, $G, $T,
$entropy, $mer) = split;
if (($score < 50 && $percent_matches >= 80 && $copy_number > 2 && $period_size < 10) || ($copy_number >= 2 && $percent_matches >= 70 && $score >= 50)){
my $rc = $ff->create_repeat_consensus('trf', 'trf', '', $mer);
my $rf = $ff->create_repeat_feature($start, $end, 0, $score, 1,
($end - $start +1), $rc);
push(@output, $rf);
}
}
print "No tandem repeats found" if(@output == 0);
$self->output(\@output);
close(OUT) or throw("FAILED to close ".$results.
"TRF:parse_results");
}
1;