Raw content of Bio::EnsEMBL::Analysis::Runnable::EPCR
# Ensembl module for Bio::EnsEMBL::Analysis::Runnable::EPCR
#
# Copyright (c) 2004 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Analysis::Runnable::EPCR
=head1 SYNOPSIS
my $runnable = Bio::EnsEMBL::Analysis::Runnable::EPCR->new(
-query => $slice,
-program => $self->analysis->dbfile,
%{$self->parameters_hash};
);
$runnable->run;
my @marker_features = @{$runnable->output};
=head1 DESCRIPTION
Wrapper to run EPCR and parse the results into marker features
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=cut
package Bio::EnsEMBL::Analysis::Runnable::EPCR;
use strict;
use warnings;
use Bio::EnsEMBL::Analysis::Runnable;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Analysis::Runnable);
=head2 new
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : string, sts file
Arg [3] : arrayref of Bio::EnsEMBL::Map::Markers
Arg [4] : int, margin
Arg [5] : int, word_size
Arg [6] : int, min mismatch
Arg [7] : int, max mismatch
Function : create a Bio::EnsEMBL::Analysis::Runnable::EPCR
Returntype: Bio::EnsEMBL::Analysis::Runnable::EPCR
Exceptions: throws if passed both an sts file and an array of features
Example :
=cut
sub new {
my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($sts_file, $sts_features, $margin, $word_size, $min_mismatch,
$max_mismatch) = rearrange(['STS_FILE', 'STS_FEATURES', 'M',
'W', 'NMIN', 'NMAX'], @args);
######################
#SETTING THE DEFAULTS#
######################
$self->program('e-PCR') if(!$self->program);
######################
if($sts_file && $sts_features){
throw("Must pass either an STS_FILE $sts_file or an array of ".
"STS_FEATURES $sts_features not both");
}
$self->sts_file($sts_file) if($sts_file);
$self->sts_features($sts_features) if($sts_features);
$self->margin($margin) if($margin);
$self->word_size($word_size) if($word_size);
$self->min_mismatch($min_mismatch) if(defined $min_mismatch);
$self->max_mismatch($max_mismatch) if(defined $max_mismatch);
return $self;
}
#containers
=head2 containers
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : int/string variable
Function : container for the specified variable. This pod
refers the the 4 methods below, margin, word_size, min_mistmatch
and max_mismatch
Returntype: int/string
Exceptions: none
Example :
=cut
sub margin{
my $self = shift;
$self->{'margin'} = shift if(@_);
return $self->{'margin'};
}
sub word_size{
my $self = shift;
$self->{'word_size'} = shift if(@_);
return $self->{'word_size'};
}
sub min_mismatch{
my $self = shift;
$self->{'min_mismatch'} = shift if(@_);
return $self->{'min_mismatch'};
}
sub max_mismatch{
my $self = shift;
$self->{'max_mismatch'} = shift if(@_);
return $self->{'max_mismatch'};
}
=head2 sts_file
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : string, file path
Function : container for sts file path, will use the find file
method for Runnable to locate the file
Returntype: string
Exceptions: none
Example :
=cut
sub sts_file{
my ($self, $file) = @_;
if($file){
my $found = $self->find_file($file);
$self->{'sts_file'} = $found;
}
return $self->{'sts_file'};
}
=head2 sts_features
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : arrayref of Bio::EnsEMBL::Map::Markers
Function : container for arrayref of Bio::EnsEMBL::Map::Markers
Returntype: arrayref
Exceptions: throw if not passed an arrayref or if first element of
array isnt a Bio::EnsEMBL::Map::Marker
Example :
=cut
sub sts_features{
my ($self, $features) = @_;
if($features){
throw("Must pass EPCR sts_features an arrayref not ".$features)
unless(ref($features) eq 'ARRAY');
my $test = $features->[0];
if(!$test || !($test->isa("Bio::EnsEMBL::Map::Marker"))){
my $err = "arrayref ".$features." must contain ".
"Bio::EnsEMBL::Map::Marker";
$err .= " not ".$test if($test);
throw($err);
}
$self->{'sts_features'} = $features;
}
return $self->{'sts_features'};
}
=head2 hit_list
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : int, hit database id
Function : take the hit ids passed and store in a hash
Returntype: hashref
Exceptions: none
Example :
=cut
sub hit_list{
my ($self, $hit) = @_;
if(!$self->{'hit_list'}){
$self->{'hit_list'} = {};
}
if($hit){
$self->{'hit_list'}->{$hit} = $hit;
}
return $self->{'hit_list'};
}
#utility methods
=head2 run
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : string, working directory
Function : coordinates the running and parsing of EPCR
EPCR is run for every mismatch value between the minimum and maximum
Returntype: none
Exceptions: throws if doesnt have a query sequence or if doesnt have
either and sts file or sts features
Example :
=cut
sub run{
my ($self, $dir) = @_;
$self->workdir($dir) if($dir);
throw("Can't run ".$self." without a query sequence")
unless($self->query);
$self->checkdir($dir);
my $filename = $self->write_seq_file();
$self->files_to_delete($filename);
my $mismatch = $self->min_mismatch;
my $sts_file;
while($mismatch <= $self->max_mismatch){
if($self->sts_features){
$sts_file = $self->dump_sts_features($self->sts_features);
}elsif($self->sts_file){
$sts_file = $self->copy_sts_file($self->sts_file);
}else{
throw("Don't have either sts feature or a file");
}
my $results = $self->run_epcr($mismatch, $self->queryfile, $sts_file);
$self->parse_results($results);
$mismatch++;
}
$self->delete_files;
}
=head2 run_epcr
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : int, the mismatch
Arg [3] : string, query sequence filename
Arg [4] : string, sts filename
Function : construct commandline and run epcr
Returntype: string, filename
Exceptions: throws if system call fails
Example :
=cut
sub run_epcr{
my ($self, $mismatch, $query_file, $sts_file) = @_;
my $results = $self->resultsfile;
if(-e $results){
$results .= ".".$mismatch.".results";
}
$mismatch = $self->min_mismatch if(!$mismatch);
$query_file = $self->query_file if(!$query_file);
$sts_file = $self->sts_file if(!$sts_file);
my $options;
$options = " M=".$self->margin if(defined $self->margin);
$options .= " W=".$self->word_size if(defined $self->word_size);
$options .= " N=$mismatch " if(defined $mismatch);
my $command = $self->program." ".$sts_file." ".$query_file." ".
$options." > ".$results;
print "Running analysis ".$command."\n";
system($command) == 0 or throw("FAILED to run ".$command);
$self->files_to_delete($results);
return $results;
}
=head2 dump_sts_features
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : arrayref of Bio::EnsEMBL::Map::Markers
Function : dump the markers to a file in the format expected by epcr
markers who have already been hit or whose max_primer_dist is 0 or
has zero left or right markers are ignored
Returntype:
Exceptions:
Example :
=cut
sub dump_sts_features{
my ($self, $sts_features, $filename) = @_;
if(!$sts_features){
$sts_features = $self->sts_features;
}
my %hit_list = %{$self->hit_list};
if(!$filename){
$filename = $self->create_filename("sts", "out");
}
$self->files_to_delete($filename);
open(OUT, ">".$filename) or throw("FAILED to open $filename");
MARKER:foreach my $m (@$sts_features){
next MARKER if($hit_list{$m->dbID});
next MARKER if($m->max_primer_dist == 0);
next MARKER unless(length($m->left_primer) > 0);
next MARKER unless(length($m->right_primer) > 0);
my $string = $m->dbID."\t".$m->left_primer."\t".
$m->right_primer."\t".$m->min_primer_dist."-".
$m->max_primer_dist."\n";
print OUT $string;
}
close(OUT) or throw("FAILED to close $filename");
return $filename;
}
=head2 copy_sts_file
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : string, sts file
Arg [3] : string, filename for copy
Function : copys entries from one file into another while
skipping the entries already on the hit list
Returntype: filename
Exceptions: throws if given sts file doesnt exist or if any of the
open or close file commands fail
Example :
=cut
sub copy_sts_file{
my ($self, $sts_file, $filename) = @_;
if(!$sts_file){
$sts_file = $self->sts_file;
}
if(! -e $sts_file){
throw("Can't copy file ".$sts_file." which doesn't exist");
}
if(!$filename){
$filename = $self->create_filename("sts", "out");
}
$self->files_to_delete($filename);
my %hit_list = %{$self->hit_list};
if(keys(%hit_list) == 0){
return $sts_file;
}
eval{
open(OUT, ">".$filename) or throw("FAILED to open $filename");
open(IN, $sts_file) or throw("FAILED to open $sts_file");
MARKER:while(){
my $id = (split)[0];
next MARKER if($hit_list{$id});
print OUT;
}
close(OUT) or throw("FAILED to close $filename");
close(IN) or throw("FAILED to close $sts_file");
};
if($@){
throw("FAILED to copy $sts_file $@");
}
return $filename;
}
=head2 parse_results
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::EPCR
Arg [2] : string, filename
Function : parse the file given into Bio:EnsEMBL::Map::MarkerFeatures
Returntype: none
Exceptions: throws if results file doesnt exist or if open and closes
fail
Example :
=cut
sub parse_results{
my ($self, $results) = @_;
if(!$results){
$results = $self->resultsfile;
}
if(!-e $results){
throw("Can't open ".$results." as it doesn't exist");
}
my $ff = $self->feature_factory;
my @output;
open(FH, $results) or throw("FAILED to open ".$results);
while(){
chomp;
#chromosome:NCBI34:1:1:920598:1 615132..615340 121028
#chromosome:NCBI34:1:1:920598:1 622477..622687 121028
my ($name, $start, $end, $dbid) =
$_ =~ m!(\S+)\s+(\d+)\.\.(\d+)\s+(\w+)!;
my $m = $ff->create_marker($dbid);
my $mf = $ff->create_marker_feature($start, $end, 0, $m, $name,
$self->query);
push(@output, $mf);
$self->hit_list($dbid);
}
$self->output(\@output);
close(FH) or throw("FAILED to close ".$results);
}