Raw content of Bio::EnsEMBL::Pipeline::GeneDuplication::PAML
# To do:
# ------
# -Currently only implements codeml wrapper. Should at least
# include baseml too.
# -Some perfectly acceptable PAML options set to 0 are being
# treated by Perl as 'unset'. This probably is harmless,
# but is a bug nonetheless.
# -Set/Get functions should check dependencies such that config
# errors are thrown by the perl layer rather than the paml
# application. Tough.
# -Set functions should check that the set values are allowed.
package Bio::EnsEMBL::Pipeline::GeneDuplication::PAML;
use vars qw(@ISA);
use strict;
use Bio::EnsEMBL::Pipeline::RunnableI;
use Bio::Tools::Phylo::PAML;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(warning throw);
use Cwd;
@ISA = qw(Bio::EnsEMBL::Pipeline::RunnableI);
sub new {
my ($class, @args) = @_;
my $self = bless {},$class;
my ($executable,
$work_dir,
$aligned_seqs,
$seqfile,
$treefile,
$outfile,
$noisy,
$verbose,
$runmode,
$seqtype,
$codonfreq,
$aadist,
$aaratefile,
$model,
$nssites,
$icode,
$mgene,
$fix_kappa,
$kappa,
$fix_omega,
$omega,
$fix_alpha,
$alpha,
$malpha,
$ncatg,
$clock,
$getse,
$rateancestor,
$small_diff) = rearrange([qw(EXECUTABLE
WORK_DIR
ALIGNED_SEQS
SEQFILE
TREEFILE
OUTFILE
NOISY
VERBOSE
RUNMODE
SEQTYPE
CODONFREQ
AADIST
AARATEFILE
MODEL
NSSITES
ICODE
MGENE
FIX_KAPPA
KAPPA
FIX_OMEGA
OMEGA
FIX_ALPHA
ALPHA
MALPHA
NCATG
CLOCK
GETSE
RATEANCESTOR
SMALL_DIFF)],@args);
throw ("Have sequence input and input file - which do I use?")
if (defined $aligned_seqs && defined $seqfile);
$self->_aligned_seqs($aligned_seqs) if $aligned_seqs;
$self->_codeml_executable($executable) if $executable;
$self->work_dir($work_dir) if $work_dir;
$self->_seqfile($seqfile) if $seqfile;
$self->_treefile($treefile) if $treefile;
$self->_pre_copying; # Set up our work dir the way we like it.
$self->_outfile($outfile) if $outfile;
$self->_noisy($noisy) if $noisy;
$self->_verbose($verbose) if $verbose;
$self->_runmode('set', $runmode);
$self->_seqtype($seqtype) if $seqtype;
$self->_codonfreq($codonfreq) if $codonfreq;
$self->_aadist($aadist) if $aadist;
$self->_aaratefile($aaratefile) if $aaratefile;
$self->_model('set', $model);
$self->_nssites($nssites);
$self->_icode($icode);
$self->_treefile($mgene) if $mgene;
$self->_treefile($fix_kappa) if $fix_kappa;
$self->_treefile($kappa) if $kappa;
$self->_treefile($fix_omega) if $fix_omega;
$self->_treefile($omega) if $omega;
$self->_treefile($fix_alpha) if $fix_alpha;
$self->_treefile($alpha) if $alpha;
$self->_treefile($malpha) if $malpha;
$self->_treefile($ncatg) if $ncatg;
$self->_treefile($clock) if $clock;
$self->_treefile($getse) if $getse;
$self->_treefile($rateancestor) if $rateancestor;
$self->_treefile($small_diff) if $small_diff;
return $self
}
sub DESTROY {
my $self = shift;
unlink $self->_config_file if $self->_config_file;
unlink $self->_seqfile if $self->_seqfile;
unlink ($self->work_dir . "/rub", $self->work_dir . "/rst",
$self->work_dir . "/rst1", $self->work_dir . "/lnf",
$self->work_dir . "/2ML.dN", $self->work_dir . "/2ML.dS",
$self->work_dir . "/2ML.t", $self->work_dir . "/2NG.dN",
$self->work_dir . "/2NG.dS", $self->work_dir . "/2NG.t")
if $self->_outfile;
unlink $self->_outfile if $self->_outfile;
warning ("Paml execution resulted in a core file being dumped.")
if (unlink $self->work_dir . "/core");
warning ("Could not remove working directory [". $self->work_dir ."].\n$!")
if (! rmdir $self->work_dir);
}
sub run_codeml {
my $self = shift;
# Directory changing shenanigans to make PAML behave!
my $user_dir = cwd();
# Move to our working dir
warning ($!) if (! chdir $self->work_dir);
$self->_write_seqs($self->_seqfile);
$self->_write_config_file;
my $command = $self->_codeml_executable . " " . $self->_config_file;
print STDERR $command . "\n";
eval {
system($command)
};
if ($@ or -e $self->work_dir . "/core") {
throw ("Something went wrong when codeml was executed.\n" . $@);
}
# Move back to original dir
print STDERR $! if (! chdir $user_dir);
my $parser = Bio::Tools::Phylo::PAML->new('-file' => $self->_outfile,
'-dir' => $self->work_dir);
return $parser;
}
# Make a temporary playground for PAML and some of its anti-social ways.
# Execute application from here such that all unwanted temp files can be
# neatly deleted. Also, can copy assorted external files, like grantham.dat
# here before execution. Give temp dir a unique name to avoid conflicts
# from multiple simultaneous jobs.
sub _pre_copying {
my $self = shift;
#system("cp " . $self->_executable . " " . $self->work_dir);
# Ultimately all the datafiles need to be copied into here.
return 1;
}
sub _write_config_file {
my ($self) = @_;
my $config_string;
$config_string .= ' seqfile = ' . $self->_seqfile . "\n";
$config_string .= ' treefile = ' . $self->_treefile . "\n";
$config_string .= ' outfile = ' . $self->_outfile . "\n";
$config_string .= ' noisy = ' . $self->_noisy . "\n";
$config_string .= ' verbose = ' . $self->_verbose . "\n";
$config_string .= ' runmode = ' . $self->_runmode . "\n";
$config_string .= ' seqtype = ' . $self->_seqtype . "\n";
$config_string .= ' CodonFreq = ' . $self->_codonfreq . "\n";
$config_string .= ' aaDist = ' . $self->_aadist . "\n";
$config_string .= ' aaRatefile = ' . $self->_aaratefile . "\n";
$config_string .= ' model = ' . $self->_model . "\n";
$config_string .= ' NSsites = ' . $self->_nssites . "\n";
$config_string .= ' icode = ' . $self->_icode . "\n";
$config_string .= ' Mgene = ' . $self->_mgene . "\n";
$config_string .= ' fix_kappa = ' . $self->_fix_kappa . "\n";
$config_string .= ' kappa = ' . $self->_kappa . "\n";
$config_string .= ' fix_omega = ' . $self->_fix_omega . "\n";
$config_string .= ' omega = ' . $self->_omega . "\n";
$config_string .= ' fix_alpha = ' . $self->_fix_alpha . "\n";
$config_string .= ' alpha = ' . $self->_alpha . "\n";
$config_string .= ' Malpha = ' . $self->_malpha . "\n";
$config_string .= ' ncatG = ' . $self->_ncatg . "\n";
$config_string .= ' clock = ' . $self->_clock . "\n";
$config_string .= ' getSE = ' . $self->_getse . "\n";
$config_string .= 'RateAncestor = ' . $self->_rateancestor . "\n";
$config_string .= ' Small_Diff = ' . $self->_small_diff . "\n";
my $file_with_path = $self->work_dir . "paml_codeml.ctl";
open(CONFIG, ">$file_with_path") or die "Cant open config file for writing.";
print CONFIG $config_string;
close(CONFIG);
$self->_config_file($file_with_path);
return 1
}
### Files and executables ###
sub _codeml_executable {
my $self = shift;
if (@_) {
$self->{_codeml_executable} = shift;
}
unless ($self->{_codeml_executable}) {
print STDERR "Explicit location of codeml executable not set. Assuming\n" .
"that codeml is in your path.";
$self->{_codeml_executable} = 'codeml';
}
return $self->{_codeml_executable}
}
sub work_dir {
my ($self) = shift;
unless ($self->{_work_dir}) {
my $work_dir;
if (@_) {
my $arg_dir = shift;
$work_dir = $arg_dir . '/' . 'paml_temp_' . time . '/';
} else {
$work_dir = '/tmp/paml_temp_' . time . '/';
}
unless (-d $work_dir){
mkdir $work_dir;
}
$self->{_work_dir} = $work_dir;
}
return $self->{_work_dir}
}
sub _write_seqs {
my ($self, $filename) = @_;
throw ("No sequences to write.")
unless $self->_aligned_seqs;
system("rm -f $filename");
open(OUT, ">$filename") or die "Cant write to file [$filename]\n";
print OUT scalar @{$self->_aligned_seqs} . " " . $self->_aligned_seqs->[0]->length . "\n";
foreach my $aligned_seq (@{$self->_aligned_seqs}){
print OUT $aligned_seq->display_id . "\n" . $aligned_seq->seq . "\n";
}
close(OUT);
return 1
}
sub _aligned_seqs {
my $self = shift;
if (@_){
$self->{_aligned_seqs} = shift;
throw ("No sequences to set.")
unless $self->{_aligned_seqs} and
$self->{_aligned_seqs}->[0]->isa("Bio::Seq");
}
return $self->{_aligned_seqs}
}
sub _config_file {
my ($self) = shift;
if (@_){
$self->{_config_file} = shift;
}
#print "Config file for PAML : " . $self->{_config_file} . "\n";
return $self->{_config_file}
}
### Config options ###
sub _seqfile {
my ($self) = shift;
if (@_) {
$self->{_seqfile} = shift;
}
unless ($self->{_seqfile}) {
$self->{_seqfile} = $self->work_dir . 'paml_in_' . time;
}
return $self->{_seqfile}
}
sub _treefile {
my ($self) = shift;
if (@_) {
$self->{_treefile} = shift;
}
if (!$self->{_treefile} && $self->_runmode == 0) {
throw ("Input tree file has not been defined.");
}
if (!$self->{_treefile}) {
$self->{_treefile} = '';
}
return $self->{_treefile}
}
sub _outfile {
my ($self) = shift;
if (@_) {
$self->{_outfile} = shift;
}
unless ($self->{_outfile}) {
$self->{_outfile} = $self->work_dir . 'paml_out_' . time;
}
return $self->{_outfile}
}
sub _noisy {
my ($self) = shift;
if (@_) {
$self->{_noisy} = shift;
}
if (!$self->{_noisy}){
return 0
} else {
return $self->{_noisy}
}
}
sub _verbose {
my ($self) = shift;
if (@_) {
$self->{_verbose} = shift;
}
if (!$self->{_verbose}){
return 0
} else {
return $self->{_verbose}
}
}
sub _runmode {
my ($self, $set, $value) = @_;
if (defined $set) {
$self->{_runmode} = $value + 1; # Add one to cope with 0 as a valid option.
}
unless (defined $self->{_runmode}){
throw ("Runmode has not been set.");
}
return $self->{_runmode} - 1 # Remember to remove one to return correct option.
}
sub _seqtype {
my ($self) = shift;
if (@_) {
$self->{_seqtype} = shift;
}
unless (defined $self->{_seqtype}){
throw ("Seqtype has not been set.");
}
return $self->{_seqtype}
}
sub _codonfreq {
my ($self) = shift;
if (@_) {
$self->{_codonfreq} = shift;
}
if ($self->_seqtype == 2 && !defined $self->{_codonfreq}){
# warning ("Codonfreq not set. Defaulting to option 2: F3X4 frequencies.");
return 2;
}
if (! $self->{_codonfreq}){
return 2;
}
return $self->{_codonfreq}
}
sub _aadist {
my ($self) = shift;
if (@_) {
$self->{_aadist} = shift;
}
unless (defined $self->{_aadist}){
# warning ("Aadist not set. Defaulting to option 1: equal distances.");
return 0;
}
return $self->{_aadist}
}
sub _aaratefile {
my ($self) = shift;
if (@_) {
$self->{_aaratefile} = shift;
}
if (!defined $self->{_aaratefile}){
if ($self->_seqtype == 2 && $self->_model > 1) {
# warning ("Aaratefile not set. Defaulting to file wag.dat.");
}
return 'wag.dat';
}
return $self->{_aaratefile}
}
sub _model {
my ($self, $set, $value) = @_;
if ($set) {
$self->{_model} = $value + 1; # Add one to cope with 0 as a valid option.
}
unless (defined $self->{_model}){
throw ("Model has not been set.");
}
return $self->{_model} - 1 # Remove one to obtain correct value.
}
sub _nssites {
my ($self) = shift;
if (@_) {
$self->{_nssites} = shift;
}
unless (defined $self->{_nssites}){
# warning ("NSsites has not been set. Defaulting to 0.");
}
return $self->{_nssites}
}
sub _icode {
my ($self) = shift;
if (@_) {
$self->{_icode} = shift;
}
unless (defined $self->{_icode}){
warning ("PAML: Defaulting to Universal Genetic Code.\n");
return 0 # Universal code
}
return $self->{_icode}
}
sub _mgene {
my ($self) = shift;
if (@_) {
$self->{_mgene} = shift;
}
unless (defined $self->{_mgene}){
# print STDERR "Defaulting to mgene = 0\n";
return 0 # default to separate
}
return $self->{_mgene}
}
sub _fix_kappa {
my ($self) = shift;
if (@_) {
$self->{_fix_kappa} = shift;
}
unless (defined $self->{_fix_kappa}){
# print STDERR "Defaulting to unfixed kappa - will estimate kappa.\n";
return 0 # default to unfixed kappa
}
return $self->{_fix_kappa}
}
sub _kappa {
my ($self) = shift;
if (@_) {
$self->{_kappa} = shift;
}
if ($self->_fix_kappa != 0 && !defined $self->{_kappa}){
throw ("Youve asked for kappa to be fixed, but\n" .
"have not specified a value for it to be\n" .
"fixed to. Try setting '-kappa' => x or\n" .
"whatever");
}
if (! $self->{_kappa}){
return 2;
}
return $self->{_kappa}
}
sub _fix_omega {
my ($self) = shift;
if (@_) {
$self->{_fix_omega} = shift;
}
unless (defined $self->{_fix_omega}){
# print STDERR "Defaulting to unfixed omega - will estimate omega.\n";
return 0 # default to unfixed omega
}
return $self->{_fix_omega}
}
sub _omega {
my ($self) = shift;
if (@_) {
$self->{_omega} = shift;
}
if ($self->_fix_omega != 0 && !defined $self->{_omega}){
throw ("Youve asked for omega to be fixed, but\n" .
"have not specified a value for it to be\n" .
"fixed to. Try setting '-omega' => x or\n" .
"whatever");
}
if (! defined $self->{_omega}){
return 0;
}
return $self->{_omega}
}
sub _fix_alpha {
my ($self) = shift;
if (@_) {
$self->{_fix_alpha} = shift;
}
unless (defined $self->{_fix_alpha}){
# print STDERR "Defaulting to unfixed alpha - will estimate alpha.\n";
return 0 # default to unfixed alpha
}
return $self->{_fix_alpha}
}
sub _alpha {
my ($self) = shift;
if (@_) {
$self->{_alpha} = shift;
}
if ($self->_fix_alpha != 0 && !defined $self->{_alpha}){
throw ("Youve asked for alpha to be fixed, but\n" .
"have not specified a value for it to be\n" .
"fixed to. Try setting '-alpha' => x or\n" .
"whatever");
}
if (! defined $self->{_alpha}){
return 0;
}
return $self->{_alpha}
}
sub _malpha {
my ($self) = shift;
if (@_) {
$self->{_malpha} = shift;
}
if (!defined $self->{_malpha}){
# silently default to 0. Ive never used this - what
# other parameters does it depend on?
return 0
}
return $self->{_malpha}
}
sub _ncatg {
my ($self) = shift;
if (@_) {
$self->{_ncatg} = shift;
}
if (!defined $self->{_ncatg}){
# silently default to 10. Ive never used this - what
# other parameters does it depend on?
return 10
}
return $self->{_ncatg}
}
sub _clock {
my ($self) = shift;
if (@_) {
$self->{_clock} = shift;
}
if (!defined $self->{_clock}){
# What other parameters are set with clock - should check these before throwing a warning.
# warning ("Clock parameter not set. Defaulting to option 0: no clock.");
return 0
}
return $self->{_clock}
}
sub _getse {
my ($self) = shift;
if (@_) {
$self->{_getse} = shift;
}
if (!defined $self->{_getse}){
# print STDERR "By default, not estimating SE.\n";
return 0
}
return $self->{_getse}
}
sub _rateancestor {
my ($self) = shift;
if (@_) {
$self->{_rateancestor} = shift;
}
if (!defined $self->{_rateancestor}){
# print STDERR "By default, RateAncestor option set to 0 - not" .
# "\nestimating ancestral states.\n";
return 0
}
return $self->{_rateancestor}
}
# What is this value for?
sub _small_diff {
my ($self) = shift;
if (@_) {
$self->{_small_diff} = shift;
}
if (!defined $self->{_small_diff}){
# Dont know what this does - default set directly from sample file.
return '0.5e-6'
}
return $self->{_small_diff}
}