Bio::EnsEMBL::Analysis::Runnable
Ortheus
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::Ortheus
Package variables
Privates (from "my" definitions)
$default_java_class = "bp.pecan.Pecan"
$estimate_tree = "~/pecan/EstimateTree.py"
$java_exe = "/nfs/acari/bpaten/bin/jre1.6.0/bin/java"
$default_exonerate = "exonerate-1.0.0"
$default_jar_file = "pecan.0.6.jar"
$uname = `uname`
Included modules
Bio::EnsEMBL::Analysis::Config::Compara
Data::Dumper
Inherit
Synopsis
my $runnable = new Bio::EnsEMBL::Analysis::Runnable::Ortheus
(-workdir => $workdir,
-fasta_files => $fasta_files,
-tree_string => $tree_string,
-program => "/path/to/program");
$runnable->run;
my @output = @{$runnable->output};
Description
Ortheus expects to run the ortheus, a program for reconstructing the ancestral history
of a set of sequences. It is able to infer a tree (or one may be provided) and then
create a tree alignment of the sequences that includes ML reconstructions of the
ancestral sequences. Ortheus is based upon a probabilistic transducer model and handles
the evolution of both substitutions, insertions and deletions.
Methods
Methods description
Arg [1] : -workdir => "/path/to/working/directory" Arg [2] : -fasta_files => "/path/to/fasta/file" Arg [3] : -tree_string => "/path/to/tree/file" (optional) Arg [4] : -parameters => "parameter" (optional)
Function : contruct a new Bio::EnsEMBL::Analysis::Runnable::Mlagan
runnable
Returntype: Bio::EnsEMBL::Analysis::Runnable::Mlagan
Exceptions: none
Example : |
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Mlagan Function : parse the specifed file and produce RepeatFeatures Returntype: nine Exceptions: throws if fails to open or close the results file Example : |
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Mlagan Arg [2] : string, program name Function : create and open a commandline for the program trf Returntype: none Exceptions: throws if the program in not executable or if the results file doesnt exist Example : |
Methods code
sub exonerate
{ my $self = shift;
$self->{'_exonerate'} = shift if(@_);
return $self->{'_exonerate'}; } |
sub fasta_files
{ my $self = shift;
$self->{'_fasta_files'} = shift if(@_);
return $self->{'_fasta_files'}; } |
sub jar_file
{ my $self = shift;
$self->{'_jar_file'} = shift if(@_);
return $self->{'_jar_file'}; } |
sub java_class
{ my $self = shift;
$self->{'_java_class'} = shift if(@_);
return $self->{'_java_class'}; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($workdir, $fasta_files, $tree_string, $species_tree, $species_order, $parameters, $jar_file, $java_class, $exonerate, $options) =
rearrange(['WORKDIR', 'FASTA_FILES', 'TREE_STRING', 'SPECIES_TREE',
'SPECIES_ORDER', 'PARAMETERS', 'JAR_FILE', 'JAVA_CLASS', 'EXONERATE', 'OPTIONS'], @args);
chdir $self->workdir;
$self->fasta_files($fasta_files) if (defined $fasta_files);
if (defined $tree_string) {
$self->tree_string($tree_string)
} elsif ($species_tree and $species_order and @$species_order) {
$self->species_tree($species_tree);
$self->species_order($species_order);
}
$self->parameters($parameters) if (defined $parameters);
$self->options($options) if (defined $options);
unless (defined $self->exonerate) {
$self->exonerate($default_exonerate);
}
return $self; } |
sub options
{ my $self = shift;
$self->{'_options'} = shift if(@_);
return $self->{'_options'}; } |
sub parameters
{ my $self = shift;
$self->{'_parameters'} = shift if(@_);
return $self->{'_parameters'}; } |
sub parse_results
{ my ($self, $run_number) = @_;
my $tree_file = $self->workdir . "/output.$$.tree";
if (-e $tree_file) {
open(F, $tree_file) || throw("Could not open tree file <$tree_file>");
my ($newick, $files) = <F>;
close(F);
$newick =~ s/[\r\n]+$//;
$self->tree_string($newick);
$files =~ s/[\r\n]+$//;
my $all_files = [split(" ", $files)];
$self->fasta_files($all_files);
print STDERR "NEWICK: $newick\nFILES: ", join(" -- ", @$all_files), "\n";
}
my (@ordered_leaves) = $self->tree_string =~ /[(,]([^(:)]+)/g;
print STDERR "NEWICK: ", $self->tree_string, "\nLEAVES: ", join(" -- ", @ordered_leaves), "\nFILES: ", join(" -- ", @{$self->fasta_files}), "\n";
my $alignment_file = $self->workdir . "/output.$$.mfa";
my $this_genomic_align_block = new Bio::EnsEMBL::Compara::GenomicAlignBlock;
open(F, $alignment_file) || throw("Could not open $alignment_file");
my $seq = "";
my $this_genomic_align;
my $tree = Bio::EnsEMBL::Compara::Graph::NewickParser::parse_newick_into_tree($self->tree_string);
$tree->print_tree(100);
print $tree->newick_format("simple"), "\n";
print join(" -- ", map {$_->name} @{$tree->get_all_leaves}), "\n";
print "Reading $alignment_file...\n";
my $ids;
foreach my $this_file (@{$self->fasta_files}) {
push(@$ids, qx""head -1 $this_file");
push(@$ids, undef); ## There is an internal node after each leaf..
}
pop(@$ids); ## ...except for the last leaf which is the end of the tree
#print join(" :: ", @$ids), "\n\n";
while (<F>) {
next if (/^\s*$/);
chomp;
## FASTA headers correspond to the tree and the order of the leaves in the tree corresponds
## to the order of the files
if (/^>/) {
print "PARSING $_\n";
print $tree->newick_format(), "\n";
my ($name) = $_ =~ /^>(.+)/;
if (defined($this_genomic_align) and $seq) {
$this_genomic_align->aligned_sequence($seq);
$this_genomic_align_block->add_GenomicAlign($this_genomic_align);
}
my $header = shift(@$ids);
$this_genomic_align = new Bio::EnsEMBL::Compara::GenomicAlign;
if (!defined($header)) {
print "INTERNAL NODE $name\n";
my $this_node;
foreach my $this_leaf_name (split("_", $name)) {
if ($this_node) {
my $other_node = $tree->find_node_by_name($this_leaf_name);
if (!$other_node) {
throw("Cannot find node <$this_leaf_name>\n");
}
$this_node = $this_node->find_first_shared_ancestor($other_node);
} else {
print $tree->newick_format();
print "LEAF: $this_leaf_name\n";
$this_node = $tree->find_node_by_name($this_leaf_name);
}
}
print join("_", map {$_->name} @{$this_node->get_all_leaves}), "\n";
## INTERNAL NODE: dnafrag_id and dnafrag_end must be edited somewhere else
$this_genomic_align->dnafrag_id(-1);
$this_genomic_align->dnafrag_start(1);
$this_genomic_align->dnafrag_end(0);
$this_genomic_align->dnafrag_strand(1);
bless($this_node, "Bio::EnsEMBL::Compara::GenomicAlignTree");
$this_node->genomic_align($this_genomic_align);
$this_node->name($name);
} elsif ($header =~ /^>DnaFrag(\d+)\|(.+)\.(\d+)\-(\d+)\:(\-?1)$/) {
print "leaf_name?? $name\n";
my $this_leaf = $tree->find_node_by_name($name);
if (!$this_leaf) {
print $tree->newick_format(), "\n";
die "";
}
print "$this_leaf\n";
# print "****** $name -- $header -- ";
# if ($this_leaf) {
# $this_leaf->print_node();
# } else {
# print "[none]\n";
# }
#information extracted from fasta header
$this_genomic_align->dnafrag_id($1);
$this_genomic_align->dnafrag_start($3);
$this_genomic_align->dnafrag_end($4);
$this_genomic_align->dnafrag_strand($5);
bless($this_leaf, "Bio::EnsEMBL::Compara::GenomicAlignTree");
$this_leaf->genomic_align($this_genomic_align);
} else {
throw("Error while parsing the FASTA header. It must start by\" >DnaFrag }
$seq = "";
} else {
$seq .= $_;
}
}
close F;
if ($this_genomic_align->dnafrag_id == -1) {
} else {
$this_genomic_align->aligned_sequence($seq);
$this_genomic_align_block->add_GenomicAlign($this_genomic_align);
}
print $tree->newick_format("simple"), "\n";
print join(" -- ", map {$_->node_id."+".$_->genomic_align->dnafrag_id} (@{$tree->get_all_nodes()})), "\n";
$self->output([$tree]);
}
1; } |
sub run_analysis
{ my ($self, $program) = @_;
$self->run_ortheus;
return 1; } |
sub run_ortheus
{ my $self = shift;
chdir $self->workdir;
throw("Ortheus [$ORTHEUS] does not exist") unless ($ORTHEUS && -e $ORTHEUS);
my $command = "$PYTHON $ORTHEUS";
$command .= " -l\" #-j 0\" ";
if (@{$self->fasta_files}) {
$command .= " -e";
foreach my $fasta_file (@{$self->fasta_files}) {
$command .= " $fasta_file";
}
}
my $java_params = "";
if ($self->parameters) {
$java_params = $self->parameters;
}
$command .= " -m\" $JAVA " . $java_params . "\" -k\" #-J $EXONERATE -X\"";
if ($self->tree_string) {
$command .= " -d '" . $self->tree_string . "'";
} elsif ($self->species_tree and $self->species_order and @{$self->species_order}) {
$command .= " -s $SEMPHY -z '".$self->species_tree."' -A ".join(" ", @{$self->species_order});
} else {
$command .= " -s $SEMPHY";
}
$command .= " -f output.$$.mfa -g output.$$.tree";
if ($self->options) {
$command .= " " . $self->options;
}
print "Running ortheus: " . $command . "\n";
unless (system($command) == 0) {
throw("ortheus execution failed\n");
} } |
sub species_order
{ my $self = shift;
$self->{'_species_order'} = shift if(@_);
return $self->{'_species_order'}; } |
sub species_tree
{ my $self = shift;
$self->{'_species_tree'} = shift if(@_);
return $self->{'_species_tree'}; } |
sub tree_string
{ my $self = shift;
$self->{'_tree_string'} = shift if(@_);
return $self->{'_tree_string'}; } |
General documentation
Ortheus was developed by Benedict Paten in the group of Ewan Birney at the EBI. You contact
him at bjp (AT) ebi (DOT) ac (DOT) uk.
Questions regarding this module should be addressed to the Ensembl development list:
ensembl-dev@ebi.ac.uk