Privates (from "my" definitions)
$default_java_class = "bp.pecan.Pecan"
$estimate_tree = "~/pecan/EstimateTree.py"
$java_exe = "/nfs/acari/bpaten/bin/jre1.6.0/bin/java"
$default_exonerate = "exonerate-1.0.0"
$default_jar_file = "pecan.0.6.jar"
$uname = `uname`
my $runnable = new Bio::EnsEMBL::Analysis::Runnable::Mlagan
(-workdir => $workdir,
-fasta_files => $fasta_files,
-tree_string => $tree_string,
-program => "/path/to/program");
$runnable->run;
my @output = @{$runnable->output};
Mavid expects to run the program mavid, a global multiple aligner for large genomic sequences,
using a fasta file and a tree file (Newick format), and eventually a constraints file.
The output (multiple alignment) is parsed and return as a Bio::EnsEMBL::Compara::GenomicAlignBlock object.
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($workdir, $fasta_files, $tree_string, $parameters,
$jar_file, $java_class, $exonerate) =
rearrange(['WORKDIR', 'FASTA_FILES', 'TREE_STRING','PARAMETERS',
'JAR_FILE', 'JAVA_CLASS', 'EXONERATE'], @args);
chdir $self->workdir;
$self->fasta_files($fasta_files) if (defined $fasta_files);
if (defined $tree_string) {
$self->tree_string($tree_string)
} else {
my $run_str = "python $estimate_tree " . join(" ", @$fasta_files);
print "RUN $run_str\n";
my @estimate = qx""$run_str";
if (($estimate[0] !~ /^FINAL_TREE:\( .+\);/) or ($estimate[2] !~ /^ORDERED_SEQUENCES: (.+)/)) {
throw "Error while running EstimateTree program for Pecan";
}
($tree_string) = $estimate[0] =~ /^FINAL_TREE: (\(.+\);)/;
$self->tree_string($tree_string);
# print "THIS TREE $tree_string\n";
my ($files) = $estimate[2] =~ /^ORDERED_SEQUENCES: (.+)/;
@$fasta_files = split(" ", $files);
$self->fasta_files($fasta_files);
# print "THESE FILES ", join(" ", @$fasta_files), "\n";
## Build newick tree which can be stored in the meta table
foreach my $this_file (@$fasta_files) {
my $header = qx"head -1 $this_file";
my ($dnafrag_id, $name, $start, $end, $strand) = $header =~ /^>DnaFrag(\d+)\|([^\.+])\.(\d+)\-(\d+)\:(\-?1)/;
# print "HEADER: $dnafrag_id, $name, $start, $end, $strand $header";
$strand = 0 if ($strand != 1);
$tree_string =~ s/(\W)\d+(\W)/$1${dnafrag_id}_${start}_${end}_${strand}$2/;
}
$self->{tree_to_save} = $tree_string;
# print "TREE_TO_SAVE: $tree_string\n";
}
$self->parameters($parameters) if (defined $parameters);
unless (defined $self->program) {
if (defined($self->analysis) and defined($self->analysis->program)) {
$self->program($self->analysis->program);
} else {
$self->program($java_exe);
}
}
if (defined $jar_file) {
$self->jar_file($jar_file);
} else {
$self->jar_file($default_jar_file);
}
if (defined $java_class) {
$self->java_class($java_class);
} else {
$self->java_class($default_java_class);
}
if (defined $exonerate) {
$self->exonerate($exonerate);
} else {
$self->exonerate($default_exonerate);
}
# Try to locate jar file in usual places...
if (!-e $self->jar_file) {
$default_jar_file = $self->jar_file;
if (-e "/usr/local/pecan/$default_jar_file") {
$self->jar_file("/usr/local/pecan/$default_jar_file");
} elsif (-e "/usr/local/ensembl/pecan/$default_jar_file") { $self->jar_file("/usr/local/ensembl/pecan/$default_jar_file");
} elsif (-e "/usr/local/ensembl/bin/$default_jar_file") { $self->jar_file("/usr/local/ensembl/bin/$default_jar_file");
} elsif (-e "/usr/local/bin/pecan/$default_jar_file") { $self->jar_file("/usr/local/bin/pecan/$default_jar_file");
} elsif (-e $ENV{HOME}."/pecan/$default_jar_file") {
$self->jar_file($ENV{HOME}."/pecan/$default_jar_file");
} elsif (-e $ENV{HOME}."/Downloads/$default_jar_file") {
$self->jar_file($ENV{HOME}."/Downloads/$default_jar_file");
} else {
throw("Cannot find Pecan JAR file!");
}
}
return $self; } |
sub parse_results
{ my ($self, $run_number) = @_;
my $alignment_file = $self->workdir . "/pecan.mfa";
my $this_genomic_align_block = new Bio::EnsEMBL::Compara::GenomicAlignBlock;
open F, $alignment_file || throw("Could not open $alignment_file");
my $seq = "";
my $this_genomic_align;
print "Reading $alignment_file...\n";
while (<F>) {
next if (/^\s*$/);
chomp;
if (/^>/) {
if (/^>DnaFrag(\d+)\|(.+)\.(\d+)\-(\d+)\:(\-?1)$/) {
if (defined($this_genomic_align) and $seq) {
$this_genomic_align->aligned_sequence($seq);
$this_genomic_align_block->add_GenomicAlign($this_genomic_align);
}
$this_genomic_align = new Bio::EnsEMBL::Compara::GenomicAlign;
$this_genomic_align->dnafrag_id($1);
$this_genomic_align->dnafrag_start($3);
$this_genomic_align->dnafrag_end($4);
$this_genomic_align->dnafrag_strand($5);
$seq = "";
} else {
throw("Error while parsing the FASTA header. It must start by\" >DnaFrag#####\" where ##### is the dnafrag_id\n$_");
}
} else {
$seq .= $_;
}
}
close F;
$this_genomic_align->aligned_sequence($seq);
$this_genomic_align_block->add_GenomicAlign($this_genomic_align);
$self->output([$this_genomic_align_block]);
}
1; } |
sub run_analysis
{ my ($self, $program) = @_;
$self->run_mlagan;
$self->parse_results;
return 1; } |
sub run_mlagan
{ my $self = shift;
chdir $self->workdir;
throw($self->program . " is not executable Mlagan::run_analysis ")
unless ($self->program && -x $self->program);
my $command = $self->program;
if ($self->parameters) {
$command .= " " . $self->parameters;
}
$command .= " -cp ".$self->jar_file." ".$self->java_class;
if (@{$self->fasta_files}) {
$command .= " -F";
foreach my $fasta_file (@{$self->fasta_files}) {
$command .= " $fasta_file";
}
}
$command .= " -J '" . $self->exonerate . "' -X";
if ($self->tree_string) {
$command .= " -E '" . $self->tree_string . "'";
}
$command .= " -G pecan.mfa";
if ($self->options) {
$command .= " " . $self->options;
}
print "Running pecan: " . $command . "\n";
unless (system($command) == 0) {
throw("pecan execution failed\n");
} } |