Bio::EnsEMBL::Analysis::Runnable Pecan
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::Runnable::Mlagan
Package variables
Privates (from "my" definitions)
$default_java_class = "bp.pecan.Pecan"
$estimate_tree = "~/pecan/EstimateTree.py"
$java_exe = "/nfs/acari/bpaten/bin/jre1.6.0/bin/java"
$default_exonerate = "exonerate-1.0.0"
$default_jar_file = "pecan.0.6.jar"
$uname = `uname`
Included modules
Bio::EnsEMBL::Analysis::Runnable
Bio::EnsEMBL::Compara::DBSQL::ProteinTreeAdaptor
Bio::EnsEMBL::Compara::GenomicAlign
Bio::EnsEMBL::Compara::GenomicAlignBlock
Bio::EnsEMBL::Utils::Argument
Bio::EnsEMBL::Utils::Exception
Inherit
Bio::EnsEMBL::Analysis::Runnable
Synopsis
  my $runnable = new Bio::EnsEMBL::Analysis::Runnable::Mlagan
(-workdir => $workdir,
-fasta_files => $fasta_files,
-tree_string => $tree_string,
-program => "/path/to/program");
$runnable->run;
my @output = @{$runnable->output};
Description
Mavid expects to run the program mavid, a global multiple aligner for large genomic sequences,
using a fasta file and a tree file (Newick format), and eventually a constraints file.
The output (multiple alignment) is parsed and return as a Bio::EnsEMBL::Compara::GenomicAlignBlock object.
Methods
exonerate
No description
Code
fasta_files
No description
Code
jar_file
No description
Code
java_class
No description
Code
newDescriptionCode
parameters
No description
Code
parse_resultsDescriptionCode
run_analysisDescriptionCode
run_mlagan
No description
Code
tree_string
No description
Code
Methods description
newcode    nextTop
  Arg [1]   : -workdir => "/path/to/working/directory"
Arg [2] : -fasta_files => "/path/to/fasta/file"
Arg [3] : -tree_string => "/path/to/tree/file" (optional)
Arg [4] : -parameters => "parameter" (optional)
Function : contruct a new Bio::EnsEMBL::Analysis::Runnable::Mlagan runnable Returntype: Bio::EnsEMBL::Analysis::Runnable::Mlagan Exceptions: none Example :
parse_resultscodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::Mlagan
Function : parse the specifed file and produce RepeatFeatures
Returntype: nine
Exceptions: throws if fails to open or close the results file
Example :
run_analysiscodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable::Mlagan
Arg [2] : string, program name
Function : create and open a commandline for the program trf
Returntype: none
Exceptions: throws if the program in not executable or if the results
file doesnt exist
Example :
Methods code
exoneratedescriptionprevnextTop
sub exonerate {
  my $self = shift;
  $self->{'_exonerate'} = shift if(@_);
  return $self->{'_exonerate'};
}
fasta_filesdescriptionprevnextTop
sub fasta_files {
  my $self = shift;
  $self->{'_fasta_files'} = shift if(@_);
  return $self->{'_fasta_files'};
}
jar_filedescriptionprevnextTop
sub jar_file {
  my $self = shift;
  $self->{'_jar_file'} = shift if(@_);
  return $self->{'_jar_file'};
}
java_classdescriptionprevnextTop
sub java_class {
  my $self = shift;
  $self->{'_java_class'} = shift if(@_);
  return $self->{'_java_class'};
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  my ($workdir, $fasta_files, $tree_string, $parameters,
      $jar_file, $java_class, $exonerate) =
        rearrange(['WORKDIR', 'FASTA_FILES', 'TREE_STRING','PARAMETERS',
            'JAR_FILE', 'JAVA_CLASS', 'EXONERATE'], @args);

  chdir $self->workdir;
  $self->fasta_files($fasta_files) if (defined $fasta_files);
  if (defined $tree_string) {
    $self->tree_string($tree_string)
  } else {
    # Use EstimateTree.py program to get a tree from the sequences
my $run_str = "python $estimate_tree " . join(" ", @$fasta_files); print "RUN $run_str\n"; my @estimate = qx""$run_str"; if (($estimate[0] !~ /^FINAL_TREE:\( .+\);/) or ($estimate[2] !~ /^ORDERED_SEQUENCES: (.+)/)) { throw "Error while running EstimateTree program for Pecan"; } ($tree_string) = $estimate[0] =~ /^FINAL_TREE: (\(.+\);)/; $self->tree_string($tree_string); # print "THIS TREE $tree_string\n"; my ($files) = $estimate[2] =~ /^ORDERED_SEQUENCES: (.+)/; @$fasta_files = split(" ", $files); $self->fasta_files($fasta_files); # print "THESE FILES ", join(" ", @$fasta_files), "\n"; ## Build newick tree which can be stored in the meta table foreach my $this_file (@$fasta_files) { my $header = qx"head -1 $this_file"; my ($dnafrag_id, $name, $start, $end, $strand) = $header =~ /^>DnaFrag(\d+)\|([^\.+])\.(\d+)\-(\d+)\:(\-?1)/; # print "HEADER: $dnafrag_id, $name, $start, $end, $strand $header"; $strand = 0 if ($strand != 1); $tree_string =~ s/(\W)\d+(\W)/$1${dnafrag_id}_${start}_${end}_${strand}$2/; } $self->{tree_to_save} = $tree_string; # print "TREE_TO_SAVE: $tree_string\n"; } $self->parameters($parameters) if (defined $parameters); unless (defined $self->program) { if (defined($self->analysis) and defined($self->analysis->program)) { $self->program($self->analysis->program); } else { $self->program($java_exe); } } if (defined $jar_file) { $self->jar_file($jar_file); } else { $self->jar_file($default_jar_file); } if (defined $java_class) { $self->java_class($java_class); } else { $self->java_class($default_java_class); } if (defined $exonerate) { $self->exonerate($exonerate); } else { $self->exonerate($default_exonerate); } # Try to locate jar file in usual places... if (!-e $self->jar_file) { $default_jar_file = $self->jar_file; if (-e "/usr/local/pecan/$default_jar_file") { $self->jar_file("/usr/local/pecan/$default_jar_file"); } elsif (-e "/usr/local/ensembl/pecan/$default_jar_file") {
$self->jar_file("/usr/local/ensembl/pecan/$default_jar_file"); } elsif (-e "/usr/local/ensembl/bin/$default_jar_file") {
$self->jar_file("/usr/local/ensembl/bin/$default_jar_file"); } elsif (-e "/usr/local/bin/pecan/$default_jar_file") {
$self->jar_file("/usr/local/bin/pecan/$default_jar_file"); } elsif (-e $ENV{HOME}."/pecan/$default_jar_file") { $self->jar_file($ENV{HOME}."/pecan/$default_jar_file"); } elsif (-e $ENV{HOME}."/Downloads/$default_jar_file") { $self->jar_file($ENV{HOME}."/Downloads/$default_jar_file"); } else { throw("Cannot find Pecan JAR file!"); } } return $self;
}
parametersdescriptionprevnextTop
sub parameters {
  my $self = shift;
  $self->{'_parameters'} = shift if(@_);
  return $self->{'_parameters'};
}
parse_resultsdescriptionprevnextTop
sub parse_results {
  my ($self, $run_number) = @_;

  my $alignment_file = $self->workdir . "/pecan.mfa";
  my $this_genomic_align_block = new Bio::EnsEMBL::Compara::GenomicAlignBlock;

  open F, $alignment_file || throw("Could not open $alignment_file");
  my $seq = "";
  my $this_genomic_align;
print "Reading $alignment_file...\n";
  while (<F>) {
    next if (/^\s*$/);
    chomp;
    ## FASTA headers are defined in the Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::Pecan
## module (or any other module you use to create this Pecan analysis job). Here is an example:
## >DnaFrag1234|X.10001-20000:-1
## This will correspond to chromosome X, which has dnafrag_id 1234 and the region goes from
## position 10001 to 20000 on the reverse strand.
if (/^>/) { if (/^>DnaFrag(\d+)\|(.+)\.(\d+)\-(\d+)\:(\-?1)$/) { if (defined($this_genomic_align) and $seq) { $this_genomic_align->aligned_sequence($seq); $this_genomic_align_block->add_GenomicAlign($this_genomic_align); } $this_genomic_align = new Bio::EnsEMBL::Compara::GenomicAlign; $this_genomic_align->dnafrag_id($1); $this_genomic_align->dnafrag_start($3); $this_genomic_align->dnafrag_end($4); $this_genomic_align->dnafrag_strand($5); $seq = ""; } else { throw("Error while parsing the FASTA header. It must start by\" >DnaFrag#####\" where ##### is the dnafrag_id\n$_"); } } else { $seq .= $_; } } close F; $this_genomic_align->aligned_sequence($seq); $this_genomic_align_block->add_GenomicAlign($this_genomic_align); $self->output([$this_genomic_align_block]); } 1;
}
run_analysisdescriptionprevnextTop
sub run_analysis {
  my ($self, $program) = @_;

  $self->run_mlagan;

  $self->parse_results;

  return 1;
}
run_mlagandescriptionprevnextTop
sub run_mlagan {
  my $self = shift;

  chdir $self->workdir;

  throw($self->program . " is not executable Mlagan::run_analysis ")
    unless ($self->program && -x $self->program);

  my $command = $self->program;
  if ($self->parameters) {
    $command .= " " . $self->parameters;
  }
  $command .= " -cp ".$self->jar_file." ".$self->java_class;
  if (@{$self->fasta_files}) {
    $command .= " -F";
    foreach my $fasta_file (@{$self->fasta_files}) {
      $command .= " $fasta_file";
    }
  }
  $command .= " -J '" . $self->exonerate . "' -X";
  if ($self->tree_string) {
    $command .= " -E '" . $self->tree_string . "'";
  }
  $command .= " -G pecan.mfa";
  if ($self->options) {
    $command .= " " . $self->options;
  }
  print "Running pecan: " . $command . "\n";
  unless (system($command) == 0) {
    throw("pecan execution failed\n");
  }
}
tree_stringdescriptionprevnextTop
sub tree_string {
  my $self = shift;
  $self->{'_tree_string'} = shift if(@_);
  return $self->{'_tree_string'};
}
General documentation
CONTACTTop
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk