Raw content of Bio::EnsEMBL::Analysis::Runnable::Pecan # Ensembl module for Bio::EnsEMBL::Analysis::Runnable::Pecan # # Copyright (c) 2005 Ensembl # =head1 NAME Bio::EnsEMBL::Analysis::Runnable::Mlagan =head1 SYNOPSIS my $runnable = new Bio::EnsEMBL::Analysis::Runnable::Mlagan (-workdir => $workdir, -fasta_files => $fasta_files, -tree_string => $tree_string, -program => "/path/to/program"); $runnable->run; my @output = @{$runnable->output}; =head1 DESCRIPTION Mavid expects to run the program mavid, a global multiple aligner for large genomic sequences, using a fasta file and a tree file (Newick format), and eventually a constraints file. The output (multiple alignment) is parsed and return as a Bio::EnsEMBL::Compara::GenomicAlignBlock object. =head1 CONTACT Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk =cut package Bio::EnsEMBL::Analysis::Runnable::Pecan; use strict; use warnings; use Bio::EnsEMBL::Utils::Exception; use Bio::EnsEMBL::Utils::Argument; use Bio::EnsEMBL::Compara::DBSQL::ProteinTreeAdaptor; use Bio::EnsEMBL::Compara::GenomicAlign; use Bio::EnsEMBL::Compara::GenomicAlignBlock; use Bio::EnsEMBL::Analysis::Runnable; our @ISA = qw(Bio::EnsEMBL::Analysis::Runnable); my $java_exe = "/nfs/acari/bpaten/bin/jre1.6.0/bin/java"; my $uname = `uname`; $uname =~ s/[\r\n]+//; my $default_exonerate = "exonerate-1.0.0"; my $default_jar_file = "pecan.0.6.jar"; my $default_java_class = "bp.pecan.Pecan"; my $estimate_tree = "~/pecan/EstimateTree.py"; =head2 new Arg [1] : -workdir => "/path/to/working/directory" Arg [2] : -fasta_files => "/path/to/fasta/file" Arg [3] : -tree_string => "/path/to/tree/file" (optional) Arg [4] : -parameters => "parameter" (optional) Function : contruct a new Bio::EnsEMBL::Analysis::Runnable::Mlagan runnable Returntype: Bio::EnsEMBL::Analysis::Runnable::Mlagan Exceptions: none Example : =cut sub new { my ($class,@args) = @_; my $self = $class->SUPER::new(@args); my ($workdir, $fasta_files, $tree_string, $parameters, $jar_file, $java_class, $exonerate) = rearrange(['WORKDIR', 'FASTA_FILES', 'TREE_STRING','PARAMETERS', 'JAR_FILE', 'JAVA_CLASS', 'EXONERATE'], @args); chdir $self->workdir; $self->fasta_files($fasta_files) if (defined $fasta_files); if (defined $tree_string) { $self->tree_string($tree_string) } else { # Use EstimateTree.py program to get a tree from the sequences my $run_str = "python $estimate_tree " . join(" ", @$fasta_files); print "RUN $run_str\n"; my @estimate = qx"$run_str"; if (($estimate[0] !~ /^FINAL_TREE: \(.+\);/) or ($estimate[2] !~ /^ORDERED_SEQUENCES: (.+)/)) { throw "Error while running EstimateTree program for Pecan"; } ($tree_string) = $estimate[0] =~ /^FINAL_TREE: (\(.+\);)/; $self->tree_string($tree_string); # print "THIS TREE $tree_string\n"; my ($files) = $estimate[2] =~ /^ORDERED_SEQUENCES: (.+)/; @$fasta_files = split(" ", $files); $self->fasta_files($fasta_files); # print "THESE FILES ", join(" ", @$fasta_files), "\n"; ## Build newick tree which can be stored in the meta table foreach my $this_file (@$fasta_files) { my $header = qx"head -1 $this_file"; my ($dnafrag_id, $name, $start, $end, $strand) = $header =~ /^>DnaFrag(\d+)\|([^\.+])\.(\d+)\-(\d+)\:(\-?1)/; # print "HEADER: $dnafrag_id, $name, $start, $end, $strand $header"; $strand = 0 if ($strand != 1); $tree_string =~ s/(\W)\d+(\W)/$1${dnafrag_id}_${start}_${end}_${strand}$2/; } $self->{tree_to_save} = $tree_string; # print "TREE_TO_SAVE: $tree_string\n"; } $self->parameters($parameters) if (defined $parameters); unless (defined $self->program) { if (defined($self->analysis) and defined($self->analysis->program)) { $self->program($self->analysis->program); } else { $self->program($java_exe); } } if (defined $jar_file) { $self->jar_file($jar_file); } else { $self->jar_file($default_jar_file); } if (defined $java_class) { $self->java_class($java_class); } else { $self->java_class($default_java_class); } if (defined $exonerate) { $self->exonerate($exonerate); } else { $self->exonerate($default_exonerate); } # Try to locate jar file in usual places... if (!-e $self->jar_file) { $default_jar_file = $self->jar_file; if (-e "/usr/local/pecan/$default_jar_file") { $self->jar_file("/usr/local/pecan/$default_jar_file"); } elsif (-e "/usr/local/ensembl/pecan/$default_jar_file") { $self->jar_file("/usr/local/ensembl/pecan/$default_jar_file"); } elsif (-e "/usr/local/ensembl/bin/$default_jar_file") { $self->jar_file("/usr/local/ensembl/bin/$default_jar_file"); } elsif (-e "/usr/local/bin/pecan/$default_jar_file") { $self->jar_file("/usr/local/bin/pecan/$default_jar_file"); } elsif (-e $ENV{HOME}."/pecan/$default_jar_file") { $self->jar_file($ENV{HOME}."/pecan/$default_jar_file"); } elsif (-e $ENV{HOME}."/Downloads/$default_jar_file") { $self->jar_file($ENV{HOME}."/Downloads/$default_jar_file"); } else { throw("Cannot find Pecan JAR file!"); } } return $self; } sub fasta_files { my $self = shift; $self->{'_fasta_files'} = shift if(@_); return $self->{'_fasta_files'}; } sub tree_string { my $self = shift; $self->{'_tree_string'} = shift if(@_); return $self->{'_tree_string'}; } sub parameters { my $self = shift; $self->{'_parameters'} = shift if(@_); return $self->{'_parameters'}; } sub jar_file { my $self = shift; $self->{'_jar_file'} = shift if(@_); return $self->{'_jar_file'}; } sub java_class { my $self = shift; $self->{'_java_class'} = shift if(@_); return $self->{'_java_class'}; } sub exonerate { my $self = shift; $self->{'_exonerate'} = shift if(@_); return $self->{'_exonerate'}; } =head2 run_analysis Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Mlagan Arg [2] : string, program name Function : create and open a commandline for the program trf Returntype: none Exceptions: throws if the program in not executable or if the results file doesnt exist Example : =cut sub run_analysis { my ($self, $program) = @_; $self->run_mlagan; $self->parse_results; return 1; } sub run_mlagan { my $self = shift; chdir $self->workdir; throw($self->program . " is not executable Mlagan::run_analysis ") unless ($self->program && -x $self->program); my $command = $self->program; if ($self->parameters) { $command .= " " . $self->parameters; } $command .= " -cp ".$self->jar_file." ".$self->java_class; if (@{$self->fasta_files}) { $command .= " -F"; foreach my $fasta_file (@{$self->fasta_files}) { $command .= " $fasta_file"; } } $command .= " -J '" . $self->exonerate . "' -X"; if ($self->tree_string) { $command .= " -E '" . $self->tree_string . "'"; } $command .= " -G pecan.mfa"; if ($self->options) { $command .= " " . $self->options; } print "Running pecan: " . $command . "\n"; unless (system($command) == 0) { throw("pecan execution failed\n"); } } =head2 parse_results Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Mlagan Function : parse the specifed file and produce RepeatFeatures Returntype: nine Exceptions: throws if fails to open or close the results file Example : =cut sub parse_results{ my ($self, $run_number) = @_; my $alignment_file = $self->workdir . "/pecan.mfa"; my $this_genomic_align_block = new Bio::EnsEMBL::Compara::GenomicAlignBlock; open F, $alignment_file || throw("Could not open $alignment_file"); my $seq = ""; my $this_genomic_align; print "Reading $alignment_file...\n"; while (<F>) { next if (/^\s*$/); chomp; ## FASTA headers are defined in the Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::Pecan ## module (or any other module you use to create this Pecan analysis job). Here is an example: ## >DnaFrag1234|X.10001-20000:-1 ## This will correspond to chromosome X, which has dnafrag_id 1234 and the region goes from ## position 10001 to 20000 on the reverse strand. if (/^>/) { if (/^>DnaFrag(\d+)\|(.+)\.(\d+)\-(\d+)\:(\-?1)$/) { if (defined($this_genomic_align) and $seq) { $this_genomic_align->aligned_sequence($seq); $this_genomic_align_block->add_GenomicAlign($this_genomic_align); } $this_genomic_align = new Bio::EnsEMBL::Compara::GenomicAlign; $this_genomic_align->dnafrag_id($1); $this_genomic_align->dnafrag_start($3); $this_genomic_align->dnafrag_end($4); $this_genomic_align->dnafrag_strand($5); $seq = ""; } else { throw("Error while parsing the FASTA header. It must start by \">DnaFrag#####\" where ##### is the dnafrag_id\n$_"); } } else { $seq .= $_; } } close F; $this_genomic_align->aligned_sequence($seq); $this_genomic_align_block->add_GenomicAlign($this_genomic_align); $self->output([$this_genomic_align_block]); } 1;