Bio::EnsEMBL::Analysis::Runnable
LowCoverageGenomeAlignment
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable::LowCoverageGenomeAlignment
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::Compara
Data::Dumper
File::Basename
Inherit
Synopsis
my $runnable = new Bio::EnsEMBL::Analysis::Runnable::LowCoverageGenomeAlignment
(-workdir => $workdir,
-multi_fasta_file => "/path/to/mfa/file",
-tree_string => $tree_string,
-taxon_species_tree => "tree in Newick format"
-parameters => $parameters, (optional),
-options => $options);
$runnable->run;
my @output = @{$runnable->output};
Description
This module creates a new tree for those alignments which contain a segmental duplication. The module will runs treeBest where there are more than 3 sequences in an alignment, otherwise it will run semphy. This module is still under development.
Methods
Methods description
Arg [1] : -workdir => "/path/to/working/directory" Arg [2] : -multi_fasta_file => "/path/to/mfa/file" Arg [3] : -tree_string => $tree_string (optional) Arg [4] : -taxon_species_tree => $taxon_species_tree Arg [5] : -parameters => $parameters (optional) Arg [6] : -options => $options (optional)
Function : contruct a new Bio::EnsEMBL::Analysis::Runnable::LowCoverageGenomeAlignment
runnable
Returntype: Bio::EnsEMBL::Analysis::Runnable::LowCoverageGenomeAlignment
Exceptions: none
Example : |
Arg [1] : string: The command line argument of an exec() or system() call. Example : (system($command) == 0) || os_error_warning($command); Description: Evaluates an eventual operating system error in $! ($OS_ERROR) or the 16 bit wait(2) status word in $? ($CHILD_ERROR) after an exec() or system() call. It generates a warning message in both cases. Returntype : bool: true on success. Exceptions : none Caller : general Status : At Risk |
Arg [1] : Bio::EnsEMBL::Analysis::Runnable::LowCoverageGenomeAlignment Arg [2] : string, program name Description: run treebest if more than 3 sequences in the alignment else run semphy Returntype : none Exceptions : throws if the program in not executable or if the results file doesnt exist Example : Status : At risk |
Arg [1] : none Description: create and open a commandline for the program semphy Returntype : none Exceptions : throws if the program in not executable or if the results file doesnt exist Example : Status : At risk |
Arg [1] : none Description: create and open a commandline for the program treebest Returntype : none Exceptions : throws if the program in not executable or if the results file doesnt exist Example : Status : At risk |
Methods code
get_num_sequences | description | prev | next | Top |
sub get_num_sequences
{ my ($mfa) = @_;
my $cnt = 0;
open (MFA, "$mfa") || throw("Couldn't open $mfa");
while (<MFA>) {
$cnt++ if (/^>/);
}
close(MFA);
return $cnt; } |
sub multi_fasta_file
{ my $self = shift;
$self->{'_multi_fasta_file'} = shift if(@_);
return $self->{'_multi_fasta_file'}; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($workdir, $multi_fasta_file, $tree_string, $taxon_species_tree, $parameters, $options) =
rearrange(['WORKDIR', 'MULTI_FASTA_FILE', 'TREE_STRING',
'TAXON_SPECIES_TREE', 'PARAMETERS', 'OPTIONS'], @args);
chdir $self->workdir;
$self->multi_fasta_file($multi_fasta_file) if (defined $multi_fasta_file);
$self->tree_string($tree_string) if (defined $tree_string);
$self->taxon_species_tree($taxon_species_tree) if (defined $taxon_species_tree);
$self->parameters($parameters) if (defined $parameters);
$self->options($options) if (defined $options);
return $self; } |
sub options
{ my $self = shift;
$self->{'_options'} = shift if(@_);
return $self->{'_options'}; } |
sub os_error_warning
{ my $command = shift;
my $warning = '';
$warning .= 'Encountered ';
if ($!) {
$warning .= "OS_ERROR\' $!\'";
} else {
$warning .= 'CHILD_ERROR ';
$warning .= 'Exit code: ';
$warning .= ($? >> 8);
$warning .= ' ';
$warning .= 'Signal: ';
$warning .= ($? & 127);
$warning .= ' ';
if ($? & 128) {
$warning .= ' ';
$warning .= 'Core dump ';
}
}
$warning .= " on following command:\n";
$warning .= $command;
warning($warning);
return 1; } |
sub parameters
{ my $self = shift;
$self->{'_parameters'} = shift if(@_);
return $self->{'_parameters'}; } |
sub rearrange_multi_fasta_file
{ my ($tree_string, $mfa_file) = @_;
my (@ordered_leaves) = $tree_string =~ /(seq[^:]+)/g;
my $ordered_names;
for (my $i = 0; $i < @ordered_leaves; $i++) {
my $item;
$item->{name} = ">" . $ordered_leaves[$i];
$item->{leaf} = $ordered_leaves[$i];
push @$ordered_names, $item;
}
my %mfa_data;
open(MFA, $mfa_file) || throw("Could not open mfa file <$mfa_file>");
my $name;
my $seq;
while (<MFA>) {
next if (/^\s*$/);
chomp;
if (/^>/) {
if ($name) {
$mfa_data{$name} = $seq;
$seq = "";
}
$name = $_;
} else {
$seq .= $_;
}
}
$mfa_data{$name} = $seq;
close(MFA);
open MFA, ">$mfa_file" || throw("Couldn't open $mfa_file");
foreach my $ordered_name (@$ordered_names) {
next if (!defined $mfa_data{$ordered_name->{name}});
print MFA ">".$ordered_name->{leaf} . "\n";
my $seq = $mfa_data{$ordered_name->{name}};
$seq =~ s/(.{80})/$1\n/g;
chomp $seq;
print MFA $seq,"\n";
}
close MFA;
}
1; } |
sub run_analysis
{ my ($self, $program) = @_;
my $num_sequences = get_num_sequences($self->multi_fasta_file);
if ($num_sequences < 4) {
$self->run_semphy_2x;
} else {
$self->run_treebest_2x;
}
return 1;
}
} |
sub run_semphy_2x
{ my ($self) = @_;
return if ($self->tree_string);
chdir $self->workdir;
my $tree_file = "output.$$.tree";
my $command = "$SEMPHY --treeoutputfile=" . $tree_file . " -a 4 --hky -J -H -S --ACGprob=0.300000,0.200000,0.200000 --sequence=" . $self->multi_fasta_file;
print "Running semphy $command\n";
unless (system($command) == 0) {
throw("semphy execution failed\n");
os_error_warning($command);
}
if (-e $tree_file) {
open(F, $tree_file) || throw("Could not open tree file <$tree_file>");
my ($newick) = <F>;
close(F);
$newick =~ s/[\r\n]+$//;
rearrange_multi_fasta_file($newick, $self->multi_fasta_file);
}
} |
sub run_treebest_2x
{ my ($self) = @_;
return if ($self->tree_string);
chdir $self->workdir;
my $tree_file = "output.$$.tree";
my $species_tree_file = "species_tree.$$.tree";
open(F, ">$species_tree_file") || throw("Couldn't open $species_tree_file for writing");
print (F $self->taxon_species_tree);
close(F);
my $command = "$TREEBEST phyml -Snf $species_tree_file " . $self->multi_fasta_file . " | $TREEBEST sdi -rs $species_tree_file - > $tree_file";
print "Running treebest $command\n";
unless (system($command) == 0) {
throw("treebest execution failed\n");
os_error_warning($command);
}
if (-e $tree_file) {
open(F, $tree_file) || throw("Could not open tree file <$tree_file>");
my $newick;
while (<F>) {
$newick .= $_;
}
close(F);
$newick =~ s/[\r\n]+$//;
rearrange_multi_fasta_file($newick, $self->multi_fasta_file);
} } |
sub taxon_species_tree
{ my $self = shift;
$self->{'_taxon_species_tree'} = shift if(@_);
return $self->{'_taxon_species_tree'}; } |
sub tree_string
{ my $self = shift;
$self->{'_tree_string'} = shift if(@_);
return $self->{'_tree_string'}; } |
General documentation
Questions regarding this module should be addressed to the Ensembl development list:
ensembl-dev@ebi.ac.uk