# Raw content of XrefMapper::Methods::ExonerateBasic
# Base class that all other mapping methods inherit from
package XrefMapper::Methods::ExonerateBasic;
use strict;
use File::Basename;
use IPC::Open3;
# Path to exonerate executable.
# File-scoped lexical: it is interpolated into the exonerate command line
# built by submit_exonerate().
# NOTE(review): hard-coded absolute path - presumably site-specific; confirm
# it is valid on the machine(s) where the mapping jobs run.
# Previous location, kept for reference:
#my $exonerate_path = "/usr/local/ensembl/bin/exonerate-0.9.0";
my $exonerate_path = "/software/ensembl/bin/exonerate-1.4.0";
# Constructor. Builds an empty hash-based object blessed into the invoking
# class and initialises its submitted-job counter to zero via jobcount().
sub new {
  my $class = shift;

  my $self = bless {}, $class;
  $self->jobcount(0);

  return $self;
}
=head2 jobcount

  Arg [1]    : (optional) number of jobs to store
  Example    : $mapper->jobcount(1004);
  Description: Getter / Setter for number of jobs submitted. The stored
               value is only replaced when a defined argument is given.
  Returntype : scalar
  Exceptions : none

=cut

sub jobcount {
  my $self  = shift;
  my $count = shift;

  if (defined $count) {
    $self->{_jobcount} = $count;
  }

  return $self->{_jobcount};
}
=head2 run

  Arg [1]    : Query filename to pass to exonerate; this should be the XREF file.
  Arg [2]    : Target filename to pass to exonerate; this should be the ENSEMBL file.
  Arg [3]    : Mapper object, passed through unchanged to submit_exonerate().
  Example    : my $name = $method->run($query, $target, $mapper);
  Description: Run exonerate with the options returned by options().
  Returntype : scalar; whatever submit_exonerate() returns.
  Exceptions : none
  Caller     : general

=cut

# No prototype: prototypes are ignored for method calls and the former "()"
# contradicted the three arguments this method takes.
sub run {
  my ($self, $query, $target, $mapper) = @_;

  # options() is called in list context so that each option becomes its own
  # trailing argument of submit_exonerate().
  my $name = $self->submit_exonerate($query, $target, $mapper, $self->options());

  return $name;
}
=head2 options

  Args       : none
  Example    : my @opts = $method->options();
  Description: Options to pass to exonerate. Override as appropriate.
               This base implementation supplies no extra options.
  Returntype : List of strings (empty here).
  Exceptions : none
  Caller     : general

=cut

# No prototype: this is only ever invoked as a method, where prototypes are
# ignored.
sub options {
  return ();
}
=head2 submit_exonerate

  Arg [1]    : Query filename (the XREF file).
  Arg [2]    : Target filename (the ENSEMBL file).
  Arg [3]    : Mapper object. This method calls core->dir, verbose, nofarm
               and xref->dbc on it.
  Arg [4..n] : (optional) extra command line options for exonerate.
  Example    : my $name = $self->submit_exonerate($query, $target, $mapper, $self->options());
  Description: Submit a *single* mapping job array to exonerate. When the
               mapper's nofarm flag is defined the command is run locally
               with system() instead, and nothing is written to the database.
  Returntype : "nofarm" for a local run; otherwise the name of the job array
               submitted to LSF (can be used in a depend job), or 0 if the
               submission failed.
  Exceptions : none (submission errors are caught and reported with warn)
  Caller     : general

=cut

sub submit_exonerate {
  my ($self, $query, $target, $mapper, @options) = @_;

  my $root_dir = $mapper->core->dir;

  print "query $query\n"   if($mapper->verbose);
  print "target $target\n" if($mapper->verbose);

  # Output/error file prefix: root dir plus the query file name without its
  # extension.
  my $prefix = $root_dir . "/" . basename($query);
  $prefix =~ s/\.\w+$//;

  # dna or peptide, taken from the end of the prefix. Stays undefined when
  # the query file name does not end in _dna or _peptide.
  my ($ensembl_type) = $prefix =~ /.*_(dna|peptide)$/;

  my $options_str = join(" ", @options);
  my $class_name  = $self->get_class_name();
  my $unique_name = $class_name . "_" . time();

  # Combined size of both input files, rounded up to whole MB; requested as
  # tmp space from LSF below.
  my $disk_space_needed = (stat($query))[7]+(stat($target))[7];
  $disk_space_needed /= 1024000; # convert to MB
  $disk_space_needed = int($disk_space_needed);
  $disk_space_needed += 1;

  my $num_jobs = calculate_num_jobs($query);

  # Report format shared by every exonerate invocation. Single quotes keep
  # the backslash-n literal so that it reaches exonerate unexpanded.
  my $ryo            = '"xref:%qi:%ti:%ei:%ql:%tl:%qab:%qae:%tab:%tae:%C:%s\n"';
  my $report_options = "--showvulgar false --showalignment FALSE --ryo $ryo $options_str";

  if(defined($mapper->nofarm)){
    my $output = $class_name . "_" . $ensembl_type . "_1.map";

    # NOTE(review): the original here-document body was lost from this copy
    # of the file; this command is reconstructed from the farm command below
    # without the chunking options - confirm against version control.
    my $cmd = "$exonerate_path $query $target $report_options | grep \"^xref\" > $root_dir/$output\n";

    print "none farm command is $cmd\n" if($mapper->verbose);

    # Exit status deliberately not checked: it is the status of grep, which
    # is non-zero whenever there are simply no hits.
    system($cmd);

    $self->jobcount(1);
    return "nofarm";
  }

  # array features barf if just one job
  if($num_jobs == 1){
    $num_jobs++;
  }

  $self->jobcount($self->jobcount()+$num_jobs);

  # \$LSB_JOBINDEX is left for the shell of each array element to expand.
  my $output = $class_name . "_" . $ensembl_type . "_" . "\$LSB_JOBINDEX.map";

  my @main_bsub = ( 'bsub', '-R' .'select[linux] -Rrusage[tmp='.$disk_space_needed.']', '-J' . $unique_name . "[1-$num_jobs]%200", '-o', "$prefix.%J-%I.out", '-e', "$prefix.%J-%I.err");

  # Command executed by every element of the job array; the same string is
  # recorded in the mapping table below.
  # NOTE(review): the original here-document body was lost from this copy of
  # the file; the script is reconstructed from that recorded command -
  # confirm against version control.
  my $command = "$exonerate_path $query $target --querychunkid \$LSB_JOBINDEX --querychunktotal $num_jobs $report_options | grep \"^xref\" > $root_dir/$output";

  # Create actual execute script to be executed with LSF, and write to pipe
  my $main_job = $command . "\n";

  # now submit it
  my $jobid = 0;

  eval {
    local *BSUB;
    local *BSUB_READER;

    # Forking open: the parent reads whatever the child (and bsub) print.
    my $reader = open(BSUB_READER, '-|');
    die("Could not fork : $!\n") unless (defined($reader));

    if ($reader) {
      # Parent: scan the output for the LSF job id.
      my $bsub_output = '';
      while (my $line = <BSUB_READER>) {
        $bsub_output .= $line;
        if ($line =~ /^Job <(\d+)> is submitted/) {
          $jobid = $1;
          print "LSF job ID for main mapping job: $jobid (job array with $num_jobs jobs)\n" if($mapper->verbose);
        }
      }
      # close() is false when the child exited with a non-zero status.
      close(BSUB_READER)
        or die("bsub submission failed:\n$bsub_output");
    } else {
      # Child: pipe the job script into bsub. Failures are trapped here so
      # that this process always exits instead of unwinding into the caller
      # and carrying on as a second copy of the program.
      my $submitted = eval {
        open(STDERR, ">&STDOUT");

        my $pid = open(BSUB, '|-');
        die("Could not fork : $!\n") unless (defined($pid));

        if ($pid) {
          print BSUB $main_job;
          close BSUB;
          if ($? != 0) {
            die("bsub exited with non-zero status - job not submitted\n");
          }
        } else {
          # Grandchild: becomes bsub, reading the script on STDIN.
          exec(@main_bsub);
          die("Could not exec bsub : $!\n");
        }
        1;
      };

      if (!$submitted) {
        # STDERR is a duplicate of STDOUT here, so the parent receives this.
        print STDERR $@;
        exit(1);
      }
      exit(0);
    }
  };

  if ($@) {
    # Something went wrong
    warn("Job submission failed:\n$@\n");
    return 0;
  }

  # write details of job to database
  my $sth = $mapper->xref->dbc->prepare("insert into process_status (status, date) values('mapping_submitted',now())");
  $sth->execute();
  $sth->finish;

  # Placeholders rather than interpolation: the command line contains quotes
  # and a backslash sequence, which are now stored verbatim.
  $sth = $mapper->xref->dbc->prepare("insert into mapping (job_id, type, command_line, percent_query_cutoff, percent_target_cutoff, method, array_size) values (?,?,?,?,?,?,?)");
  $sth->execute($jobid, $ensembl_type, $command,
                $self->query_identity_threshold, $self->target_identity_threshold,
                $class_name, $num_jobs);
  $sth->finish;

  # One mapping_jobs row per element of the job array.
  # NOTE(review): the out/err names use a fixed "xref_0_" stem while bsub
  # writes to "$prefix.%J-%I.*"; presumably the query file is always named
  # xref_0_<type>.<ext> - confirm.
  $sth = $mapper->xref->dbc->prepare("insert into mapping_jobs (root_dir, map_file, status, out_file, err_file, array_number, job_id) values (?,?,?,?,?,?,?)");

  for my $i (1 .. $num_jobs) {
    my $map_file = $class_name."_".$ensembl_type."_".$i.".map";
    my $out_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".out";
    my $err_file = "xref_0_".$ensembl_type.".".$jobid."-".$i.".err";
    $sth->execute($root_dir, $map_file, 'SUBMITTED', $out_file, $err_file, $i, $jobid);
  }
  $sth->finish;

  return $unique_name;
}
=head2 calculate_num_jobs

  Arg [1]    : Query file name
  Example    : my $num_jobs = calculate_num_jobs($query);
  Description: Calculate the number of LSF jobs to submit based on the size
               of the query file: one job per 250000 bytes (rounded down),
               with a minimum of one job for any non-empty file.
  Returntype : The number of jobs; 0 for an empty or unreadable file.
  Exceptions : none
  Caller     : general

=cut

sub calculate_num_jobs {
  my ($query_file) = @_;

  my $chunk_bytes = 250000;

  # -s yields a false value for a zero-length or missing file.
  my $file_bytes = -s $query_file;
  return 0 unless $file_bytes;

  my $whole_chunks = int($file_bytes / $chunk_bytes);

  return $whole_chunks > 0 ? $whole_chunks : 1;
}
# Get the unqualified class name of an object, i.e. the last "::"-separated
# component of the package it is blessed into,
# e.g. return ExonerateBasic for an XrefMapper::Methods::ExonerateBasic object.
# Returns undef when the invocant is not a reference.
# No prototype: this is invoked as a method, where prototypes are ignored.
sub get_class_name {
  my $self = shift;

  # ref() gives just the package name of a blessed reference.
  my $module_name = ref($self);
  my @bits = split(/::/, $module_name);

  return $bits[-1];
}
# Look through the *.err files in the given directory and print a warning
# for each one that is not empty; a non-empty error file indicates that
# something may have gone wrong with the exonerate run.
sub check_err {
  my $self = shift;
  my $dir  = shift;

  my @err_files = glob("$dir/*.err");

  for my $err (@err_files) {
    if (-s $err) {
      print "\n\n*** Warning: $err has non-zero size; may indicate problems with exonerate run\n\n\n";
    }
  }
}
# Minimum percentage identity of the query (xref) sequence for a match to be
# considered. This implementation returns 0; submit_exonerate() records the
# value as percent_query_cutoff.
sub query_identity_threshold {
  my $no_cutoff = 0;

  return $no_cutoff;
}
# Minimum percentage identity of the target (ensembl) sequence for a match
# to be considered. This implementation returns 0; submit_exonerate() records
# the value as percent_target_cutoff.
sub target_identity_threshold {
  my $no_cutoff = 0;

  return $no_cutoff;
}
1;