Raw content of Bio::EnsEMBL::Analysis::Tools::BlastDB
package Bio::EnsEMBL::Analysis::Tools::BlastDB;
use strict;
no warnings;
use vars qw (@ISA);
@ISA = qw();
use Bio::EnsEMBL::Utils::Exception qw(verbose throw warning stack_trace_dump);
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Analysis::Tools::Logger qw(logger_info);
use Bio::EnsEMBL::Analysis::Tools::Utilities qw(write_seqfile create_file_name);
sub new{
my ($class,@args) = @_;
my $self = bless {},$class;
my ($sequences, $sequence_file, $molecule_type, $blast_type, $format_command,
$output_dir)
= rearrange ([qw(SEQUENCES
SEQUENCE_FILE
MOL_TYPE
BLAST_TYPE
FORMAT_COMMAND
OUTPUT_DIR)], @args);
#default setting
$self->blast_type("wublast");
$self->output_dir("/tmp/");
###############
$self->sequences($sequences);
$self->sequence_file($sequence_file);
$self->molecule_type($molecule_type);
$self->blast_type($blast_type);
$self->format_command($format_command);
$self->output_dir($output_dir);
return $self;
}
sub sequences{
my ($self, $arg) = @_;
if($arg){
throw("BlastDB::sequences ".$arg." must be an array ref ")
unless(ref($arg) eq "ARRAY");
push(@{$self->{sequences}}, @{$arg});
}
return $self->{sequences};
}
sub sequence_file{
my ($self, $arg) = @_;
if($arg){
$self->{seq_file} = $arg;
}
if(!$self->{seq_file}){
$self->{seq_file} = create_file_name("blastdb", "fa", $self->temp_dir);
}
return $self->{seq_file};
}
sub molecule_type{
my ($self, $arg) = @_;
if($arg){
$arg = uc($arg);
throw($arg." must be either DNA or PROTEIN") unless($arg eq "DNA" ||
$arg eq "PROTEIN");
$self->{mol_type} = $arg;
}
return $self->{mol_type};
}
sub blast_type{
my ($self, $arg) = @_;
if($arg){
$arg = lc($arg);
throw($arg." must be either wublast, old_wublast or ncbi")
unless($arg eq "wublast" || $arg eq "old_wublast" || $arg eq "ncbi");
$self->{blast_type} = $arg;
}
return $self->{blast_type};
}
sub format_command{
my ($self, $arg) = @_;
if($arg){
$self->{format_command} = $arg;
}
return $self->{format_command};
}
sub output_dir{
my ($self, $arg) = @_;
if($arg){
throw($arg." must be a directory") unless(-d $arg);
$self->{output_dir} = $arg;
}
return $self->{output_dir};
}
sub temp_dir{
my ($self, $arg) = @_;
if($arg){
throw($arg." must be a directory") unless(-d $arg);
$self->{temp_dir} = $arg;
}
if(!$self->{temp_dir}){
my $id_num = $$;
my $blastdb_dir;
do {
$blastdb_dir = $self->output_dir . "/tempblast." . $id_num . "/";
$id_num++;
} while ( -d $blastdb_dir);
mkdir $blastdb_dir;
$self->{temp_dir} = $blastdb_dir;
}
return $self->{temp_dir};
}
sub create_blastdb{
my ($self, $seq_file, $format_command) = @_;
$seq_file = $self->sequence_file if(!$seq_file);
if(! -e $seq_file){
throw("Your seqfile ".$seq_file." does not exist you must have some ".
"seq objects defined to dump in it") unless($self->sequences);
write_seqfile($self->sequences, $seq_file);
}
$format_command = $self->format_command if(!$format_command);
if(!$format_command){
$format_command = $self->discover_command;
}
my $command = $self->format_command." ".$seq_file;
logger_info("Running ".$command);
open SAVEOUT, ">&STDOUT";
open SAVEERR, ">&STDERR";
open STDOUT, "/dev/null";
open STDERR, "/dev/null";
my $exit_status = system($command);
# restore STDOUT and STDERR
open STDOUT, ">&SAVEOUT";
open STDERR, ">&SAVEERR";
if ($exit_status) {
throw("Failed to run ".$command." exited with ".$exit_status);
} else {
return $seq_file;
}
}
sub discover_command{
my ($self) = @_;
if($self->blast_type eq "wublast"){
if($self->molecule_type eq "DNA"){
$self->format_command('xdformat -n -I');
}elsif($self->molecule_type eq "PROTEIN"){
$self->format_command('xdformat -p -I');
}else{
throw("Don't recognise mol type ".$self->molecule_type);
}
}elsif($self->blast_type eq "old_wublast"){
if($self->molecule_type eq "DNA"){
$self->format_command('pressdb');
}elsif($self->molecule_type eq "PROTEIN"){
$self->format_command('setdb');
}else{
throw("Don't recognise mol type ".$self->molecule_type);
}
}elsif($self->blast_type eq "ncbi"){
if($self->molecule_type eq "DNA"){
$self->format_command('formatdb -o -p F -i');
}elsif($self->molecule_type eq "PROTEIN"){
$self->format_command('formatdb -o -i ');
}else{
throw("Don't recognise mol type ".$self->molecule_type);
}
}
throw("Format command ".$self->format_command." must both be defined")
unless ($self->format_command);
return $self->format_command;
}
sub list_dbfiles{
my ($self) = @_;
if(!$self->{index_files}){
opendir (BLASTDB_DIR, $self->temp_dir) or throw("BlastDB: Failed to ".
"open ".$self->temp_dir);
my @dbfiles = readdir BLASTDB_DIR;
closedir BLASTDB_DIR;
my @full_path;
foreach my $dbfile(@dbfiles){
next if($dbfile eq "." || $dbfile eq "..");
my $full = $self->temp_dir."/".$dbfile;
push(@full_path, $full);
}
push(@full_path, $self->temp_dir);
$self->{index_files} = \@full_path;
}
return $self->{index_files};
}
1;