# Raw content of Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::BlastZ
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code

=pod

=head1 NAME

Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::BlastZ

=cut

=head1 SYNOPSIS

  my $db     = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
  my $blastz = Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::BlastZ->new(
      -db       => $db,
      -input_id => $input_id,
      -analysis => $analysis,
  );
  $blastz->fetch_input();    # reads from DB
  $blastz->run();
  $blastz->output();
  $blastz->write_output();   # writes to DB

=cut

=head1 DESCRIPTION

This object wraps Bio::EnsEMBL::Analysis::Runnable::Blastz to add
functionality to read and write to databases.
The appropriate Bio::EnsEMBL::Analysis object must be passed for
extraction of appropriate parameters. A Bio::EnsEMBL::Pipeline::DBSQL::Obj is
required for database access.

=cut

=head1 CONTACT

Describe contact details here

=cut

=head1 APPENDIX

The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _

=cut

package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::BlastZ;

use strict;
use warnings;

use Bio::EnsEMBL::Analysis::Runnable::Blastz;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);

use Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::PairAligner;
our @ISA = qw(Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::PairAligner);


# configure_defaults
#
# Sets the default settings for this aligner on $self: the blastz option
# string, the method link type and the max_alignments value.
#
# Returns: 0
sub configure_defaults {
    my $self = shift;

    $self->options('T=2 H=2200');
    $self->method_link_type('BLASTZ_RAW');

    # Although a non-zero limit seems a good idea in principle, it takes
    # longer and longer to check as the genomic_align_block table gets longer.
    # NOTE(review): presumably '0' disables that check - confirm in PairAligner.
    #$self->max_alignments('5000000');
    $self->max_alignments('0');

    return 0;
}


# configure_runnable
#
# Dumps the query chunk (or the whole query chunk set) and every database
# chunk to the work directory, then registers one
# Bio::EnsEMBL::Analysis::Runnable::Blastz per database chunk file through
# $self->runnable().
#
# Returns: 1
sub configure_runnable {
    my $self = shift;

    #
    # get the sequences and create the runnable
    #
    my $query_set = $self->query_DnaFragChunkSet;
    my $qyChunkFile;
    if ($query_set->count == 1) {
        # A single query chunk is dumped on its own.
        my ($qy_chunk) = @{ $query_set->get_all_DnaFragChunks };
        $qyChunkFile = $self->dumpChunkToWorkdir($qy_chunk);
    }
    else {
        # Several query chunks are dumped together as one set.
        $qyChunkFile = $self->dumpChunkSetToWorkdir($self->query_DnaFragChunkSet);
    }

    #if ($self->db_DnaFragChunkSet->count > 1) {
    #  throw("blastz can not use more than 1 sequence in the database/target file.\n" .
    #        "You may have specified a group_set_size in the target_dna_collection.\n" .
    #        "In the case of blastz this should only be used for query_dna_collection");
    #}

    # Every database chunk gets its own dump file (and, below, its own runnable).
    my @db_chunk_files;
    foreach my $db_chunk (@{ $self->db_DnaFragChunkSet->get_all_DnaFragChunks }) {
        push @db_chunk_files, $self->dumpChunkToWorkdir($db_chunk);
    }

    if (@db_chunk_files > 1) {
        warning("you have given a chunkset for the database; dumping individual chunks\n" .
                "and creating a runnable for each one");
    }

    # Fall back to the plain program name when the analysis does not set one.
    my $program = $self->analysis->program_file || 'blastz';

    # Read the debug level once; default to 0 so the numeric test below
    # never compares an undefined value.
    my $debug = $self->debug || 0;

    if ($debug) {
        print("running with analysis '".$self->analysis->logic_name."'\n");
        print(" options : ", $self->options, "\n");
        print(" program : $program\n");
    }

    $self->delete_fasta_dumps_but_these([$qyChunkFile, @db_chunk_files]);

    foreach my $dbChunkFile (@db_chunk_files) {
        #
        # BIG WARNING!!!! The DB and Query are deliberately FLIPPED here
        # because it looks like blastz flipped them in the parameter list
        # from expected: the database chunk is passed as -query and the
        # query chunk as -database.
        #
        my $runnable = Bio::EnsEMBL::Analysis::Runnable::Blastz->new(
            -query    => $dbChunkFile,
            -database => $qyChunkFile,
            -options  => $self->options,
            -program  => $program,
            -analysis => $self->analysis,
        );

        if ($debug > 1) {
            # Name the results file after the last path component of the dump.
            my ($fid) = $dbChunkFile =~ m{([^/]+)$};
            $runnable->resultsfile($self->worker_temp_directory . "/results.$fid.");
            $runnable->results_to_file(1);  # switch on whether to use pipe or /tmp file
        }

        $self->runnable($runnable);
    }

    return 1;
}

1;