# Raw content of Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::DumpLargeNibForChains
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::DumpLargeNibForChains
=cut
=head1 SYNOPSIS
=cut
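=head1 DESCRIPTION

Dumps the large DnaFragChunk objects of a named DnaCollection as .nib files.
Every chunk whose length is greater than dump_min_size (default 11500000) is
first written as a FASTA file in the collection's dump_loc directory and then
converted with the external faToNib program; the intermediate FASTA file is
removed afterwards. The collection is selected with the dna_collection_name
parameter, which is mandatory.

=cut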
=head1 CONTACT
Abel Ureta-Vidal
=cut
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::DumpLargeNibForChains;
use strict;
use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor;;
use Bio::EnsEMBL::Utils::Exception;
use Time::HiRes qw(time gettimeofday tv_interval);
our @ISA = qw(Bio::EnsEMBL::Pipeline::RunnableDB);
my $DEFAULT_DUMP_MIN_SIZE = 11500000;
=head2 fetch_input

  Title   : fetch_input
  Usage   : $self->fetch_input
  Function: Creates the Compara DBAdaptor used by this job, reads the
            settings from the analysis parameters and from the job input_id,
            and applies the default dump_min_size when none was supplied.
  Returns : 1
  Args    : none
  Throws  : if no dna_collection_name has been supplied

=cut

sub fetch_input {
  my $self = shift;

  # The Compara adaptor is built on the DBI handle that $self->db (the Hive
  # DBAdaptor) already holds, so both adaptors share one connection.
  $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(
    -DBCONN => $self->db->dbc,
  );

  # parameters are parsed first and input_id second, so a key present in
  # input_id overrides the same key coming from parameters.
  $self->get_params($self->parameters);
  $self->get_params($self->input_id);

  $self->dna_collection_name
    or throw("Missing dna_collection_name");

  # Any false value (unset, 0 or an empty string) is replaced by the default.
  $self->dump_min_size($DEFAULT_DUMP_MIN_SIZE) unless $self->dump_min_size;

  return 1;
}
# run
#
# Performs the actual work of the job by delegating to dumpNibFiles.
# Always returns 1; the return value of dumpNibFiles is ignored.
sub run {
  my ($self) = @_;

  $self->dumpNibFiles();

  return 1;
}
# write_output
#
# Nothing is stored by this module at this stage (the nib files are written
# during run), so this only reports success by returning 1.
sub write_output {
  my $self = shift;

  return 1;
}
##########################################
#
# getter/setter methods
#
##########################################
# dna_collection_name
#
# Combined getter/setter for the name of the DnaCollection to dump.
# When called with an argument (even undef) that argument is stored first;
# the currently stored value is returned in every case.
sub dna_collection_name {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_dna_collection_name'} = $new_value[0];
  }

  return $self->{'_dna_collection_name'};
}
# dump_min_size
#
# Combined getter/setter for the length threshold used when dumping: only
# chunks strictly longer than this value are converted to nib files.
# When called with an argument (even undef) that argument is stored first;
# the currently stored value is returned in every case.
sub dump_min_size {
  my ($self, @new_value) = @_;

  if (@new_value) {
    $self->{'_dump_min_size'} = $new_value[0];
  }

  return $self->{'_dump_min_size'};
}
##########################################
#
# internal methods
#
##########################################
# get_params
#
# Parses one parameter string (the analysis parameters or the job input_id)
# and copies the settings this module understands into the object.
#
# Arg [1]  : string holding Perl source for a hash reference, e.g.
#            "{'dna_collection_name'=>'...','dump_min_size'=>1000}"
# Returns  : 1 when the string was parsed into a hash reference;
#            nothing (undef / empty list) when the string is empty, cannot be
#            evaluated, or does not evaluate to a hash reference
# Recognised keys : dna_collection_name, dump_min_size. A key is applied only
#            when its value is defined; all other keys are ignored.
sub get_params {
  my $self         = shift;
  my $param_string = shift;

  return unless($param_string);
  print("parsing parameter string : ",$param_string,"\n");

  # NOTE(review): string eval runs whatever code the string contains. The
  # string is expected to come from the pipeline database; it must never be
  # fed from an untrusted source.
  my $params = eval($param_string);
  if ($@) {
    # A malformed string used to be dropped silently, which surfaced later as
    # a misleading "Missing dna_collection_name" error. Report the real cause.
    warn("Could not evaluate parameter string '$param_string': $@");
    return;
  }
  return unless($params);

  # A true value that is not a hash reference would die under strict refs on
  # the first key lookup below; skip it with an explicit message instead.
  unless (ref($params) eq 'HASH') {
    warn("Parameter string '$param_string' did not evaluate to a hash reference, ignoring it\n");
    return;
  }

  if (defined($params->{'dna_collection_name'})) {
    $self->dna_collection_name($params->{'dna_collection_name'});
  }
  if (defined($params->{'dump_min_size'})) {
    $self->dump_min_size($params->{'dump_min_size'});
  }

  return 1;
}
# dumpNibFiles
#
# Converts the large chunks of the configured DnaCollection into .nib files.
#
# For every DnaFragChunk of the collection whose length is greater than
# dump_min_size, the sequence is written to "<dump_loc>/<dnafrag name>.fa",
# converted with the external program faToNib into
# "<dump_loc>/<dnafrag name>.nib", and the FASTA file is then removed.
# DnaFragChunkSet objects are not expected here; they are reported with a
# warning and skipped. Objects of any other class are skipped silently.
#
# Returns : 1
# Throws  : if the collection cannot be fetched, if the collection has no
#           dump_loc, or if faToNib fails
sub dumpNibFiles {
  my $self = shift;

  # NOTE(review): presumably set so that the database connection is not kept
  # open while the (long) dump is running - confirm against DBConnection.
  $self->{'comparaDBA'}->dbc->disconnect_when_inactive(1);

  my $starttime = time();

  my $dna_collection = $self->{'comparaDBA'}->get_DnaCollectionAdaptor->fetch_by_set_description($self->dna_collection_name);
  unless (defined $dna_collection) {
    # Without this check a missing collection fails with an uninformative
    # "Can't call method dump_loc on an undefined value".
    throw("Could not fetch a dna_collection named '" . $self->dna_collection_name . "'\n");
  }

  my $dump_loc = $dna_collection->dump_loc;
  unless (defined $dump_loc) {
    throw("dump_loc directory is not defined, can not dump nib files\n");
  }

  foreach my $dna_object (@{$dna_collection->get_all_dna_objects}) {
    if ($dna_object->isa('Bio::EnsEMBL::Compara::Production::DnaFragChunkSet')) {
      warn "At this point you should get DnaFragChunk objects not DnaFragChunkSet objects!\n";
      next;
    }
    next unless ($dna_object->isa('Bio::EnsEMBL::Compara::Production::DnaFragChunk'));

    # Only chunks strictly longer than the threshold are dumped.
    next if ($dna_object->length <= $self->dump_min_size);

    my $file_stem = "$dump_loc/" . $dna_object->dnafrag->name;
    my $fastafile = "$file_stem.fa";
    my $nibfile   = "$file_stem.nib";

    #$dna_object->dump_to_fasta_file($fastafile);
    #use this version to solve problem of very large chromosomes eg opossum
    $dna_object->dump_chunks_to_fasta_file($fastafile);

    # List form of system: the file names are never interpreted by a shell.
    if (system("faToNib", $fastafile, $nibfile) != 0) {
      # $! is only meaningful when the program could not be started at all
      # ($? == -1); otherwise the failure is described by $?.
      my $reason;
      if ($? == -1) {
        $reason = "failed to execute faToNib: $!";
      }
      elsif ($? & 127) {
        $reason = sprintf("faToNib died with signal %d", $? & 127);
      }
      else {
        $reason = sprintf("faToNib exited with value %d", $? >> 8);
      }
      throw("Could not convert fasta file $fastafile to nib: $reason\n");
    }

    unlink($fastafile)
      or warn("Could not remove temporary fasta file $fastafile: $!\n");

    # The loop variable aliases the list element, so this drops the reference
    # held by the list - presumably to release the chunk's memory early.
    $dna_object = undef;
  }

  if ($self->debug) {
    # The accessor defined by this module is dna_collection_name; the former
    # call to collection_name referred to a method this file does not define.
    printf("%1.3f secs to dump nib for \"%s\" collection\n", (time()-$starttime), $self->dna_collection_name);
  }

  $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);

  return 1;
}
1;