Bio::EnsEMBL::Compara::Production::GenomicAlignBlock
DumpDnaCollection
Toolbar
Summary
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::DumpDnaCollection
Package variables
Privates (from "my" definitions)
$DEFAULT_DUMP_MIN_SIZE = 11500000
Included modules
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Synopsis
Description
Methods
create_ooc_file | No description | Code |
dna_collection_name | No description | Code |
dumpDnaFiles | No description | Code |
dumpNibFiles | No description | Code |
dump_dna | No description | Code |
dump_min_size | No description | Code |
dump_nib | No description | Code |
fetch_input | Description | Code |
get_params | No description | Code |
run | No description | Code |
write_output | No description | Code |
Methods description
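Title : fetch_input Usage : $self->fetch_input Function: Creates the compara production DBAdaptor, reads the settings from the analysis parameters and the job input_id, and checks that a dna_collection_name and at least one of dump_nib / dump_dna are set Returns : 1 Args : none |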
Methods code
create_ooc_file | description | prev | next | Top |
# create_ooc_file($dir, $seq_region)
#
# Runs the Blat runnable against "$dir/$seq_region.fa" with the -makeOoc
# option pointing at "$dir/$seq_region/5ooc", creating the "$dir/$seq_region"
# directory first when it does not exist yet.
#
# NOTE: the arguments are unpacked positionally with no invocant, so this is
# a plain function; calling it as a method would put the object into $dir.
#
# Args    : $dir        - directory holding "<seq_region>.fa"
#           $seq_region - sequence region name (also used as the sub-directory)
# Returns : path of the ooc file ("$dir/$seq_region/5ooc")
# Throws  : if the sub-directory cannot be created
sub create_ooc_file {
    my ($dir, $seq_region) = @_;

    my $region_dir = "$dir/$seq_region";
    my $ooc_file   = "$region_dir/5ooc";

    if (!-e $region_dir) {
        # Include $! so the reason (permissions, missing parent, ...) is visible.
        mkdir($region_dir)
            or throw("Directory $region_dir cannot be created: $!");
    }

    # Direct method call instead of the ambiguous indirect-object form.
    my $runnable = Bio::EnsEMBL::Analysis::Runnable::Blat->new(
        -database    => "$dir/$seq_region.fa",
        -query_type  => "dnax",
        -target_type => "dnax",
        -options     => "-ooc=$ooc_file -tileSize=5 -makeOoc=$ooc_file -mask=lower -qMask=lower",
    );
    $runnable->run;

    return $ooc_file;
}
1; } |
# dna_collection_name([$name])
#
# Combined getter/setter for the '_dna_collection_name' slot of the object.
# When an argument is supplied (even undef) it is stored first.
#
# Returns : the value currently held in the slot
sub dna_collection_name {
    my ($self, @args) = @_;

    if (@args) {
        $self->{'_dna_collection_name'} = $args[0];
    }

    return $self->{'_dna_collection_name'};
}
# dumpDnaFiles
#
# Dumps the objects of the DNA collection named by dna_collection_name as
# FASTA files into the collection's dump_loc directory.
#
#  * DnaFragChunkSet : every chunk of the set is dumped to a single file named
#                      after the first chunk (<dnafrag name>_<start>_<end>.fa).
#  * DnaFragChunk    : dumped to <dnafrag name>_<start>_<end>.fa, but only when
#                      its length is greater than dump_min_size.
#
# A pre-existing file with the target name is removed before dumping.
# disconnect_when_inactive is switched on for the compara connection while
# dumping and switched off again on normal return.
#
# Returns : 1
# Throws  : if the collection has no dump_loc
sub dumpDnaFiles {
    my $self = shift;

    $self->{'comparaDBA'}->dbc->disconnect_when_inactive(1);

    my $starttime = time();

    my $dna_collection = $self->{'comparaDBA'}->get_DnaCollectionAdaptor
        ->fetch_by_set_description($self->dna_collection_name);
    my $dump_loc = $dna_collection->dump_loc;
    unless (defined $dump_loc) {
        throw("dump_loc directory is not defined, can not dump dna files\n");
    }

    foreach my $dna_object (@{ $dna_collection->get_all_dna_objects }) {
        if ($dna_object->isa('Bio::EnsEMBL::Compara::Production::DnaFragChunkSet')) {
            my $chunk_array = $dna_object->get_all_DnaFragChunks;
            my $first_chunk = $chunk_array->[0];

            # An empty set has no chunk to name the file after; skip it rather
            # than dying on a method call on undef.
            if (!defined $first_chunk) {
                warn "Skipping empty DnaFragChunkSet: no chunks to dump\n";
                next;
            }

            my $name = $first_chunk->dnafrag->name . "_" . $first_chunk->seq_start . "_" . $first_chunk->seq_end;
            my $fastafile = "$dump_loc/" . $name . ".fa";
            if (-e $fastafile) {
                unlink $fastafile;
            }

            # The leading ">" is passed through as part of the file argument;
            # presumably it makes dump_to_fasta_file append so that all chunks
            # end up in the same file -- TODO confirm against DnaFragChunk.
            foreach my $chunk (@$chunk_array) {
                $chunk->dump_to_fasta_file(">" . $fastafile);
            }
        }

        if ($dna_object->isa('Bio::EnsEMBL::Compara::Production::DnaFragChunk')) {
            next if ($dna_object->length <= $self->dump_min_size);

            my $name = $dna_object->dnafrag->name . "_" . $dna_object->seq_start . "_" . $dna_object->seq_end;
            my $fastafile = "$dump_loc/" . $name . ".fa";
            if (-e $fastafile) {
                unlink $fastafile;
            }
            $dna_object->dump_to_fasta_file(">" . $fastafile);
        }

        # $dna_object aliases the element of the returned list, so this drops
        # the list's reference to the object once it has been dumped.
        $dna_object = undef;
    }

    if ($self->debug) {
        # Use this module's own accessor (dna_collection_name); no
        # collection_name method is defined here.
        printf("%1.3f secs to dump dna for \"%s\" collection\n",
               (time() - $starttime), $self->dna_collection_name);
    }

    $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);

    return 1;
}
} |
# dumpNibFiles
#
# Converts the DnaFragChunk objects of the DNA collection named by
# dna_collection_name into .nib files in the collection's dump_loc directory.
#
# For every chunk longer than dump_min_size whose <dnafrag name>.nib does not
# exist yet, the chunk is written to <dnafrag name>.fa, converted with the
# external "faToNib" program, and the intermediate FASTA file is removed.
# DnaFragChunkSet objects are not expected here; they trigger a warning and
# are skipped.
#
# disconnect_when_inactive is switched on for the compara connection while
# dumping and switched off again on normal return.
#
# Returns : 1
# Throws  : if the collection has no dump_loc, or if faToNib cannot be run or
#           exits with a non-zero status
sub dumpNibFiles {
    my $self = shift;

    $self->{'comparaDBA'}->dbc->disconnect_when_inactive(1);

    my $starttime = time();

    my $dna_collection = $self->{'comparaDBA'}->get_DnaCollectionAdaptor
        ->fetch_by_set_description($self->dna_collection_name);
    my $dump_loc = $dna_collection->dump_loc;
    unless (defined $dump_loc) {
        throw("dump_loc directory is not defined, can not dump nib files\n");
    }

    foreach my $dna_object (@{ $dna_collection->get_all_dna_objects }) {
        if ($dna_object->isa('Bio::EnsEMBL::Compara::Production::DnaFragChunkSet')) {
            warn "At this point you should get DnaFragChunk objects not DnaFragChunkSet objects!\n";
            next;
        }

        if ($dna_object->isa('Bio::EnsEMBL::Compara::Production::DnaFragChunk')) {
            next if ($dna_object->length <= $self->dump_min_size);

            my $nibfile = "$dump_loc/" . $dna_object->dnafrag->name . ".nib";
            next if (-e $nibfile);

            my $fastafile = "$dump_loc/" . $dna_object->dnafrag->name . ".fa";
            $dna_object->dump_chunks_to_fasta_file($fastafile);

            # List-form system() bypasses the shell. $! is only meaningful
            # when the program could not be started (return value -1);
            # otherwise the failure is described by the wait status.
            my $status = system("faToNib", $fastafile, $nibfile);
            if ($status != 0) {
                my $reason = ($status == -1)
                    ? "failed to execute faToNib: $!"
                    : "faToNib exit status " . ($status >> 8) . ", signal " . ($status & 127);
                throw("Could not convert fasta file $fastafile to nib: $reason\n");
            }

            unlink $fastafile;

            # $dna_object aliases the element of the returned list, so this
            # drops the list's reference to the object once it is converted.
            $dna_object = undef;
        }
    }

    if ($self->debug) {
        # Use this module's own accessor (dna_collection_name); no
        # collection_name method is defined here.
        printf("%1.3f secs to dump nib for \"%s\" collection\n",
               (time() - $starttime), $self->dna_collection_name);
    }

    $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);

    return 1;
}
# dump_dna([$flag])
#
# Getter/setter for the '_dump_dna' slot. run() checks this value to decide
# whether dumpDnaFiles is called.
#
# Returns : the value currently held in the slot
sub dump_dna {
    my ($self, @new_value) = @_;

    $self->{'_dump_dna'} = $new_value[0] if @new_value;

    return $self->{'_dump_dna'};
}
# dump_min_size([$size])
#
# Getter/setter for the '_dump_min_size' slot. The dump routines skip any
# DnaFragChunk whose length is less than or equal to this value.
#
# Returns : the value currently held in the slot
sub dump_min_size {
    my $self = shift;

    # Pure read when called without an argument.
    return $self->{'_dump_min_size'} unless @_;

    $self->{'_dump_min_size'} = shift;
    return $self->{'_dump_min_size'};
}
} |
# dump_nib([$flag])
#
# Getter/setter for the '_dump_nib' slot. run() checks this value to decide
# whether dumpNibFiles is called.
#
# Returns : the value currently held in the slot
sub dump_nib {
    my $self = shift;

    if (scalar @_) {
        my ($flag) = @_;
        $self->{'_dump_nib'} = $flag;
    }

    return $self->{'_dump_nib'};
}
# fetch_input
#
# Prepares the job before run():
#   1. creates the compara production DBAdaptor on top of the current
#      database connection and stores it in $self->{'comparaDBA'};
#   2. reads settings from the analysis parameters and then from the job
#      input_id (input_id values are applied last, so they win);
#   3. falls back to $DEFAULT_DUMP_MIN_SIZE when no dump_min_size was given.
#
# Returns : 1
# Throws  : if dna_collection_name is missing, or if neither dump_nib nor
#           dump_dna is set
sub fetch_input {
    my ($self) = @_;

    $self->{'comparaDBA'} =
        Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);

    $self->get_params($self->parameters);
    $self->get_params($self->input_id);

    throw("Missing dna_collection_name") unless ($self->dna_collection_name);

    # Only apply the default when nothing was supplied. A plain truth test
    # would also override an explicit dump_min_size of 0.
    my $min_size = $self->dump_min_size;
    unless (defined $min_size && $min_size ne '') {
        $self->dump_min_size($DEFAULT_DUMP_MIN_SIZE);
    }

    # Only dump_nib and dump_dna are read by get_params, so only those two
    # are named in the error.
    throw("Missing dump_nib or dump_dna method") unless ($self->dump_nib || $self->dump_dna);

    return 1;
}
# get_params($param_string)
#
# Evaluates $param_string as Perl source that is expected to yield a hash
# reference, and copies the recognised keys (dna_collection_name,
# dump_min_size, dump_dna, dump_nib) into the accessors of the same name.
# Keys that are absent or undef are left untouched.
#
# Args    : $param_string - e.g. "{'dna_collection_name'=>'x','dump_nib'=>1}"
# Returns : 1 when a hash of parameters was processed; nothing (undef / empty
#           list) when the string is empty, cannot be evaluated, or does not
#           evaluate to a hash reference
sub get_params {
    my $self         = shift;
    my $param_string = shift;

    return unless ($param_string);

    # NOTE(review): string eval executes arbitrary Perl. It is kept because
    # the parameter strings are stored in this format, but it must only ever
    # be fed trusted input.
    my $params = eval($param_string);
    if ($@) {
        # Report the failure so a malformed string is not silently ignored.
        warn("get_params: could not evaluate parameter string '$param_string': $@");
        return;
    }

    # Anything other than a hash reference cannot be used as a parameter set.
    return unless ($params && ref($params) eq 'HASH');

    foreach my $key (qw(dna_collection_name dump_min_size dump_dna dump_nib)) {
        next unless defined($params->{$key});
        $self->$key($params->{$key});
    }

    return 1;
}
# run
#
# Executes the dumps that were requested: dumpNibFiles when dump_nib is true,
# then dumpDnaFiles when dump_dna is true.
#
# Returns : 1
sub run {
    my ($self) = @_;

    $self->dumpNibFiles if $self->dump_nib;
    $self->dumpDnaFiles if $self->dump_dna;

    return 1;
}
# write_output
#
# Nothing is written back here; the method does no work and simply reports
# success.
#
# Returns : 1
sub write_output {
    my $self = shift;

    return 1;
}
} |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _