Bio::EnsEMBL::Compara::Production::GenomicAlignBlock
FilterStack
Toolbar
Summary
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::FilterStack
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Pipeline::RunnableDB
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Bio::EnsEMBL::Pipeline::RunnableDB
Synopsis
my $db = Bio::EnsEMBL::Compara::DBAdaptor->new($locator);
my $runnable = Bio::EnsEMBL::Pipeline::RunnableDB::FilterStack->new (
-db => $db,
-input_id => $input_id
-analysis => $analysis );
$runnable->fetch_input(); #reads from DB
$runnable->run();
$runnable->write_output(); #writes to DB
Description
This is designed to run after Chaining and Netting has been done on a PairWise analysis. It will perform a sort of pseudo softmasking on regions of large numbers of overlapping genomic_aligns on the non-reference species. It marks regions where there are more than "threshold" genomic_aligns and will remove all genomic_aligns and genomic_align_blocks that are entirely within this region. It takes as input (on the input_id string).
Methods
assign_jobID_to_genomic_align_block | No description | Code |
delete_alignments | No description | Code |
fetch_input | Description | Code |
filter_stack | No description | Code |
find_stack_coverage | No description | Code |
get_params | No description | Code |
print_gab | No description | Code |
print_params | No description | Code |
run | No description | Code |
write_output | No description | Code |
Methods description
Title : fetch_input Usage : $self->fetch_input Function: prepares global variables and DB connections Returns : none Args : none |
Methods code
assign_jobID_to_genomic_align_block | description | prev | next | Top |
sub assign_jobID_to_genomic_align_block
{ my $self = shift;
my $gab = shift;
my $sql = "SELECT analysis_job_id FROM genomic_align_block_job_track ".
"WHERE genomic_align_block_id=?";
my $sth = $self->{'comparaDBA'}->prepare($sql);
$sth->execute($gab->dbID);
my ($job_id) = $sth->fetchrow_array();
$sth->finish;
$gab->{'analysis_job_id'} = $job_id; } |
sub delete_alignments
{ my ($self, $GAB_DBA, $mlss, $delete_group) = @_;
my @del_list;
foreach my $group_id (keys %{$delete_group}) {
my $group_gabs = $GAB_DBA->fetch_all_by_MethodLinkSpeciesSet_GroupID($mlss, $group_id);
if (@$group_gabs == @{$delete_group->{$group_id}}) {
my $error = 0;
@$group_gabs = sort {$a->dbID<=>$b->dbID} @$group_gabs;
@{$delete_group->{$group_id}} = sort {$a->dbID<=>$b->dbID} @{$delete_group->{$group_id}};
for (my $i = 0; $i < @$group_gabs; $i++) {
if ($group_gabs->[$i]->dbID != $delete_group->{$group_id}->[$i]->dbID) {
warn "Inconsistent genomic_align_blocks in group $group_id\n ";
$error = 1;
}
}
if (!$error) {
push @del_list, @$group_gabs;
}
}
}
if (@del_list > 0) {
my @gab_ids = map {$_->dbID} @del_list;
my $sql_genomic_align = "DELETE FROM genomic_align WHERE genomic_align_block_id IN (" . join(",", @gab_ids) . ");";
my $sql_genomic_align_block = "DELETE FROM genomic_align_block WHERE genomic_align_block_id in (" . join(",", @gab_ids) . ");";
my $sth_genomic_align = $self->{'comparaDBA'}->prepare($sql_genomic_align);
$sth_genomic_align->execute;
$sth_genomic_align->finish;
my $sth_genomic_align_block = $self->{'comparaDBA'}->prepare($sql_genomic_align_block);
$sth_genomic_align_block->execute;
$sth_genomic_align->execute;
}
printf("%d gabs to delete\n", scalar(@del_list)); } |
sub fetch_input
{ my( $self) = @_;
$self->{'threshold'} = undef;
$self->{'method_link_species_set_id'} = undef;
$self->{'options'} = undef;
$self->{'height'} = 40;
$self->debug(0);
$self->get_params($self->parameters);
$self->get_params($self->input_id);
$self->print_params;
$self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN => $self->db->dbc);
$self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);
throw("must specify 'collection_name' to identify DnaCollection")
unless(defined($self->{'collection_name'}));
$self->{'collection'} = $self->{'comparaDBA'}->get_DnaCollectionAdaptor->fetch_by_set_description($self->{'collection_name'});
throw("unable to find DnaCollection with name : ". $self->{'collection_name'})
unless(defined($self->{'collection'}));
return 1; } |
sub filter_stack
{ my $self = shift;
my $height = $self->{'height'};
my $dna_collection = $self->{'collection'};
$self->{'delete_group'} = {}; my $mlss = $self->{'comparaDBA'}->get_MethodLinkSpeciesSetAdaptor->fetch_by_method_link_type_genome_db_ids($self->{'method_link'}, [$self->{'query_genome_db_id'}, $self->{'target_genome_db_id'}]);
my $GAB_DBA = $self->{'comparaDBA'}->get_GenomicAlignBlockAdaptor;
my $dna_list = $dna_collection->get_all_dna_objects;
foreach my $dna_object (@{$dna_list}) {
my $dnafrag_chunk_array;
if ($dna_object->isa('Bio::EnsEMBL::Compara::Production::DnaFragChunkSet')) {
$dnafrag_chunk_array = $dna_object->get_all_DnaFragChunks;
} else {
$dnafrag_chunk_array = [$dna_object];
}
foreach my $new_dna_object (@$dnafrag_chunk_array) {
my $delete_group = {};
my $dnafrag = $new_dna_object->dnafrag;
my $genomic_align_block_list = $GAB_DBA->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss, $dnafrag);
if ($self->debug) {
foreach my $gab (@{$genomic_align_block_list}) {
$self->assign_jobID_to_genomic_align_block($gab);
}
}
@$genomic_align_block_list = sort {
$a->reference_genomic_align->dnafrag_start <=>
$b->reference_genomic_align->dnafrag_start} @$genomic_align_block_list;
my $max_end = 0;
my $blocks = [];
foreach my $this_genomic_align_block (@$genomic_align_block_list) {
if ($this_genomic_align_block->reference_genomic_align->dnafrag_start > $max_end) {
if (@$blocks > 0) {
$delete_group = $self->find_stack_coverage($blocks, $height, $delete_group);
}
$blocks = [];
}
push(@$blocks, $this_genomic_align_block);
if ($this_genomic_align_block->reference_genomic_align->dnafrag_end > $max_end) {
$max_end = $this_genomic_align_block->reference_genomic_align->dnafrag_end;
}
}
if (@$blocks > 0) {
$delete_group = $self->find_stack_coverage($blocks, $height, $delete_group);
}
$self->delete_alignments($GAB_DBA, $mlss, $delete_group);
}
} } |
sub find_stack_coverage
{ my ($self, $blocks, $height, $delete_group) = @_;
my ($min_start, $max_end);
foreach my $this_block (@$blocks) {
if (!$min_start or $min_start > $this_block->reference_genomic_align->dnafrag_start) {
$min_start = $this_block->reference_genomic_align->dnafrag_start;
}
if (!$max_end or $max_end < $this_block->reference_genomic_align->dnafrag_end) {
$max_end = $this_block->reference_genomic_align->dnafrag_end;
}
}
my @coverage_by_pos;
foreach my $this_block (@$blocks) {
my $start = $this_block->reference_genomic_align->dnafrag_start;
my $end = $this_block->reference_genomic_align->dnafrag_end;
for (my $a = $start; $a <= $end; $a++) {
$coverage_by_pos[$a-$min_start]++;
}
}
my $threshold_limits;
my $threshold_limit;
for (my $a = 0; $a < @coverage_by_pos; $a++) {
$coverage_by_pos[$a] ||= 0;
if ($coverage_by_pos[$a] > $height) {
if (!defined $threshold_limit->{min}) {
$threshold_limit->{min} = $a+$min_start;
}
} else {
if (defined($threshold_limit->{min})) {
$threshold_limit->{max} = $a+$min_start-1;
push @$threshold_limits, $threshold_limit;
undef($threshold_limit);
}
}
}
if (defined($threshold_limit->{min}) && !defined($threshold_limit->{max} && $coverage_by_pos[-1] > $height)) {
$threshold_limit->{max} = @coverage_by_pos+$min_start-1;
push @$threshold_limits, $threshold_limit;
undef($threshold_limit);
}
foreach my $limit (@$threshold_limits) {
foreach my $this_block (@$blocks) {
my $start = $this_block->reference_genomic_align->dnafrag_start;
my $end = $this_block->reference_genomic_align->dnafrag_end;
if ($start >= $limit->{min} && $end <= $limit->{max}) {
push @{$delete_group->{$this_block->group_id}}, $this_block;
}
}
}
return $delete_group; } |
sub get_params
{ my $self = shift;
my $param_string = shift;
return unless($param_string);
print("parsing parameter string : ",$param_string,"\n");
my $params = eval($param_string);
return unless($params);
foreach my $key (keys %$params) {
print(" $key : ", $params->{$key}, "\n");
}
$self->{'method_link'} = $params->{'method_link'}
if(defined($params->{'method_link'}));
$self->{'query_genome_db_id'} = $params->{'query_genome_db_id'}
if(defined($params->{'query_genome_db_id'}));
$self->{'target_genome_db_id'} = $params->{'target_genome_db_id'}
if(defined($params->{'target_genome_db_id'}));
$self->{'height'} = $params->{'height'}
if(defined($params->{'height'}));
$self->{'collection_name'} = $params->{'collection_name'}
if(defined($params->{'collection_name'}));
return; } |
sub print_gab
{ my $gab = shift;
my ($gab_1, $gab_2) = @{$gab->genomic_align_array};
printf(" id(%d) %s:(%d)%d-%d %s:(%d)%d-%d score=%d\n",
$gab->dbID,
$gab_1->dnafrag->name, $gab_1->dnafrag_strand, $gab_1->dnafrag_start, $gab_1->dnafrag_end,
$gab_2->dnafrag->name, $gab_2->dnafrag_strand, $gab_2->dnafrag_start, $gab_2->dnafrag_end,
$gab->score);
}
1; } |
sub print_params
{ my $self = shift;
print(" params:\n");
print(" method_link : ", $self->{'method_link'},"\n");
print(" query_genome_db_id : ", $self->{'query_genome_db_id'},"\n");
print(" target_genome_db_id : ", $self->{'target_genome_db_id'},"\n");
print(" height : ", $self->{'height'},"\n");
print(" collection_name : ", $self->{'collection_name'},"\n"); } |
sub run
{
my $self = shift;
$self->filter_stack;
return 1; } |
sub write_output
{
my $self = shift;
my $output_id = $self->input_id;
print("output_id = $output_id\n");
$self->input_id($output_id);
return 1;
}
} |
General documentation
Describe contact details here
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _