Raw content of Bio::EnsEMBL::Compara::Production::EPOanchors::RemoveAnchorOverlaps # You may distribute this module under the same terms as perl itself # POD documentation - main docs before the code =pod =head1 NAME Bio::EnsEMBL::Compara::Production::EPOanchors::RemoveAnchorOverlaps =cut =head1 SYNOPSIS parameters {input_analysis_id=> ?,method_link_species_set_id=> ?,test_method_link_species_set_id=> ?, genome_db_ids => [?],} =cut =head1 DESCRIPTION Removes the minimum number of overlappping anchors. =cut =head1 CONTACT ensembl-compara@ebi.ac.uk =cut =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut package Bio::EnsEMBL::Compara::Production::EPOanchors::RemoveAnchorOverlaps; use strict; use Data::Dumper; use Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor; use Bio::EnsEMBL::Hive::Process; use Bio::EnsEMBL::Compara::MethodLinkSpeciesSet; use Bio::EnsEMBL::Compara::Production::EPOanchors::AnchorAlign; use Bio::EnsEMBL::Utils::Exception qw(throw warning); our @ISA = qw(Bio::EnsEMBL::Hive::Process); sub configure_defaults { my $self = shift; return 1; } sub fetch_input { my ($self) = @_; $self->configure_defaults(); $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc); $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0); $self->get_params($self->parameters); return 1; } sub run { my ($self) = @_; my $anchor_align_adaptor = $self->{'comparaDBA'}->get_AnchorAlignAdaptor(); my $dnafrag_ids = $anchor_align_adaptor->fetch_all_dnafrag_ids($self->test_method_link_species_set_id); my (%Overlappping_anchors, %Anchors_2_remove, %Scores); my $test_mlssid = $self->test_method_link_species_set_id; foreach my $genome_db_id(sort keys %{$dnafrag_ids}) { my %genome_db_dnafrags; foreach my $genome_db_anchors(@{ $anchor_align_adaptor->fetch_all_anchors_by_genome_db_id_and_mlssid( $genome_db_id, $test_mlssid) }) { push(@{ $genome_db_dnafrags{ $genome_db_anchors->[0] } }, [ @{ $genome_db_anchors }[1..4] ]); } foreach my $dnafrag_id(sort keys %genome_db_dnafrags) { my @Dnafrag_anchors = @{ $genome_db_dnafrags{$dnafrag_id} }; for(my$i=0;$i<@Dnafrag_anchors-1;$i++) { #count number of overlaps an anchor has at every position to which it maps for(my$j=$i+1;$j<@Dnafrag_anchors;$j++) { if($Dnafrag_anchors[$i]->[3] >= $Dnafrag_anchors[$j]->[2]) { $Overlappping_anchors{$Dnafrag_anchors[$i]->[1]}{$Dnafrag_anchors[$j]->[1]}++; $Overlappping_anchors{$Dnafrag_anchors[$j]->[1]}{$Dnafrag_anchors[$i]->[1]}++; } else { splice(@Dnafrag_anchors, $i, 1); $i--; last; } } } } } foreach my$anchor(sort keys %Overlappping_anchors) { foreach my $overlapping_anchor(sort keys %{$Overlappping_anchors{$anchor}}) { $Scores{$anchor} += ($Overlappping_anchors{$anchor}{$overlapping_anchor})**2; #score the anchors according to the number of overlaps } } print STDERR "scores: ", scalar(keys %Scores), "\n"; my$flag = 1; while($flag) { $flag = 0; foreach my $anchor(sort {$Scores{$b} <=> $Scores{$a}} keys %Scores) { #get highest scoring anchor next unless(exists($Scores{$anchor})); #don't add it to "remove list" if it's already gone from the score hash foreach my $anc_with_overlap_2_anchor(sort keys %{$Overlappping_anchors{$anchor}}) { #find all the anchors which overlap this anchor $Scores{$anc_with_overlap_2_anchor} -= ($Overlappping_anchors{$anc_with_overlap_2_anchor}{$anchor})**2; #decrement the score delete $Scores{$anc_with_overlap_2_anchor} unless($Scores{$anc_with_overlap_2_anchor}); #if score is zero remove this anchor from the overlapping list, delete($Overlappping_anchors{$anc_with_overlap_2_anchor}{$anchor}); #remove high scoring anchor from hash of overlaps } delete($Overlappping_anchors{$anchor}); #remove high scoring anchor from hash delete($Scores{$anchor}); #also remove it from scoring hash $Anchors_2_remove{$anchor}++; #add it to list of ancs to remove } $flag = 1 if (scalar(keys %Scores)); } print STDERR "anchors to remove: ", scalar(keys %Anchors_2_remove), "\n"; $self->overlapping_ancs_to_remove(\%Anchors_2_remove); return 1; } sub write_output { my ($self) = @_; my $anchor_align_adaptor = $self->{'comparaDBA'}->get_AnchorAlignAdaptor(); my $current_analysis_id = $self->input_analysis_id(); my $Anchors_2_remove = $self->overlapping_ancs_to_remove(); my $test_mlssid = $self->test_method_link_species_set_id(); $anchor_align_adaptor->update_failed_anchor($Anchors_2_remove, $current_analysis_id, $test_mlssid); return 1; } sub test_method_link_species_set_id { my $self = shift; if (@_) { $self->{test_method_link_species_set_id} = shift; } return $self->{test_method_link_species_set_id}; } sub overlapping_ancs_to_remove { my $self = shift; if (@_) { $self->{overlapping_ancs_to_remove} = shift; } return $self->{overlapping_ancs_to_remove}; } sub genome_db_ids { my $self = shift; if (@_) { $self->{genome_db_ids} = shift; } return $self->{genome_db_ids}; } sub method_link_species_set_id { my $self = shift; if (@_) { $self->{method_link_species_set_id} = shift; } return $self->{method_link_species_set_id}; } sub input_analysis_id { my $self = shift; if (@_) { $self->{input_analysis_id} = shift; } return $self->{input_analysis_id}; } sub get_params { my $self = shift; my $param_string = shift; return unless($param_string); print("parsing parameter string : ",$param_string,"\n"); my $params = eval($param_string); return unless($params); if(defined($params->{'genome_db_ids'})) { $self->genome_db_ids($params->{'genome_db_ids'}); } if(defined($params->{'test_method_link_species_set_id'})) { $self->test_method_link_species_set_id($params->{'test_method_link_species_set_id'}); } if(defined($params->{'method_link_species_set_id'})) { $self->method_link_species_set_id($params->{'method_link_species_set_id'}); } if(defined($params->{'input_analysis_id'})) { $self->input_analysis_id($params->{'input_analysis_id'}); } return 1; } #sub store_input { # my $self = shift; # # if (@_) { # $self->{_store_input} = shift; # } # # return $self->{_store_input}; #} # #sub store_output { # my $self = shift; # # if (@_) { # $self->{_store_output} = shift; # } # # return $self->{_store_output}; #} 1;