Bio::EnsEMBL::Compara::Production::EPOanchors
FilterAnchors
Toolbar
Summary
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::EPOanchors::FilterAnchors
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::Production::Anchors::AnchorAlign
Data::Dumper
Inherit
Synopsis
parameters
{input_analysis_id=> ?,method_link_species_set_id=> ?,test_method_link_species_set_id=> ?}
Description
There are 3 hard-coded filtering conditions for
anchor removal:
1). Anchors which hit > 5 dnafrags in any one genome.
2). Anchors which hit the same dnafrag > 10 times in any one genome.
3). Anchors which hit any one genome > 20 times.
Methods
anchor_ids_with_zero_strand | No description | Code |
anchors2remove | No description | Code |
configure_defaults | No description | Code |
fetch_input | No description | Code |
genome_db_ids | No description | Code |
get_params | No description | Code |
input_analysis_id | No description | Code |
max_number_of_dnafrags_hit | No description | Code |
max_number_of_hits_to_any_one_genome | No description | Code |
max_number_of_hits_to_same_dnafrag | No description | Code |
method_link_species_set_id | No description | Code |
run | No description | Code |
test_method_link_species_set_id | No description | Code |
write_output | No description | Code |
Methods description
None available.
Methods code
anchor_ids_with_zero_strand | description | prev | next | Top |
# anchor_ids_with_zero_strand
#
# Combined getter/setter for the 'anchor_ids_with_zero_strand' slot of the
# object hash. When called with an argument, that value is stored first.
# Always returns the value currently held in the slot.
sub anchor_ids_with_zero_strand {
    my ($self, @args) = @_;
    $self->{anchor_ids_with_zero_strand} = $args[0] if @args;
    return $self->{anchor_ids_with_zero_strand};
}
# anchors2remove
#
# Combined getter/setter for the 'anchors2remove' slot of the object hash.
# When called with an argument, that value is stored first.
# Always returns the value currently held in the slot.
sub anchors2remove {
    my ($self, @args) = @_;
    $self->{anchors2remove} = $args[0] if @args;
    return $self->{anchors2remove};
}
# configure_defaults
#
# Seeds the three filtering thresholds on the object from the MAX_* names,
# which are defined outside this block. Returns 0.
sub configure_defaults {
    my ($self) = @_;

    # One threshold per filtering condition.
    $self->max_number_of_dnafrags_hit(MAX_NUMBER_OF_DNAFRAGS_HIT);
    $self->max_number_of_hits_to_same_dnafrag(MAX_NUMBER_OF_HITS_TO_SAME_DNAFRAG);
    $self->max_number_of_hits_to_any_one_genome(MAX_NUMBER_OF_HITS_TO_ANY_ONE_GENOME);

    return 0;
}
# fetch_input
#
# Prepares the object for run():
#   1. installs the default filtering thresholds (configure_defaults),
#   2. builds a Compara production DBAdaptor on top of this object's own
#      database connection and stores it under $self->{'comparaDBA'},
#   3. parses the analysis parameters and then the job input_id with
#      get_params, so values from input_id are applied last.
#
# Returns 1.
sub fetch_input {
    my ($self) = @_;

    $self->configure_defaults();

    $self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc);
    # NOTE(review): presumably keeps the connection open between queries -
    # confirm against the DBConnection documentation.
    $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0);

    # The DnaFrag and AnchorAlign adaptors used to be fetched into locals
    # here but were never used; run() and write_output() fetch the
    # AnchorAlign adaptor themselves.
    $self->get_params($self->parameters);
    $self->get_params($self->input_id);

    return 1;
}
# genome_db_ids
#
# Combined getter/setter for the 'genome_db_ids' slot of the object hash.
# When called with an argument, that value is stored first.
# Always returns the value currently held in the slot.
sub genome_db_ids {
    my ($self, @args) = @_;
    $self->{genome_db_ids} = $args[0] if @args;
    return $self->{genome_db_ids};
}
} |
# get_params
#
# Parses a parameter string (Perl source that evaluates to a hash
# reference, e.g. "{input_analysis_id=>1,method_link_species_set_id=>2}")
# and copies every recognised, defined entry onto the object.
#
# Recognised keys: test_method_link_species_set_id,
# method_link_species_set_id, input_analysis_id, genome_db_ids,
# method_link_type, input_anchor_id and anchor_id (an alias that is stored
# as input_anchor_id and, being applied last, wins over input_anchor_id).
#
# Arg [1]   : string $param_string
# Returns   : 1 when the string was parsed; nothing (undef in scalar
#             context) when the string is empty/false, fails to evaluate,
#             or does not evaluate to a hash reference.
sub get_params {
    my $self         = shift;
    my $param_string = shift;

    return unless ($param_string);
    print("parsing parameter string : ",$param_string,"\n");

    # NOTE(review): string eval runs arbitrary Perl code; the parameter
    # string must only ever come from a trusted source.
    my $params = eval($param_string);
    if ($@) {
        # Previously a malformed string was dropped without any trace.
        warn("get_params: could not evaluate parameter string: $@");
        return;
    }
    return unless ($params);
    unless (ref($params) eq 'HASH') {
        warn("get_params: parameter string did not evaluate to a hash reference\n");
        return;
    }

    # [ parameter key => accessor name ], applied in this order.
    my @param2accessor = (
        [ 'test_method_link_species_set_id' => 'test_method_link_species_set_id' ],
        [ 'method_link_species_set_id'      => 'method_link_species_set_id' ],
        [ 'input_analysis_id'               => 'input_analysis_id' ],
        [ 'genome_db_ids'                   => 'genome_db_ids' ],
        [ 'method_link_type'                => 'method_link_type' ],
        [ 'input_anchor_id'                 => 'input_anchor_id' ],
        [ 'anchor_id'                       => 'input_anchor_id' ],
    );

    foreach my $pair (@param2accessor) {
        my ($key, $accessor) = @{$pair};
        next unless (defined($params->{$key}));

        if ($self->can($accessor)) {
            $self->$accessor($params->{$key});
        }
        else {
            # No such accessor on this object: store the value in the
            # hash slot an accessor of that name would use, instead of
            # dying with "Can't locate object method".
            $self->{$accessor} = $params->{$key};
        }
    }

    return 1;
}
1; } |
# input_analysis_id
#
# Combined getter/setter for the 'input_analysis_id' slot of the object
# hash. When called with an argument, that value is stored first.
# Always returns the value currently held in the slot.
sub input_analysis_id {
    my ($self, @args) = @_;
    $self->{input_analysis_id} = $args[0] if @args;
    return $self->{input_analysis_id};
}
# max_number_of_dnafrags_hit
#
# Combined getter/setter for the 'max_number_of_dnafrags_hit' slot of the
# object hash; run() compares the number of distinct dnafrags an anchor
# hits in one genome against this value.
# When called with an argument, that value is stored first.
# Always returns the value currently held in the slot.
sub max_number_of_dnafrags_hit {
    my ($self, @args) = @_;
    $self->{max_number_of_dnafrags_hit} = $args[0] if @args;
    return $self->{max_number_of_dnafrags_hit};
}
max_number_of_hits_to_any_one_genome | description | prev | next | Top |
# max_number_of_hits_to_any_one_genome
#
# Combined getter/setter for the 'max_number_of_hits_to_any_one_genome'
# slot of the object hash; run() compares an anchor's summed hits to one
# genome against this value.
# When called with an argument, that value is stored first.
# Always returns the value currently held in the slot.
sub max_number_of_hits_to_any_one_genome {
    my ($self, @args) = @_;
    $self->{max_number_of_hits_to_any_one_genome} = $args[0] if @args;
    return $self->{max_number_of_hits_to_any_one_genome};
}
max_number_of_hits_to_same_dnafrag | description | prev | next | Top |
# max_number_of_hits_to_same_dnafrag
#
# Combined getter/setter for the 'max_number_of_hits_to_same_dnafrag' slot
# of the object hash; run() compares an anchor's hit count on a single
# dnafrag against this value.
# When called with an argument, that value is stored first.
# Always returns the value currently held in the slot.
sub max_number_of_hits_to_same_dnafrag {
    my ($self, @args) = @_;
    $self->{max_number_of_hits_to_same_dnafrag} = $args[0] if @args;
    return $self->{max_number_of_hits_to_same_dnafrag};
}
# method_link_species_set_id
#
# Combined getter/setter for the 'method_link_species_set_id' slot of the
# object hash. When called with an argument, that value is stored first.
# Always returns the value currently held in the slot.
sub method_link_species_set_id {
    my ($self, @args) = @_;
    $self->{method_link_species_set_id} = $args[0] if @args;
    return $self->{method_link_species_set_id};
}
# run
#
# Decides which anchors fail the filtering thresholds and records them.
#
#   * Stores the result of fetch_all_anchors_with_zero_strand (for the test
#     method_link_species_set_id) in anchor_ids_with_zero_strand.
#   * For every anchor returned for the test mlss id and genome_db_ids,
#     counts its hits per genome_db and per dnafrag, then flags the anchor
#     as soon as, for any one genome, it
#       1) hits more distinct dnafrags than max_number_of_dnafrags_hit, or
#       2) hits one dnafrag more than max_number_of_hits_to_same_dnafrag
#          times, or
#       3) has more summed hits than max_number_of_hits_to_any_one_genome.
#   * Stores the flagged anchors (a hash reference keyed by anchor id) in
#     anchors2remove.
#
# Returns 1.
sub run {
    my ($self) = @_;
    my $anchor_align_adaptor = $self->{'comparaDBA'}->get_AnchorAlignAdaptor();
    my ($all_anchors, %anchor_hits2genomes_and_dnafrags, %anchors2remove);

    $self->anchor_ids_with_zero_strand(
        $anchor_align_adaptor->fetch_all_anchors_with_zero_strand(
            $self->test_method_link_species_set_id));

    $all_anchors = $anchor_align_adaptor->fetch_all_anchor_ids_by_test_mlssid_and_genome_db_ids(
        $self->test_method_link_species_set_id, $self->genome_db_ids);

    # Build anchor_id -> genome_db_id -> dnafrag_id -> number of hits.
    # Each fetched row is indexed as [0] = dnafrag id, [1] = genome_db id.
    foreach my $anchor (@{$all_anchors}) {
        my $dnafrags_and_genomedbs = $anchor_align_adaptor->fetch_dnafrag_and_genome_db_ids_by_test_mlssid(
            $self->test_method_link_species_set_id, $anchor->[0]);
        foreach my $genome_db_dnafrag (@{$dnafrags_and_genomedbs}) {
            $anchor_hits2genomes_and_dnafrags{$anchor->[0]}{$genome_db_dnafrag->[1]}{$genome_db_dnafrag->[0]}++;
        }
    }

    foreach my $anchor_id (sort keys %anchor_hits2genomes_and_dnafrags) {
        foreach my $genome_db_id (sort keys %{$anchor_hits2genomes_and_dnafrags{$anchor_id}}) {
            # Already flagged in an earlier genome: nothing more to check.
            last if (exists($anchors2remove{$anchor_id}));
            my $num_hits_to_each_genome = 0;

            # Condition 1: too many distinct dnafrags hit in this genome.
            if (scalar(keys %{$anchor_hits2genomes_and_dnafrags{$anchor_id}{$genome_db_id}}) > $self->max_number_of_dnafrags_hit) {
                $anchors2remove{$anchor_id}++;
                last;
            }

            # Iterate over the dnafrag ids only. Looping over the bare hash
            # would also yield the hit counts, and any count that happens
            # to equal another dnafrag id in this genome would be looked up
            # and counted a second time.
            foreach my $dnafrag_id (keys %{$anchor_hits2genomes_and_dnafrags{$anchor_id}{$genome_db_id}}) {
                # Condition 2: too many hits on one and the same dnafrag.
                if ($anchor_hits2genomes_and_dnafrags{$anchor_id}{$genome_db_id}{$dnafrag_id} > $self->max_number_of_hits_to_same_dnafrag) {
                    $anchors2remove{$anchor_id}++;
                    last;
                }
                else {
                    $num_hits_to_each_genome += $anchor_hits2genomes_and_dnafrags{$anchor_id}{$genome_db_id}{$dnafrag_id};
                }
            }

            # Condition 3: too many hits to this genome overall.
            if ($num_hits_to_each_genome > $self->max_number_of_hits_to_any_one_genome) {
                $anchors2remove{$anchor_id}++;
                last;
            }
        }
    }

    $self->anchors2remove(\%anchors2remove);
    return 1;
}
# test_method_link_species_set_id
#
# Combined getter/setter for the 'test_method_link_species_set_id' slot of
# the object hash. When called with an argument, that value is stored
# first. Always returns the value currently held in the slot.
sub test_method_link_species_set_id {
    my ($self, @args) = @_;
    $self->{test_method_link_species_set_id} = $args[0] if @args;
    return $self->{test_method_link_species_set_id};
}
# write_output
#
# Passes the results collected by run() to the AnchorAlign adaptor:
# the zero-strand anchor ids are always handed to
# update_zero_strand_anchors, and the anchors flagged for removal are
# handed to update_failed_anchor when there are any. Prints
# "<input_analysis_id>:<test_method_link_species_set_id>" first, and a
# notice when no anchor was flagged. Returns 1.
sub write_output {
    my $self = shift;

    my $adaptor     = $self->{'comparaDBA'}->get_AnchorAlignAdaptor();
    my $analysis_id = $self->input_analysis_id;
    my $test_mlssid = $self->test_method_link_species_set_id;

    print join(":", $analysis_id, $test_mlssid), "\n";

    $adaptor->update_zero_strand_anchors($self->anchor_ids_with_zero_strand, $analysis_id, $test_mlssid);

    # Nothing flagged: report it and stop here.
    unless (scalar(keys %{$self->anchors2remove})) {
        print "No anchors to remove\n";
        return 1;
    }

    $adaptor->update_failed_anchor($self->anchors2remove, $analysis_id, $test_mlssid);
    return 1;
}
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _