Bio::EnsEMBL::Compara::Production::EPOanchors FilterAnchors
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::Production::GenomicAlignBlock::EPOanchors::FilterAnchors
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::MethodLinkSpeciesSet
Bio::EnsEMBL::Compara::Production::Anchors::AnchorAlign
Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor
Bio::EnsEMBL::Hive::Process
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Data::Dumper
Inherit
Bio::EnsEMBL::Hive::Process
Synopsis
parameters
{input_analysis_id=> ?,method_link_species_set_id=> ?,test_method_link_species_set_id=> ?}
Description
There are 3 hard-coded filtering conditions for
anchor removal:
1). Anchors which hit > 5 dnafrags in any one genome.
2). Anchors which hit the same dnafrag > 10 in any one genome.
3). Anchors which hit any one genome > 20 times.
Methods
anchor_ids_with_zero_strand
No description
Code
anchors2remove
No description
Code
configure_defaults
No description
Code
fetch_input
No description
Code
genome_db_ids
No description
Code
get_params
No description
Code
input_analysis_id
No description
Code
max_number_of_dnafrags_hit
No description
Code
max_number_of_hits_to_any_one_genome
No description
Code
max_number_of_hits_to_same_dnafrag
No description
Code
method_link_species_set_id
No description
Code
run
No description
Code
test_method_link_species_set_id
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
anchor_ids_with_zero_stranddescriptionprevnextTop
sub anchor_ids_with_zero_strand {
	my $self = shift;
	if (@_) {
		$self->{anchor_ids_with_zero_strand} = shift;
	}
	return $self->{anchor_ids_with_zero_strand};
}
anchors2removedescriptionprevnextTop
sub anchors2remove {
	my $self = shift;
	if (@_) {
		$self->{anchors2remove} = shift;
	}
	return $self->{anchors2remove};
}
configure_defaultsdescriptionprevnextTop
sub configure_defaults {
  my $self = shift;
  $self->max_number_of_dnafrags_hit(MAX_NUMBER_OF_DNAFRAGS_HIT);
  $self->max_number_of_hits_to_same_dnafrag(MAX_NUMBER_OF_HITS_TO_SAME_DNAFRAG);
  $self->max_number_of_hits_to_any_one_genome(MAX_NUMBER_OF_HITS_TO_ANY_ONE_GENOME);
  return 0;
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my ($self) = @_;

  $self->configure_defaults();

  #create a Compara::DBAdaptor which shares the same DBI handle
#with $self->db (Hive DBAdaptor)
$self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc); $self->{'comparaDBA'}->dbc->disconnect_when_inactive(0); my $dnafrag_adaptor = $self->{'comparaDBA'}->get_DnaFragAdaptor(); my $anchor_align_adaptor = $self->{'comparaDBA'}->get_AnchorAlignAdaptor(); $self->get_params($self->parameters); $self->get_params($self->input_id); return 1;
}
genome_db_idsdescriptionprevnextTop
sub genome_db_ids {
	my $self = shift;
	if (@_) {
		$self->{genome_db_ids} = shift;
	}
	return $self->{genome_db_ids};
}

#sub method_link_type {
# my $self = shift;
# if (@_) {
# $self->{method_link_type} = shift;
# }
# return $self->{method_link_type};
#}
#
#sub input_anchor_id {
# my $self = shift;
# if (@_) {
# $self->{input_anchor_id} = shift;
# }
# return $self->{input_anchor_id};
#
}
get_paramsdescriptionprevnextTop
sub get_params {
  my $self         = shift;
  my $param_string = shift;

  return unless($param_string);
  print("parsing parameter string : ",$param_string,"\n");

  my $params = eval($param_string);
  return unless($params);

  if(defined($params->{'test_method_link_species_set_id'})) {
    $self->test_method_link_species_set_id($params->{'test_method_link_species_set_id'});
  }
  if(defined($params->{'method_link_species_set_id'})) {
	$self->method_link_species_set_id($params->{'method_link_species_set_id'});
  }
  if(defined($params->{'input_analysis_id'})) {
    $self->input_analysis_id($params->{'input_analysis_id'});
  }
  if(defined($params->{'genome_db_ids'})) {
    $self->genome_db_ids($params->{'genome_db_ids'});
  }
  
  if(defined($params->{'method_link_type'})) {
    $self->method_link_type($params->{'method_link_type'});
  }
  if(defined($params->{'input_anchor_id'})) { #same as anchor_id
$self->input_anchor_id($params->{'input_anchor_id'}); } if(defined($params->{'anchor_id'})) { #same as input_anchor_id
$self->input_anchor_id($params->{'anchor_id'}); } return 1; } #sub store_input {
# my $self = shift;
#
# if (@_) {
# $self->{store_input} = shift;
# }
#
# return $self->{store_input};
#}
1;
}
input_analysis_iddescriptionprevnextTop
sub input_analysis_id {
	my $self = shift;
	if (@_) {
		$self->{input_analysis_id} = shift;
	}
	return $self->{input_analysis_id};
}
max_number_of_dnafrags_hitdescriptionprevnextTop
sub max_number_of_dnafrags_hit {
	my $self = shift;
	if (@_) {
		$self->{max_number_of_dnafrags_hit} = shift;
	}
	return $self->{max_number_of_dnafrags_hit};
}
max_number_of_hits_to_any_one_genomedescriptionprevnextTop
sub max_number_of_hits_to_any_one_genome {
	my $self = shift;
	if (@_) {
		$self->{max_number_of_hits_to_any_one_genome} = shift;
	}
	return $self->{max_number_of_hits_to_any_one_genome};
}
max_number_of_hits_to_same_dnafragdescriptionprevnextTop
sub max_number_of_hits_to_same_dnafrag {
	my $self = shift;
	if (@_) {
		$self->{max_number_of_hits_to_same_dnafrag} = shift;
	}
	return $self->{max_number_of_hits_to_same_dnafrag};
}
method_link_species_set_iddescriptionprevnextTop
sub method_link_species_set_id {
	my $self = shift;
	if (@_) {
		$self->{method_link_species_set_id} = shift;
	}
	return $self->{method_link_species_set_id};
}
rundescriptionprevnextTop
sub run {
	my ($self) = @_;
	my $anchor_align_adaptor = $self->{'comparaDBA'}->get_AnchorAlignAdaptor();
	my(@palindromic_anchors, $all_anchors, %anchor_hits2genomes_and_dnafrags, %anchors2remove);
	$self->anchor_ids_with_zero_strand(
		$anchor_align_adaptor->fetch_all_anchors_with_zero_strand(
			$self->test_method_link_species_set_id));
	$all_anchors = $anchor_align_adaptor->fetch_all_anchor_ids_by_test_mlssid_and_genome_db_ids(
		$self->test_method_link_species_set_id, $self->genome_db_ids);
	foreach my $anchor (@{$all_anchors}) {
		my $dnafrags_and_genomedbs = $anchor_align_adaptor->fetch_dnafrag_and_genome_db_ids_by_test_mlssid(
			$self->test_method_link_species_set_id, $anchor->[0]);
		foreach my $genome_db_dnafrag(@{$dnafrags_and_genomedbs}) {
			$anchor_hits2genomes_and_dnafrags{$anchor->[0]}{$genome_db_dnafrag->[1]}{$genome_db_dnafrag->[0]}++;
		}
	}
	foreach my $anchor_id(sort keys %anchor_hits2genomes_and_dnafrags) {
		foreach my $genome_db_id(sort keys %{$anchor_hits2genomes_and_dnafrags{$anchor_id}}) {
			last if(exists($anchors2remove{$anchor_id}));
			my $num_hits_to_each_genome = 0;
			if(scalar(keys %{$anchor_hits2genomes_and_dnafrags{$anchor_id}{$genome_db_id}}) > $self->max_number_of_dnafrags_hit) {
				$anchors2remove{$anchor_id}++;
#				print "MAX_NUM_DNAFRAGS_HIT : $anchor_id\n";
last; } foreach my $dnafrag_id(%{$anchor_hits2genomes_and_dnafrags{$anchor_id}{$genome_db_id}}) { if($anchor_hits2genomes_and_dnafrags{$anchor_id}{$genome_db_id}{$dnafrag_id} > $self->max_number_of_hits_to_same_dnafrag) { $anchors2remove{$anchor_id}++; # print "MAX_NUM_HITS_TO_SAME_DNAFRAG : $anchor_id\n";
last; } else{ $num_hits_to_each_genome += $anchor_hits2genomes_and_dnafrags{$anchor_id}{$genome_db_id}{$dnafrag_id}; } } if($num_hits_to_each_genome > $self->max_number_of_hits_to_any_one_genome) { $anchors2remove{$anchor_id}++; # print "MAX_NUM_HITS_TO_ANY_ONE_GENOME : $anchor_id\n";
last; } } } $self->anchors2remove(\%anchors2remove); return 1;
}
test_method_link_species_set_iddescriptionprevnextTop
sub test_method_link_species_set_id {
	my $self = shift;
	if (@_) {
		$self->{test_method_link_species_set_id} = shift;
	}
	return $self->{test_method_link_species_set_id};
}
write_outputdescriptionprevnextTop
sub write_output {
	my ($self) = @_;
	my $anchor_align_adaptor = $self->{'comparaDBA'}->get_AnchorAlignAdaptor();
	print join(":", $self->input_analysis_id, $self->test_method_link_species_set_id), "\n";
	$anchor_align_adaptor->update_zero_strand_anchors($self->anchor_ids_with_zero_strand, 
		$self->input_analysis_id, $self->test_method_link_species_set_id);
	if(scalar (keys %{$self->anchors2remove})) {
		$anchor_align_adaptor->update_failed_anchor($self->anchors2remove, $self->input_analysis_id, $self->test_method_link_species_set_id);
	}			
	else{
		print "No anchors to remove\n";
	}
  return 1;
}
General documentation
CONTACTTop
ensembl-compara@ebi.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _