Bio::EnsEMBL::Compara::Production::EPOanchors TrimStoreAnchors
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::Production::EPOanchors::TrimStoreAnchors
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis
Bio::EnsEMBL::Compara::DBSQL::DBAdaptor
Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor
Bio::EnsEMBL::Hive::Process
Bio::EnsEMBL::Registry
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Data::Dumper
Inherit
Bio::EnsEMBL::Hive::Process
Synopsis
$exonate_anchors->fetch_input();
$exonate_anchors->run();
$exonate_anchors->write_output(); writes to database
Description
Given a database with anchor sequences and a target genome. This modules exonerates
the anchors against the target genome. The required information (anchor batch size,
target genome file, exonerate parameters are provided by the analysis, analysis_job
and analysis_data tables
This modules is part of the Ensembl project
Email compara@ebi.ac.uk
Methods
N_cut_off_ratio
No description
Code
analysis_data_id
No description
Code
configure_defaults
No description
Code
fetch_input
No description
Code
gab_batch_size
No description
Code
get_input_id
No description
Code
get_parameters
No description
Code
method_link_species_set_id
No description
Code
previous_mlssid
No description
Code
run
No description
Code
trim_and_store_analysis_id
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
N_cut_off_ratiodescriptionprevnextTop
sub N_cut_off_ratio {
	my $self = shift;
	if (@_) {
		$self->{_N_cut_off_ratio} = shift;
	}
	return $self->{_N_cut_off_ratio};
}
analysis_data_iddescriptionprevnextTop
sub analysis_data_id {
	my $self = shift;
	if (@_) {
		$self->{_analysis_data_id} = shift;
	}
	return $self->{_analysis_data_id};
}
configure_defaultsdescriptionprevnextTop
sub configure_defaults {
 	my $self = shift;
	$self->gab_batch_size(1000);
	$self->N_cut_off_ratio(0.1);
  	return 1;
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
	my ($self) = @_;
	$self->configure_defaults();
	$self->get_parameters($self->parameters);
	#create a Compara::DBAdaptor which shares the same DBI handle with $self->db (Hive DBAdaptor)
$self->{'comparaDBA'} = Bio::EnsEMBL::Compara::Production::DBSQL::DBAdaptor->new(-DBCONN=>$self->db->dbc) or die "cant connect\n"; $self->{'comparaDBA'}->dbc->disconnect_if_idle(); $self->{'hiveDBA'} = Bio::EnsEMBL::Hive::DBSQL::DBAdaptor->new(-DBCONN => $self->{'comparaDBA'}->dbc) or die "cant connect\n"; $self->{'hiveDBA'}->dbc->disconnect_if_idle(); return 1;
}
gab_batch_sizedescriptionprevnextTop
sub gab_batch_size {
	my $self = shift;
	if (@_) {
		$self->{_gab_batch_size} = shift;
	}
	return $self->{_gab_batch_size};
}
get_input_iddescriptionprevnextTop
sub get_input_id {
	my $self = shift;
	my $input_id_string = shift;

	return unless($input_id_string);
	print("parsing input_id string : ",$input_id_string,"\n");
	return 1;
}

1;
}
get_parametersdescriptionprevnextTop
sub get_parameters {
	my $self = shift;
	my $param_string = shift;
	
	return unless($param_string);
	my $params = eval($param_string);
	if(defined($params->{'method_link_species_set_id'})) {
		$self->method_link_species_set_id($params->{'method_link_species_set_id'});
	}
	if(defined($params->{'analysis_data_id'})) {
		$self->analysis_data_id($params->{'analysis_data_id'});
	}
	if(defined($params->{'previous_mlssid'})) {
		$self->previous_mlssid($params->{'previous_mlssid'});
	}
}
method_link_species_set_iddescriptionprevnextTop
sub method_link_species_set_id {
	my $self = shift;
	if (@_){
		$self->{_method_link_species_set_id} = shift;
	}
	return $self->{_method_link_species_set_id};
}
previous_mlssiddescriptionprevnextTop
sub previous_mlssid {
 #this should be the Gerp mlssid	my $self = shift;
if (@_) { $self->{_previous_mlssid} = shift; } return $self->{_previous_mlssid};
}
rundescriptionprevnextTop
sub run {
	my ($self) = @_;
	#find all genomic_align_blocks with the minimum size and minimum number of seqs
my %sql_statements = ( select_gerp_blocks => "SELECT genomic_align_block_id FROM genomic_align_block WHERE length >= ? AND method_link_species_set_id = ?", select_analysis_data => "SELECT data FROM analysis_data WHERE analysis_data_id = ?", select_genomic_aligns => "SELECT genomic_align_id, dnafrag_id, dnafrag_start, dnafrag_end, dnafrag_strand FROM genomic_align WHERE genomic_align_block_id = ?", select_genome_db_ids => "SELECT ss.genome_db_id, gdb.name FROM genome_db gdb INNER JOIN species_set ss ON gdb.genome_db_id = ss.genome_db_id WHERE species_set_id = (SELECT species_set_id FROM method_link_species_set WHERE method_link_species_set_id = ?)", select_dnafrag_info => "SELECT df.genome_db_id, df.dnafrag_id, df.name, df.coord_system_name FROM dnafrag df INNER JOIN genomic_align ga ON df.dnafrag_id = ga.dnafrag_id INNER JOIN genomic_align_block gab ON ga.genomic_align_block_id = gab.genomic_align_block_id WHERE gab.method_link_species_set_id = ? AND gab.length >= ? GROUP BY df.dnafrag_id", insert_anchor_seq => "INSERT INTO anchor_sequence (method_link_species_set_id, anchor_id, dnafrag_id, start, end, strand, sequence, length) VALUES (?,?,?,?,?,?,?,?)", ); foreach my$sql_statement(keys %sql_statements) {#prepare all the sql statements
$sql_statements{$sql_statement} = $self->{'comparaDBA'}->dbc->prepare($sql_statements{$sql_statement}); } $sql_statements{select_genome_db_ids}->execute($self->method_link_species_set_id); my(%org_hash, $dnafrag_hashref); while(my@row = $sql_statements{select_genome_db_ids}->fetchrow_array) { $org_hash{$row[0]} = $row[1]; } foreach my$genome_db_id(sort keys %org_hash) { #get slice adaptors for all the species used to generate the anchors
$org_hash{$genome_db_id} = Bio::EnsEMBL::Registry->get_adaptor("$org_hash{$genome_db_id}", "core", "Slice"); } $sql_statements{select_analysis_data}->execute($self->analysis_data_id); my $analysis_struct = eval ( ($sql_statements{select_analysis_data}->fetchrow_array)[0] ); $sql_statements{select_dnafrag_info}->execute($self->previous_mlssid, $analysis_struct->{min_anc_size}); $dnafrag_hashref = $sql_statements{select_dnafrag_info}->fetchall_hashref("dnafrag_id"); $sql_statements{select_gerp_blocks}->execute($analysis_struct->{min_anc_size}, $self->previous_mlssid); while(my@row = $sql_statements{select_gerp_blocks}->fetchrow_array) { my $genomic_align_block_id = $row[0]; $sql_statements{select_genomic_aligns}->execute($genomic_align_block_id); my $ga_ref = $sql_statements{select_genomic_aligns}->fetchall_hashref("genomic_align_id"); foreach my $ga_id(sort keys %{$ga_ref}) { my $dnafrag_id = $ga_ref->{$ga_id}->{dnafrag_id}; my $slice = $org_hash{ $dnafrag_hashref->{$dnafrag_id}->{genome_db_id} }->fetch_by_region( $dnafrag_hashref->{$dnafrag_id}->{coord_system_name}, $dnafrag_hashref->{$dnafrag_id}->{name}, $ga_ref->{$ga_id}->{dnafrag_start}, $ga_ref->{$ga_id}->{dnafrag_end}, $ga_ref->{$ga_id}->{dnafrag_strand}); next if $slice->length < $analysis_struct->{min_anc_size}; my($new_end, $new_seq); if($slice->length > $analysis_struct->{max_anc_size}) { #shorten the sequence if it's greater that the max length
$new_seq = join("", (split("", $slice->seq))[0..$analysis_struct->{max_anc_size}-1]); $new_end = $ga_ref->{$ga_id}->{dnafrag_start} + $analysis_struct->{max_anc_size} - 1; } if($new_seq) { #skip if sequence has too many Ns, insert into db otherwise
my $new_seq_Ns_len = length( join("", $new_seq=~/(N)/g)); next if ($new_seq_Ns_len && ( $new_seq_Ns_len / length($new_seq) > $self->N_cut_off_ratio ));
$sql_statements{insert_anchor_seq}->execute($self->method_link_species_set_id, $genomic_align_block_id, $dnafrag_id, $ga_ref->{$ga_id}->{dnafrag_start}, $ga_ref->{$ga_id}->{dnafrag_end}, $ga_ref->{$ga_id}->{dnafrag_strand}, $new_seq, length($new_seq)); } else { my $slice_seq_Ns_len = length( join("", $slice->seq=~/(N)/g)); next if ($slice_seq_Ns_len && ( $slice_seq_Ns_len / length($slice->seq) > $self->N_cut_off_ratio ));
$sql_statements{insert_anchor_seq}->execute($self->method_link_species_set_id, $genomic_align_block_id, $dnafrag_id, $ga_ref->{$ga_id}->{dnafrag_start}, $ga_ref->{$ga_id}->{dnafrag_end}, $ga_ref->{$ga_id}->{dnafrag_strand}, $slice->seq, length($slice->seq)); } } } return 1;
}
trim_and_store_analysis_iddescriptionprevnextTop
sub trim_and_store_analysis_id {
	my $self = shift;
	if (@_) {
		$self->{_trim_and_store_analysis_id} = shift;
	}
	return $self->{_trim_and_store_analysis_id};
}
write_outputdescriptionprevnextTop
sub write_output {
	my ($self) = @_;
	return 1;
}
General documentation
CONTACTTop
This modules is part of the EnsEMBL project ()
Questions can be posted to the ensembl-dev mailing list: ensembl-dev@ebi.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _