Bio::EnsEMBL::Funcgen::Utils
HealthChecker
Toolbar
Summary
Bio::EnsEMBL::Funcgen::Utils::Helper
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
e.g.
my $object = Bio::EnsEMBL::Object->new
(
logging => 1,
log_file => "/tmp/Misc.log",
debug_level => 2,
debug_file => "/tmp/Misc.dbg",
);
$object->log("This is a log message.");
$object->debug(1,"This is a debug message.");
$object->system("rmdir /tmp/test");
----------------------------------------------------------------------------
Description
This module provides a set of health-check and update methods that should be run on a funcgen database prior to a release.
Methods
analyse_and_optimise_tables | No description | Code |
check_meta_strings | No description | Code |
check_stable_ids | No description | Code |
db | No description | Code |
log_data_sets | No description | Code |
log_set | No description | Code |
new | No description | Code |
update_db_for_release | No description | Code |
update_meta_coord | No description | Code |
update_meta_schema_version | No description | Code |
validate_RegulatoryFeature_Sets | No description | Code |
validate_new_seq_regions | No description | Code |
Methods description
None available.
Methods code
analyse_and_optimise_tables | description | prev | next | Top |
# Arg[0]     : None
# Description: Runs 'analyze table' followed by 'optimize table' on every
#              table listed by 'show tables', logging each result row as a
#              tab-separated line. When the 'skip_analyse' attribute is set
#              it only logs that the step was skipped.
# Returntype : None
sub analyse_and_optimise_tables {
    my ($self) = @_;

    if ($self->{'skip_analyse'}) {
        $self->log('Skipping analyse/optimise tables');
        return;
    }

    my @table_rows = @{$self->db->dbc->db_handle->selectall_arrayref('show tables;')};

    foreach my $row (@table_rows) {
        # Each row is an array ref; flatten it to a string exactly as list
        # interpolation would (elements joined by a single space).
        my $table = "@{$row}";
        $self->log("Analysing and optimising table $table:");

        # Analyse first, then optimise, logging the output of each in turn.
        foreach my $statement_prefix ('analyze table ', 'optimize table ') {
            my @result_rows = @{$self->db->dbc->db_handle->selectall_arrayref($statement_prefix.$table)};

            foreach my $result_row (@result_rows) {
                $self->log(join("\t", @{$result_row}));
            }
        }
    }

    return;
}
1; } |
# Arg[0]     : optional boolean - when true, every FeatureSet of type
#              'regulatory' is checked; otherwise only the FeatureSet named
#              'RegulatoryFeatures' is checked.
# Description: Validates the regbuild.feature_set_ids / regbuild.feature_type_ids
#              meta entries for each regulatory FeatureSet:
#                - every listed supporting set must exist in the DB
#                - the feature_type_ids string must match the FeatureTypes of
#                  the listed sets (it is generated and stored if absent)
#                - the display_label of one RegulatoryFeature of the set must
#                  have as many characters as there are supporting sets
#              All problems are passed to $self->report.
# Returntype : None
sub check_meta_strings {
    my ($self, $all_builds) = @_;

    $self->log_header('Checking meta strings');
    warn "Need to check/update regbuild.version and regbuild.initial_release_date";

    my $fset_a = $self->db->get_FeatureSetAdaptor;
    my $mc     = $self->db->get_MetaContainer;
    my $regf_a = $self->db->get_RegulatoryFeatureAdaptor;
    my @regf_fsets;

    if ($all_builds) {
        @regf_fsets = @{$fset_a->fetch_all_by_type('regulatory')};
    }
    else {
        my $fset = $fset_a->fetch_by_name('RegulatoryFeatures');
        push @regf_fsets, $fset if defined $fset;
    }

    if (scalar(@regf_fsets) == 0) {
        $self->report("WARNING: Found no regulatory FeatureSets for check_meta_strings");
        return;
    }

    $self->log_header("Validating meta entries for FeatureSets:\t".join("\t", (map $_->name, @regf_fsets)));

    FSET: foreach my $fset (@regf_fsets) {
        # Archived builds carry a version suffix in the set name, which is
        # mirrored in the meta keys as '_v<version>'.
        # NOTE(review): this splits on the first 'v' anywhere in the name -
        # confirm no set names contain another 'v'.
        my (undef, $build_version) = split /v/, $fset->name;
        $build_version = (defined $build_version) ? '_v'.$build_version : '';

        my $fset_string_key  = 'regbuild.feature_set_ids'.$build_version;
        my $ftype_string_key = 'regbuild.feature_type_ids'.$build_version;
        my $fset_string      = $mc->list_value_by_key($fset_string_key)->[0];
        my $ftype_string     = $mc->list_value_by_key($ftype_string_key)->[0];

        # Substitute empty strings for missing values so the log line does not
        # trigger 'uninitialized' warnings.
        $self->log('Validating '.$fset->name.":\n\t$fset_string_key("
                   .(defined $fset_string ? $fset_string : '')
                   .") vs $ftype_string_key("
                   .(defined $ftype_string ? $ftype_string : '').")");

        if (! defined $fset_string) {
            my $missing = (defined $ftype_string)
              ? $fset_string_key
              : "$fset_string_key or $ftype_string_key";
            $self->report("FAIL:\tNo $missing found in meta table");
            next FSET;
        }

        my @fset_ids = split /,/, $fset_string;
        my @ftype_ids;

        if (defined $ftype_string) {
            @ftype_ids = split /,/, $ftype_string;

            # Only meaningful when a feature_type string exists; an absent
            # string is regenerated below rather than being a length failure.
            if (scalar(@fset_ids) != scalar(@ftype_ids)) {
                $self->report("FAIL:\tLength mismatch between $fset_string_key and $ftype_string_key");
            }
        }
        else {
            $self->report("WARNING:\tNo $ftype_string_key found in meta table, will update using $fset_string_key");
        }

        my @new_ftype_ids;
        my $ftype_fail   = 0;
        my $missing_sets = 0;

        foreach my $i (0..$#fset_ids) {
            my $supporting_set_id = $fset_ids[$i];
            my $sset              = $fset_a->fetch_by_dbID($supporting_set_id);

            if (! defined $sset) {
                $missing_sets = 1;
                $self->report("FAIL:\t$fset_string_key $supporting_set_id does not exist in the DB");
                next;
            }

            my $sset_ftype_id = $sset->feature_type->dbID;
            push @new_ftype_ids, $sset_ftype_id;

            next if ! defined $ftype_string;

            # The stored string may be shorter than the set string, so guard
            # against comparing with an undefined element.
            my $stored_ftype_id = $ftype_ids[$i];

            if (! defined $stored_ftype_id || $sset_ftype_id != $stored_ftype_id) {
                $ftype_fail = 1;
                $self->report("FAIL:\t$fset_string_key $supporting_set_id(".$sset->name.") FeatureType("
                              .$sset->feature_type->name.") does not match $ftype_string_key "
                              .(defined $stored_ftype_id ? $stored_ftype_id : ''));
            }
        }

        my $new_ftype_string = join(',', @new_ftype_ids);

        if (! defined $ftype_string) {
            if ($missing_sets) {
                # A partial string would be positionally misaligned with the
                # feature_set_ids string, so do not store it.
                $self->report("FAIL:\tNot updating $ftype_string_key as some $fset_string_key entries do not exist in the DB");
            }
            else {
                $self->log("Updating $ftype_string_key to:\t$new_ftype_string");
                # Name the columns explicitly: the meta table also has a
                # species_id column, so a three value positional insert fails.
                # NOTE(review): uses the DBAdaptor's species_id - confirm this
                # is the right scope for regbuild meta entries.
                $self->db->dbc->db_handle->do
                  ('INSERT INTO meta (species_id, meta_key, meta_value) VALUES (?, ?, ?)',
                   undef, $self->db->species_id, $ftype_string_key, $new_ftype_string);
            }
        }
        elsif ($ftype_fail) {
            $self->report("FAIL:\t$ftype_string_key($ftype_string) does not match $fset_string_key types($new_ftype_string)");
        }

        # Pick any one RegulatoryFeature of this set to sanity check the
        # length of its regulatory string. selectrow_arrayref returns undef
        # when there are no rows, so do not dereference it blindly.
        my $regf_row = $self->db->dbc->db_handle->selectrow_arrayref
          ('select regulatory_feature_id from regulatory_feature where feature_set_id=? limit 1',
           undef, $fset->dbID);
        my $regf_dbID = (defined $regf_row) ? $regf_row->[0] : undef;

        if (! defined $regf_dbID) {
            $self->report("FAIL:\tNo RegulatoryFeatures found for FeatureSet ".$fset->name);
            next FSET;
        }

        my $rf_string = $regf_a->fetch_by_dbID($regf_dbID)->{'display_label'};

        if (length($rf_string) != scalar(@fset_ids)) {
            $self->report("FAIL:\tRegulatory string length mismatch between RegulatoryFeature($regf_dbID) and $fset_string_key:\n$rf_string("
                          .length($rf_string).")\n$fset_string(".scalar(@fset_ids).")");
        }
    }

    return;
}
} |
# Arg[0]     : optional list of slice objects to break the NULL stable ID
#              count down by; defaults to all toplevel slices
# Description: Counts RegulatoryFeatures of the 'RegulatoryFeatures'
#              FeatureSet which have a NULL stable_id for the current species.
#              If any are found the total is reported as a FAIL and the count
#              for each slice is logged.
# Returntype : None
sub check_stable_ids {
    my ($self, @slices) = @_;

    my $species_id = $self->db()->species_id();

    $self->log_header('Checking stable IDs');

    my $fset_a = $self->db->get_FeatureSetAdaptor;
    my $fset   = $fset_a->fetch_by_name('RegulatoryFeatures');

    if (! $fset) {
        $self->report('WARNING: No RegulatoryFeatures FeatureSet found');
        return;
    }

    # Values are bound rather than interpolated: the species_id was previously
    # embedded in a single-quoted string and therefore never substituted.
    # The primary key is counted because COUNT(stable_id) ignores NULLs and
    # would always be 0 for rows selected on 'stable_id is NULL'.
    my $sql = 'select count(rf.regulatory_feature_id) '
            . 'from regulatory_feature rf, seq_region sr, coord_system cs '
            . 'where rf.stable_id is NULL '
            . 'and rf.seq_region_id = sr.seq_region_id '
            . 'and sr.coord_system_id = cs.coord_system_id '
            . 'and cs.species_id = ? '
            . 'and rf.feature_set_id = ?';
    my @binds = ($species_id, $fset->dbID);

    my ($null_sids) = @{$self->db->dbc->db_handle->selectrow_arrayref($sql, undef, @binds)};

    if (! $null_sids) {
        $self->log('No NULL stable IDs found');
        return;
    }

    $self->report("FAIL: Found a total of $null_sids NULL stable IDs");

    if (! @slices) {
        my $slice_a = $self->db->get_SliceAdaptor;
        @slices = @{$slice_a->fetch_all('toplevel', 1)};
    }

    # Same count, restricted to a single seq_region name.
    my $slice_sql = $sql.' and sr.name = ?';

    foreach my $slice (@slices) {
        ($null_sids) = @{$self->db->dbc->db_handle->selectrow_arrayref
                           ($slice_sql, undef, @binds, $slice->seq_region_name)};
        $self->log('Slice '.$slice->name." has $null_sids NULL stable IDs");
    }

    return;
}
# Arg[0]     : None
# Description: Getter for the value stored under the 'db' attribute (set by
#              the constructor).
# Returntype : whatever is stored in $self->{'db'}
sub db {
    my $self = shift;

    return $self->{'db'};
}
# Arg[0]     : None
# Description: Fetches all DataSets (restricted to status 'DISPLAYABLE' when
#              the 'check_displayable' attribute is set), logs a header with
#              their number and then logs each DataSet together with its
#              product FeatureSet (if any) and its supporting sets.
# Returntype : None
sub log_data_sets {
    my ($self) = @_;

    my $only_displayable = $self->{'check_displayable'};
    my $status           = $only_displayable ? 'DISPLAYABLE' : undef;

    my $fetched = $self->db->get_DataSetAdaptor->fetch_all($status);
    my @dsets   = (defined $fetched) ? @{$fetched} : ();

    # e.g. 'Checking 3 DISPLAYABLE DataSets' or 'Checking 3 DataSets'
    my @header_words = ('Checking', scalar(@dsets));
    push @header_words, $status if $only_displayable;
    push @header_words, 'DataSets';
    $self->log_header(join(' ', @header_words));

    foreach my $dset (@dsets) {
        $self->log_set("Found DataSet:\t\t", $dset);

        my $product_fset = $dset->product_FeatureSet;

        if ($product_fset) {
            $self->log_set("Product FeatureSet:\t", $product_fset);
        }

        my @supporting_sets = @{$dset->get_supporting_sets};

        foreach my $sset (@supporting_sets) {
            $self->log_set("SupportingSet:\t\t", $sset);
        }
    }

    return;
}
# Arg[0]     : string - text to prefix the log line with
# Arg[1]     : set object providing display_label, name and is_displayable
# Description: Logs '<text><display_label>(<name>)' for the given set,
#              followed by a tab and 'DISPLAYABLE' when the set is displayable.
# Returntype : None
sub log_set {
    my ($self, $text, $set) = @_;

    my $label   = $set->display_label;
    my $name    = $set->name;
    my $message = $text.$label.'('.$name.')';

    if ($set->is_displayable) {
        $message .= "\tDISPLAYABLE";
    }

    $self->log($message);

    return;
}
# Args       : Named arguments, extracted with rearrange() using the keys
#              DB, BUILDS, SKIP_META_COORD, CHECK_DISPLAYABLE, SKIP_ANALYSE
#              and META_COORD_TABLES. All arguments are also passed on to the
#              parent constructor.
#                DB                - mandatory Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor
#                BUILDS            - optional array ref, defaults to ['DEFAULT']
#                SKIP_META_COORD   - optional boolean
#                CHECK_DISPLAYABLE - optional boolean
#                SKIP_ANALYSE      - optional boolean
#                META_COORD_TABLES - optional array ref of table names
# Description: Constructor. Validates the DBAdaptor, tests the connection and
#              stores the run configuration.
# Returntype : the new object
# Exceptions : Throws if DB is not a valid DBAdaptor
#              Throws if BUILDS or META_COORD_TABLES is defined but not an array ref
#              Throws if META_COORD_TABLES is given together with SKIP_META_COORD
sub new {
    my $caller = shift;
    my $class  = ref($caller) || $caller;
    my $self   = $class->SUPER::new(@_);

    my ($db, $builds, $skip_mc, $check_displayable, $skip_analyse, $meta_coord_tables) =
      rearrange(['DB', 'BUILDS', 'SKIP_META_COORD', 'CHECK_DISPLAYABLE', 'SKIP_ANALYSE', 'META_COORD_TABLES'], @_);

    if (! ($db && ref($db) &&
           $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))) {
        throw('You must provide a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor');
    }

    # Request the handle now so a bad connection surfaces at construction.
    $db->dbc->db_handle;

    $self->{'db'} = $db;

    # NOTE(review): the password ends up on the mysql command line where other
    # users on the host may be able to see it; an empty password would also
    # leave a bare '-p'. Consider a defaults file instead.
    $self->{'mysql_connect_string'} = 'mysql -h'.$db->dbc->host.' -u'.$db->dbc->username.' -p'
      .$db->dbc->password.' '.$db->dbc->dbname.' -P'.$db->dbc->port;
    $self->{'dbname'} = $db->dbc->dbname;

    # -builds is optional, so do not dereference it before checking it.
    if (defined $builds && ref($builds) ne 'ARRAY') {
        throw('-builds parameter must be an array ref');
    }

    $self->{'builds'}          = (defined $builds && scalar(@$builds) > 0) ? $builds : ['DEFAULT'];
    $self->{'skip_meta_coord'} = $skip_mc;

    if (defined $meta_coord_tables) {

        # Validate the type first; it is dereferenced in the message below.
        if (ref($meta_coord_tables) ne 'ARRAY') {
            throw('-meta_coord_tables parameter must be an array ref');
        }

        # Only a conflict when the meta_coord update has been switched off.
        if ($skip_mc) {
            throw('-skip_meta_coord is set, Cannot build meta_coord entries for tables '.join(', ', @$meta_coord_tables));
        }

        $self->{'meta_coord_tables'} = [ @$meta_coord_tables ];
    }

    $self->{'skip_analyse'}      = $skip_analyse;
    $self->{'check_displayable'} = $check_displayable;

    return $self;
}
# Arg[0]     : any arguments are accepted but currently ignored
# Description: Wrapper method which runs the release update/check steps in a
#              fixed order (seq_region validation, schema version, meta_coord,
#              meta strings, analyse/optimise) and then logs reminders for the
#              manual steps.
# Returntype : whatever the final $self->log call returns
sub update_db_for_release {
    my ($self, @args) = @_;

    my @steps = qw(
        validate_new_seq_regions
        update_meta_schema_version
        update_meta_coord
        check_meta_strings
        analyse_and_optimise_tables
    );

    foreach my $step (@steps) {
        $self->$step();
    }

    my @reminders = (
        '??? Have you dumped/copied GFF dumps ???',
        "??? Have you diff'd the sql for each species vs. a fresh schema ???",
        'Need to implement check meta string check',
    );

    foreach my $reminder (@reminders) {
        $self->log($reminder);
    }

    $self->log('Finished updating '.$self->{'dbname'}." for release\n\n");
}
# Arg[0]     : optional list of feature table names; defaults to the
#              'meta_coord_tables' attribute or, failing that, the standard
#              funcgen feature tables
# Description: Rebuilds the meta_coord entries of the given tables for the
#              current species. The existing meta_coord table is first dumped
#              to '<dbname>.meta_coord.backup' in the working directory, then
#              for each table the old entries are deleted and the length of
#              the longest feature in each coord_system is stored.
#              Does nothing except log a message when 'skip_meta_coord' is set.
# Returntype : None
# Exceptions : Throws if the backup dump fails
sub update_meta_coord {
    my ($self, @table_names) = @_;

    my $species_id = $self->db()->species_id();

    if ($self->{'skip_meta_coord'}) {
        $self->log("Skipping meta_coord update\n");
        return;
    }

    $self->log_header('Updating meta_coord table');

    if (! @table_names) {

        if (defined $self->{'meta_coord_tables'}) {
            @table_names = @{$self->{'meta_coord_tables'}};
        }
        else {
            @table_names = qw(
                regulatory_feature
                probe_feature
                external_feature
                annotated_feature
                result_feature
            );
        }
    }

    # Use one name for both the dump and the log message; they previously
    # differed by the '.' separator.
    my $backup_file = $self->{'dbname'}.'.meta_coord.backup';

    if (system($self->{'mysql_connect_string'}." -e 'SELECT * FROM meta_coord' > ".$backup_file) != 0) {
        throw("Can't dump the original meta_coord for back up");
    }

    $self->log('Original meta_coord table backed up in '.$backup_file);

    # Table names are identifiers and have to be interpolated; all values are
    # bound.
    foreach my $table_name (@table_names) {
        my $current_sql = 'select distinct(cs.name), mc.coord_system_id, cs.version, mc.max_length '
                        . 'from coord_system cs, meta_coord mc '
                        . 'where mc.table_name = ? '
                        . 'and mc.coord_system_id = cs.coord_system_id '
                        . 'and cs.species_id = ?';

        $self->log('');
        $self->log("Updating meta_coord max_length for $table_name:\n\tname\tcoord_system_id\tversion\tmax_length");

        my @current = @{$self->db->dbc->db_handle->selectall_arrayref
                          ($current_sql, undef, $table_name, $species_id)};

        foreach my $row (@current) {
            $self->log("\t".join("\t", @{$row})."\n");
        }

        $self->log("Deleting old meta_coord entries");
        my $delete_sql = 'DELETE mc FROM meta_coord mc, coord_system cs '
                       . 'WHERE mc.table_name = ? '
                       . 'and mc.coord_system_id = cs.coord_system_id '
                       . 'and cs.species_id = ?';
        $self->db->dbc->db_handle->do($delete_sql, undef, $table_name, $species_id);

        $self->log("Generating new max_lengths");
        my $cs_sql = "select distinct s.coord_system_id "
                   . "from coord_system cs, seq_region s, $table_name t "
                   . "WHERE t.seq_region_id = s.seq_region_id "
                   . "and s.coord_system_id = cs.coord_system_id "
                   . "and cs.species_id = ?";
        my @cs_ids = map { $_->[0] }
          @{$self->db->dbc->db_handle->selectall_arrayref($cs_sql, undef, $species_id)};

        $self->log("New max_lengths for $table_name are:");
        $self->log(join("\t", ('coord_system_id', 'max_length', 'longest record dbID')));

        # Every fragment ends in a space so that adjacent clauses can never
        # run together (the species_id value used to be fused to 'order by').
        my $max_sql = "SELECT s.coord_system_id, (t.seq_region_end - t.seq_region_start + 1 ) as max, t.${table_name}_id "
                    . "FROM $table_name t, seq_region s, coord_system cs "
                    . "WHERE t.seq_region_id = s.seq_region_id "
                    . "and s.coord_system_id = ? "
                    . "and s.coord_system_id = cs.coord_system_id "
                    . "and cs.species_id = ? "
                    . "order by max desc limit 1";

        foreach my $cs_id (@cs_ids) {
            my @rows = @{$self->db->dbc->db_handle->selectall_arrayref
                           ($max_sql, undef, $cs_id, $species_id)};

            # Row layout is (coord_system_id, max_length, dbID of the longest record)
            my @longest = @{$rows[0]};
            $self->log(join("\t\t", @longest));

            $self->db->dbc->db_handle->do('INSERT INTO meta_coord values(?, ?, ?)',
                                          undef, $table_name, $cs_id, $longest[1]);
        }
    }

    $self->log("Finished updating meta_coord max_lengths\n");

    return;
}
# Arg[0]     : None
# Description: Replaces the 'schema_version' meta entry with the first element
#              returned by get_schema_and_build for the current dbname, and
#              logs the new value.
# Returntype : whatever the final $self->log_header call returns
sub update_meta_schema_version {
    my ($self) = @_;

    my $dbname         = $self->{'dbname'};
    my $schema_version = $self->get_schema_and_build($dbname)->[0];

    # Remove any existing entry before inserting the new one.
    my @statements = (
        'DELETE from meta where meta_key="schema_version"',
        "INSERT into meta(meta_key, species_id, meta_value) values ('schema_version', NULL, '$schema_version')",
    );

    foreach my $statement (@statements) {
        $self->db->dbc->db_handle->do($statement);
    }

    $self->log_header("Updated meta.schema_version to $schema_version");
}
# Description: Placeholder with no implementation; it performs no checks.
# Returntype : None
sub validate_RegulatoryFeature_Sets {
    return;
}
validate_new_seq_regions | description | prev | next | Top |
# Arg[0]     : optional boolean - force the import even if the schema/build
#              of the funcgen DB and its dnadb differ
# Description: Makes sure the seq_region/coord_system information for every
#              toplevel slice of each configured build is stored, by passing a
#              dummy ProbeFeature on each slice to the ProbeFeatureAdaptor's
#              _pre_store method.
#              Unless forced, the step is skipped (with a WARNING report) when
#              the schema and build parsed from the funcgen DB name differ
#              from those parsed from the dnadb name.
# Returntype : 0 if skipped, otherwise None
sub validate_new_seq_regions {
    my ($self, $force) = @_;

    if (! $force) {
        my $efgdb_sm = join('_', @{$self->get_schema_and_build($self->{'dbname'})});

        # Compare against the dnadb's own name; deriving both values from the
        # funcgen dbname made this check a no-op.
        my $dnadb_sm = join('_', @{$self->get_schema_and_build($self->db->dnadb->dbc->dbname)});

        if ($efgdb_sm ne $dnadb_sm) {
            $self->report("WARNING Skipped validate_new_seq_regions as schema_versions are mismatched:\t".
                          "efgdb $efgdb_sm\tdnadb $dnadb_sm");
            return 0;
        }
    }

    my $pf_adaptor    = $self->db->get_ProbeFeatureAdaptor();
    my $slice_adaptor = $self->db->dnadb->get_SliceAdaptor();

    $self->log_header('Validating new coord_systems/seq_regions');

    foreach my $build (@{$self->{'builds'}}) {
        $self->log("Importing seq_region/coord_system info for build:\t".$build);

        foreach my $slice (@{$slice_adaptor->fetch_all('toplevel', $build, 1)}) {

            # Refetch by region name when the slice does not start at 1.
            if ($slice->start() != 1) {
                $self->log("Reslicing slice:\t".$slice->name());
                $slice = $slice_adaptor->fetch_by_region($slice->coord_system_name(), $slice->seq_region_name());
            }

            $self->log("_pre_storing seq_region info for slice:\t".$slice->name());

            # The feature is never stored itself; it only carries the slice
            # into _pre_store.
            my $pseudo_feature = Bio::EnsEMBL::Funcgen::ProbeFeature->new
              (
               -slice  => $slice,
               -start  => 0,
               -end    => 0,
               -strand => 0,
              );

            $pf_adaptor->_pre_store($pseudo_feature);
        }
    }

    $self->log("Finished validating seq_regions\n");

    return;
}
General documentation
-debug
Turns on and defines the verbosity of debugging output, 1-3, default = 0 = off
-log_file|l
Defines the log file, default = "${instance}.log"
-help
Print a brief help message and exits.
-man
Prints the manual page and exits.
Arg[0] :
Example :
Description: Wrapper method to perform all common update functions
Returntype :
Exceptions : None
Caller : General
Status : at risk