Bio::EnsEMBL::Funcgen::Utils HealthChecker
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw )
my $object = Bio::EnsEMBL::Object->new ( logging => 1, log_file => "/tmp/Misc.log", debug_level => 2, debug_file => "/tmp/Misc.dbg", ); $object->log("This is a log message."); $object->debug(1,"This is a debug message."); $object->system("rmdir /tmp/test"); ----------------------------------------------------------------------------
This program performs several health check and update methods prior to release.
No description
No description
No description
No description
No description
No description
No description
No description
No description
No description
No description
No description
Methods description
None available.
Methods code
sub analyse_and_optimise_tables {
  my $self = shift;

  #myisamchk --analyze. or analyze statement
if($self->{'skip_analyse'}){ $self->log('Skipping analyse/optimise tables'); return; } my $sql = 'show tables;'; my @tables = @{$self->db->dbc->db_handle->selectall_arrayref($sql)}; map $_ = "@{$_}", @tables; my $analyse_sql = 'analyze table '; my $optimise_sql = 'optimize table '; foreach my $table(@tables){ $self->log("Analysing and optimising table $table:"); my @anal_info = @{$self->db->dbc->db_handle->selectall_arrayref($analyse_sql.$table)}; foreach my $line_ref(@anal_info){ $self->log(join("\t", @$line_ref)); } my @opt_info = @{$self->db->dbc->db_handle->selectall_arrayref($optimise_sql.$table)}; foreach my $line_ref(@opt_info){ $self->log(join("\t", @$line_ref)); } } return; } ### Check for regulatory meta entries for all regulatory feature_sets
sub check_meta_strings {
  my ($self, $all_builds) = @_;

  $self->log_header('Checking meta strings');

  warn "Need to check/update rebuild.version and regbuild.initial_release_date";

  #update flag?
my @regf_fsets; my $passed = 1; my $fset_a = $self->db->get_FeatureSetAdaptor; my $mc = $self->db->get_MetaContainer; my $regf_a = $self->db->get_RegulatoryFeatureAdaptor; if($all_builds){ @regf_fsets = @{$fset_a->fetch_all_by_type('regulatory')}; }else{ my $fset = $fset_a->fetch_by_name('RegulatoryFeatures'); push @regf_fsets, $fset if defined $fset; } #Need to remove/implement this
my @meta_keys = ('reguild.feature_set_ids', 'regbuild.feature_type_ids'); #What about anchor/seed sets?
if(scalar(@regf_fsets) == 0){ $self->report("WARNING: Found no regulatory FeatureSets for check_meta_strings"); } else{ $self->log_header("Validating meta entries for FeatureSets:\t".join("\t", (map $_->name, @regf_fsets))); #How do we validate this?
#Check all feature_sets exist
#Pull back some features from a test slice and check the number of bits match.
#Check the feature_type string exists and matches else create.
foreach my $fset(@regf_fsets){ #get version number of build
my (undef, $build_version) = split/v/, $fset->name; $build_version = (defined $build_version) ? '_v'.$build_version : ''; my $fset_string_key = 'regbuild.feature_set_ids'.$build_version; my $ftype_string_key = 'regbuild.feature_type_ids'.$build_version; my $fset_string = $mc->list_value_by_key($fset_string_key)->[0]; my $ftype_string = $mc->list_value_by_key($ftype_string_key)->[0]; $self->log('Validating '.$fset->name.":\n\t$fset_string_key($fset_string) vs $ftype_string_key($ftype_string)"); #Test fset vs ftype string
if(! defined $fset_string && ! defined $ftype_string){ $self->report("FAIL:\tNo $fset_string_key or $ftype_string_key found in meta table"); } elsif(! defined $fset_string){ $self->report("FAIL:\tNo $fset_string_key found in meta table"); } else{ my @fset_ids = split/,/, $fset_string; my @ftype_ids; my @new_ftype_ids; my $ftype_fail = 0; if(defined $ftype_string){ @ftype_ids = split/,/, $ftype_string; } else{ $self->report("WARNING:\tNo $ftype_string_key found in meta table, will update using $fset_string_key"); } #Now need to work backwards through ftypes to remove pseudo ftypes before validating
#New string should be A,A,A;S,S,S,S,S,S;P,P,P
#Where A is and Anchor/Seed set
#S is a supporting set
#P is a pseudo feature type e.g. TSS proximal
if(scalar(@fset_ids) != scalar(@ftype_ids)){ $self->report("FAIL:\tLength mismatch between $fset_string_key and $ftype_string_key"); } foreach my $i(0..$#fset_ids){ my $supporting_set_id = $fset_ids[$i]; my $sset = $fset_a->fetch_by_dbID($supporting_set_id); if(! defined $sset){ $self->report("FAIL:\t$fset_string_key $supporting_set_id does not exist in the DB"); } else{ #test/build ftype string
if(defined $ftype_string){ if($sset->feature_type->dbID != $ftype_ids[$i]){ $ftype_fail = 1; $self->report("FAIL:\t$fset_string_key $supporting_set_id(".$sset->name.") FeatureType(".$sset->feature_type->name.") does not match $ftype_string_key $ftype_ids[$i]"); } } push @new_ftype_ids, $sset->feature_type->dbID; } } #Set ftype_string
#This will not account for pseudo ftypes? Remove!!!?
my $new_ftype_string = join(',', @new_ftype_ids); if(! defined $ftype_string){ $self->log("Updating $ftype_string_key to:\t$new_ftype_string"); $self->db->dbc->db_handle->do("INSERT into meta values(NULL, '$ftype_string_key', '$new_ftype_string')"); } elsif($ftype_fail){ $self->report("FAIL:\t$ftype_string_key($ftype_string) does not match $fset_string_key types($new_ftype_string)"); } #Finally validate versus a reg feat
#Need to change this to ftype string rather than fset string?
my ($regf_dbID) = @{$self->db->dbc->db_handle->selectrow_arrayref('select regulatory_feature_id from regulatory_feature where feature_set_id='.$fset->dbID.' limit 1')}; if(! defined $regf_dbID){ $self->report("FAIL:\tNo RegulatoryFeatures found for FeatureSet ".$fset->name); } else{ my $rf_string = $regf_a->fetch_by_dbID($regf_dbID)->{'display_label'};#Direct access to avoid feature type
if(length($rf_string) != scalar(@fset_ids)){ $self->report("FAIL:\tRegulatory string length mismatch between RegulatoryFeature($regf_dbID) and $fset_string_key:\n$rf_string(".length($rf_string).")\n$fset_string(".scalar(@fset_ids).")"); } } } } } return; } #Change this to log sets and incorporate RegFeat FeatureSet as standard
sub check_stable_ids {
  my ($self, @slices) = @_;
  my $species_id = $self->db()->species_id();

  $self->log_header('Checking stable IDs');

  my $fset_a = $self->db->get_FeatureSetAdaptor;
  my $fset = $fset_a->fetch_by_name('RegulatoryFeatures');

  if(! $fset){
	$self->report('WARNING: No RegulatoryFeatures FeatureSet found');

	#Can't count NULL field, so have to count regulatory_ffeature_id!!!
my $sql = 'select count(rf.regulatory_feature_id) from regulatory_feature rf, seq_region sr, coord_system cs where rf.stable_id is NULL and rf.seq_region_id = sr.seq_region_id and sr.coord_system_id = cs.coord_system_id and cs.species_id = $species_id and rf.feature_set_id='.$fset->dbID; #warn "sql is $sql";
my ($null_sids) = @{$self->db->dbc->db_handle->selectrow_arrayref($sql)}; if($null_sids){ $self->report("FAIL: Found a total of $null_sids NULL stable IDs"); my $slice_a = $self->db->get_SliceAdaptor; if(! @slices){ @slices = @{$slice_a->fetch_all('toplevel', 1)}; } foreach my $slice(@slices){ my $sr_name=$slice->seq_region_name; $sql = 'select count(rf.stable_id) from regulatory_feature rf, seq_region sr, coord_system cs where rf.seq_region_id=sr.seq_region_id and"'.$sr_name.'" and sr.coord_system_id = cs.coord_system_id and cs.species_id = $species_id and rf.stable_id is NULL and rf.feature_set_id='.$fset->dbID; ($null_sids) = @{$self->db->dbc->db_handle->selectrow_arrayref($sql)}; $self->log('Slice '.$slice->name." has $null_sids NULL stable IDs"); } } else{ $self->log('No NULL stable IDs found'); } } return;
sub db {
  my ($self) = @_;
  return $self->{'db'};
sub log_data_sets {
  my $self = shift;
  my ($status);
  my $txt = 'Checking ';

  $status = 'DISPLAYABLE' if($self->{'check_displayable'});

  my @dsets;
  my $dsets = $self->db->get_DataSetAdaptor->fetch_all($status);
  @dsets = @$dsets if defined $dsets;

  $txt .= scalar(@dsets).' ';
  $txt.= $status.' ' if($self->{'check_displayable'});

  $txt .= 'DataSets';

  foreach my $dset(@dsets){
	$self->log_set("Found DataSet:\t\t", $dset) ;

	my $fset = $dset->product_FeatureSet;
	$self->log_set("Product FeatureSet:\t", $fset) if $fset;
	#my @supporting_sets = @{$dset->get_supporting_sets};
if(my @supporting_sets = @{$dset->get_supporting_sets}){ map $self->log_set("SupportingSet:\t\t", $_), @supporting_sets; } } #$self->log_header('Checking Regulatory FeatureSets');
sub log_set {
  my ($self, $text, $set) = @_;
  #if(! $set->isa('Bio::EnsEMBL::Funcgen::DataSet')){
# $text .= $set->set_type.":\t";
# }
$text .= $set->display_label.'('.$set->name.')'; $text .= "\tDISPLAYABLE" if($set->is_displayable); $self->log($text); return;
sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;
  my $self = $class->SUPER::new(@_);
  #validate and set type, analysis and feature_set here
my ($db, $builds, $skip_mc, $check_displayable, $skip_analyse, $meta_coord_tables) = rearrange(['DB', 'BUILDS', 'SKIP_META_COORD', 'CHECK_DISPLAYABLE', 'SKIP_ANALYSE', 'META_COORD_TABLES'], @_); if (! ($db && ref($db) && $db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){ throw('You must provide a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'); } #test connection
$db->dbc->db_handle; $self->{'db'} = $db; $self->{'mysql_connect_string'} = 'mysql -h'.$db->dbc->host.' -u'.$db->dbc->username.' -p' .$db->dbc->password.' '.$db->dbc->dbname.' -P'.$db->dbc->port; $self->{'dbname'} = $db->dbc->dbname; $self->{'builds'} = (scalar(@$builds)>0) ? $builds : ['DEFAULT']; $self->{'skip_meta_coord'} = $skip_mc; if(defined $meta_coord_tables){ throw('-skip_meta_coord is set, Cannot build meta_coord entries for tables '.join(', ', @$meta_coord_tables)); if(! ref($meta_coord_tables) eq 'ARRAY'){ throw('-meta_coord_tables parameter must be an array ref'); } @{$self->{'meta_coord_tables'}} = @$meta_coord_tables; } $self->{'skip_analyse'} = $skip_analyse; $self->{'check_displayable'} = $check_displayable; #$self->log_header(
return $self;
sub update_db_for_release {
  my ($self, @args) = @_;
  #do seq_region_update to validate dnadb first
#hence avoiding redoing longer methods
$self->update_meta_schema_version; $self->update_meta_coord; $self->check_meta_strings; $self->analyse_and_optimise_tables; $self->log('??? Have you dumped/copied GFF dumps ???'); $self->log("??? Have you diff'd the sql for each species vs. a fresh schema ???"); $self->log('Need to implement check meta string check'); #Log footer? Pass optional counts hash?
$self->log('Finished updating '.$self->{'dbname'}." for release\n\n");
sub update_meta_coord {
  my ($self, @table_names) = @_;
  my $species_id = $self->db()->species_id();
	$self->log("Skipping meta_coord update\n");

  $self->log_header('Updating meta_coord table');

  #set default table_name
if(! @table_names || scalar(@table_names) == 0){ #Can we do this via DBAdaptor and get all available adaptors which are BaseFeatureAdaptors then grab the first table name
if(defined $self->{'meta_coord_tables'}){ @table_names = @{$self->{'meta_coord_tables'}}; } else{#default
@table_names = qw( regulatory_feature probe_feature external_feature annotated_feature result_feature ); } } #backup meta coord
#if(system("mysql -h$host -P$port -u$user -p$pass -N "
# . "-e 'SELECT * FROM meta_coord' ${species}_funcgen_${schema_build}"
# . "> ${species}_funcgen_${schema_build}.meta_coord.backup"
# ) != 0 ){
if(system($self->{'mysql_connect_string'}." -e 'SELECT * FROM meta_coord'" . '> '.$self->{'dbname'}.'meta_coord.backup' ) != 0 ){ throw("Can't dump the original meta_coord for back up");#will this get copied to log?
} else { $self->log('Original meta_coord table backed up in '. $self->{'dbname'}.'.meta_coord.backup'); } #Update each max_length for table_name and coord_system
foreach my $table_name(@table_names){ my $sql1 = "select distinct(, mc.coord_system_id, cs.version, mc.max_length from coord_system cs, meta_coord mc where mc.table_name='$table_name' and mc.coord_system_id=cs.coord_system_id and cs.species_id = $species_id"; $self->log(''); $self->log("Updating meta_coord max_length for $table_name:\n\tname\tcoord_system_id\tversion\tmax_length"); #can we test for emtpy array here? Then skip delete.
my @info = @{$self->db->dbc->db_handle->selectall_arrayref($sql1)}; #log this
map {$self->log("\t".join("\t", @{$_})."\n")} @info; # Clean old entries
$self->log("Deleting old meta_coord entries"); my $sql = "DELETE mc FROM meta_coord mc, coord_system cs WHERE mc.table_name ='$table_name' and mc.coord_system_id = cs.coord_system_id and cs.species_id = $species_id"; $self->db->dbc->db_handle->do($sql); # Generate new max_lengths
$self->log("Generating new max_lengths"); #Is this query running for each redundant cs_id?
#would it be more efficient to retrieve the NR cs_ids first and loop the query for each cs_id?
#Can we get the dbID of the largest feature for ease of checking?
#This won't work as we're grouping by coord_system
#would need to select distinct coord_system_id for table first
#This may well slow down quite a bit doing it this way
$sql = "select distinct s.coord_system_id from coord_system cs, seq_region s, $table_name t WHERE t.seq_region_id = s.seq_region_id and s.coord_system_id = cs.coord_system_id and cs.species_id = $species_id"; my @cs_ids = @{$self->db->dbc->db_handle->selectall_arrayref($sql)}; #Convert single element arrayrefs to scalars
map $_ = ${$_}[0], @cs_ids; $self->log("New max_lengths for $table_name are:"); #wtf?
#map {$self->log(join("\t", @{$_}))} ['coord_system_id', 'max_length', 'longest record dbID'];
$self->log(join("\t", ('coord_system_id', 'max_length', 'longest record dbID'))); foreach my $cs_id(@cs_ids){ #This will always give a length of 1 even if there are no features present
$sql = "SELECT s.coord_system_id, (t.seq_region_end - t.seq_region_start + 1 ) as max, t.${table_name}_id " . "FROM $table_name t, seq_region s, coord_system cs " . "WHERE t.seq_region_id = s.seq_region_id " . "and s.coord_system_id=${cs_id} " . "and s.coord_system_id = cs.coord_system_id and cs.species_id = $species_id" . "order by max desc limit 1"; @info = @{$self->db->dbc->db_handle->selectall_arrayref($sql)}; #Convert one multi element array_ref into array
@info = @{$info[0]}; $self->log(join("\t\t", @info)); $sql = "INSERT INTO meta_coord values(\"${table_name}\",\" ${cs_id}\",\" $info[1]\")"; #$sql = "INSERT INTO meta_coord "
#. "SELECT '$table_name', s.coord_system_id, "
# . "MAX( t.seq_region_end - t.seq_region_start + 1 ) "
# . "FROM $table_name t, seq_region s "
# . "WHERE t.seq_region_id = s.seq_region_id "
# . "GROUP BY s.coord_system_id";
$self->db->dbc->db_handle->do($sql); } #$self->log("New max_lengths for $table_name are:");
#@info = @{$self->db->dbc->db_handle->selectall_arrayref($sql1)};
#map {$self->log(join("\t", @{$_})."\n")} @info;
} $self->log("Finished updating meta_coord max_lengths\n"); return;
sub update_meta_schema_version {
  my ($self) = @_;
  my $schema_version = $self->get_schema_and_build($self->{'dbname'})->[0];

  my $sql = 'DELETE from meta where meta_key="schema_version"';
  $sql = "INSERT into meta(meta_key, species_id, meta_value) values ('schema_version', NULL, '$schema_version')";

  $self->log_header("Updated meta.schema_version to $schema_version");
sub validate_RegulatoryFeature_Sets {
  #checks feature data and supporting sets
#links between DatSet and FeatureSet, i.e. correct naming, not linking to old set
sub validate_new_seq_regions {
  my ($self, $force) = @_;
  #do we need to add the none default levels here?
#or are we only bothered about those which constitute the toplevel?
#To make sure we have all the correct levels in eFG we need to get all the names.
#then get all by name from the core db and set them as the dnadb.
# we also need to get all the toplevel seq_regions and store them in the seq_region table
#use BaseFeatureAdaptor::_pre_store with and array of pseudo feature on each top level slice
#Validate the efgdb and dnadb schema version are the same first
if(! $force){ my $efgdb_sm = join('_', @{$self->get_schema_and_build($self->{'dbname'})}); my $dnadb_sm = join('_', @{$self->get_schema_and_build($self->{'dbname'})}); if($efgdb_sm ne $dnadb_sm){ $self->report("WARNING Skipped validate_new_seq_regions as schema_versions are mismatched:\t". "efgdb $efgdb_sm\tdnadb $dnadb_sm"); return 0; } } my $pf_adaptor = $self->db->get_ProbeFeatureAdaptor(); my $slice_adaptor = $self->db->dnadb->get_SliceAdaptor(); $self->log_header('Validating new coord_systems/seq_regions'); foreach my $build(@{$self->{'builds'}}){ $self->log("Importing seq_region/coord_system info for build:\t".$build); foreach my $slice(@{$slice_adaptor->fetch_all('toplevel', $build, 1)}){ #1 is non-reference flag, essential for haplotype regions
if($slice->start() != 1){ $self->log("Reslicing slice:\t".$slice->name()); #we must have some sort of PAR linked region i.e. Y
$slice = $slice_adaptor->fetch_by_region($slice->coord_system_name(), $slice->seq_region_name()); } #we need test if it needs doing first?
#we would need to test for the coord_systems outside of this loop
#and then for each seq_region inside the loop if the coord_system is present
$self->log("_pre_storing seq_region info for slice:\t".$slice->name()); my $pseudo_feature = Bio::EnsEMBL::Funcgen::ProbeFeature->new ( -slice => $slice, -start => 0, -end => 0, -strand => 0, ); $pf_adaptor->_pre_store($pseudo_feature); #This will create a meta_coord entry of max_length 1 for features which have an absent meta_coord entry
} } $self->log("Finished validating seq_regions\n"); return;
General documentation
        Turns on and defines the verbosity of debugging output, 1-3, default = 0 = off
                Defines the log file, default = "${instance}.log"
                Print a brief help message and exits.
                Prints the manual page and exits.
Nathan Johnson,
  Arg[0]     : 
Example :
Description: Wrapper method to perform all common update functions
Returntype :
Exceptions : None
Caller : General
Status : at risk