Raw content of XrefMapper::TestMappings
package XrefMapper::TestMappings;
use vars '@ISA';
@ISA = qw{ XrefMapper::BasicMapper };
use strict;
use warnings;
use XrefMapper::BasicMapper;
use Cwd;
use DBI;
use File::Basename;
use IPC::Open3;
##########################################
# Testing (may be moved to healthchecks)
##########################################
##### Unlinked entries ##############
# ERRORS
# dependent_xref and xref
# primary_xref and xref
# transcript_direct_xref and xref
# translation_direct_xref and xref
# gene_direct_xref and xref
# synonym and xref
# identity_xref and object_xref
# go_xref and object_xref
# gene_transcript_translation and gene_stable_id
# gene_transcript_translation and transcript_stable_id
# gene_transcript_translation and translation_stable_id
# WARNNGS
# gene_direct_xref and gene_stable_id
# transcript
# translation
# All object_xref of type go have a go_xref entry
##### Numbers between xref and core (xref and object_xref) are similar
##### if human or mouse check the number of gene name changes.
sub new {
my($class, $mapper) = @_;
my $self ={};
bless $self,$class;
$self->core($mapper->core);
$self->xref($mapper->xref);
$self->mapper($mapper);
return $self;
}
sub mapper{
my ($self, $arg) = @_;
(defined $arg) &&
($self->{_mapper} = $arg );
return $self->{_mapper};
}
sub unlinked_entries{
my ($self) = @_;
my $failed = 0;
my $xref_id;
my $count;
my $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('tests_started',now())");
$sth_stat->execute();
# dependent_xref and xref
my $count_sql = "select count(1) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null";
my $sql = "select distinct(d.master_xref_id) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null limit 10";
my $sth = $self->xref->dbc->prepare($count_sql);
$sth->execute();
$sth->bind_columns(\$count);
$sth->fetch();
$sth->finish;
if($count){
$failed = 1;
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$xref_id);
print STDERR "SQL QUERY: $sql\n";
while($sth->fetch){
print STDERR "Problem with master xref $xref_id\n";
}
$sth->finish;
}
$count_sql = "select count(1) from dependent_xref d left join xref x on d.dependent_xref_id = x.xref_id where x.xref_id is null";
$sql = "select distinct(d.dependent_xref_id) from dependent_xref d left join xref x on d.dependent_xref_id = x.xref_id where x.xref_id is null limit 10";
$sth = $self->xref->dbc->prepare($count_sql);
$sth->execute();
$sth->bind_columns(\$count);
$sth->fetch();
$sth->finish;
if($count){
$failed = 1;
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$xref_id);
print STDERR "SQL QUERY: $sql\n";
while($sth->fetch){
print STDERR "Problem with dependent xref $xref_id\n";
}
$sth->finish;
}
$count_sql = "select count(1) from primary_xref d left join xref x on d.xref_id = x.xref_id where x.xref_id is null";
$sql = "select distinct(d.xref_id) from primary_xref d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10";
$sth = $self->xref->dbc->prepare($count_sql);
$sth->execute();
$sth->bind_columns(\$count);
$sth->fetch();
$sth->finish;
if($count){
$failed = 1;
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$xref_id);
print STDERR "SQL QUERY: $sql\n";
while($sth->fetch){
print STDERR "Problem with primary xref $xref_id\n";
}
$sth->finish;
}
foreach my $type (qw(transcript translation gene)){
$count_sql = "select count(1) from ".$type."_direct_xref d left join xref x on d.general_xref_id = x.xref_id where x.xref_id is null";
$sql = "select distinct(d.general_xref_id) from ".$type."_direct_xref d left join xref x on d.general_xref_id = x.xref_id where x.xref_id is null limit 10";
$sth = $self->xref->dbc->prepare($count_sql);
$sth->execute();
$sth->bind_columns(\$count);
$sth->fetch();
$sth->finish;
if($count){
$failed = 1;
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$xref_id);
print STDERR "SQL QUERY: $sql\n";
while($sth->fetch){
print STDERR "Problem with ".$type."_direct_xref $xref_id\n";
}
$sth->finish;
}
}
$count_sql = "select count(1) from synonym d left join xref x on d.xref_id = x.xref_id where x.xref_id is null";
$sql = "select distinct(d.xref_id) from synonym d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10";
$sth = $self->xref->dbc->prepare($count_sql);
$sth->execute();
$sth->bind_columns(\$count);
$sth->fetch();
$sth->finish;
if($count){
$failed = 1;
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$xref_id);
print STDERR "SQL QUERY: $sql\n";
while($sth->fetch){
print STDERR "Problem with synonym $xref_id\n";
}
$sth->finish;
}
$count_sql = "select count(1) from identity_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null";
$sql = "select distinct(d.object_xref_id) from identity_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null limit 10";
$sth = $self->xref->dbc->prepare($count_sql);
$sth->execute();
$sth->bind_columns(\$count);
$sth->fetch();
$sth->finish;
if($count){
$failed = 1;
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$xref_id);
print STDERR "SQL QUERY: $sql\n";
while($sth->fetch){
print STDERR "Problem with object_xref $xref_id\n";
}
$sth->finish;
}
$count_sql = "select count(1) from go_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null";
$sql = "select distinct(d.object_xref_id) from go_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null limit 10";
$sth = $self->xref->dbc->prepare($count_sql);
$sth->execute();
$sth->bind_columns(\$count);
$sth->fetch();
$sth->finish;
if($count){
$failed = 1;
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$xref_id);
print STDERR "SQL QUERY: $sql\n";
while($sth->fetch){
print STDERR "Problem with object_xref $xref_id\n";
}
$sth->finish;
}
foreach my $type (qw(transcript translation gene)){
$count_sql = "select count(1) from gene_transcript_translation d left join ".$type."_stable_id x on d.".$type."_id = x.internal_id where x.internal_id is null and d.".$type."_id is not null";
$sql = "select distinct(d.".$type."_id) from gene_transcript_translation d left join ".$type."_stable_id x on d.".$type."_id = x.internal_id where x.internal_id is null and d.".$type."_id is not null limit 10";
$sth = $self->xref->dbc->prepare($count_sql);
$sth->execute();
$sth->bind_columns(\$count);
$sth->fetch();
$sth->finish;
if($count){
$failed = 1;
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$xref_id);
print STDERR "SQL QUERY: $sql\n";
while($sth->fetch){
print STDERR "Problem with ".$type."_id $xref_id\n";
}
$sth->finish;
}
}
$count_sql = "select count(1) from xref x, source s, object_xref o left join go_xref g on o.object_xref_id = g.object_xref_id where x.xref_id = o.xref_id and s.source_id = x.source_id and s.name like 'GO' and g.object_xref_id is null";
$sql = "select distinct(o.object_xref_id) from xref x, source s, object_xref o left join go_xref g on o.object_xref_id = g.object_xref_id where x.xref_id = o.xref_id and s.source_id = x.source_id and s.name like 'GO' and g.object_xref_id is null limit 10";
$sth = $self->xref->dbc->prepare($count_sql);
$sth->execute();
$sth->bind_columns(\$count);
$sth->fetch();
$sth->finish;
if($count){
$failed = 1;
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$xref_id);
print STDERR "SQL QUERY: $sql\n";
while($sth->fetch){
print STDERR "Problem with object_xref $xref_id which is linked to a GO source but has no go_xref reference\n";
}
$sth->finish;
}
if(!$failed){
$sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('tests_finished',now())");
$sth_stat->execute();
}
else{
$sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('tests_failed',now())");
$sth_stat->execute();
}
$sth_stat->finish;
return $failed;
}
sub entry_number_check{
my ($self) = @_;
# No point doing xrefs object_xrefs are more important and gives a better indication of wether things went okay.
my %old_object_xref_count;
my %new_object_xref_count;
my $sth = $self->xref->dbc->prepare('select s.name, count(*) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" and s.name not like "AFFY%" group by s.name');
$sth->execute();
my ($name, $count);
$sth->bind_columns(\$name,\$count);
while($sth->fetch()){
$new_object_xref_count{$name} = $count;
}
$sth->finish;
$sth = $self->core->dbc->prepare('select e.db_name, count(*) from xref x, object_xref ox, external_db e where ox.xref_id = x.xref_id and x.external_db_id = e.external_db_id and e.db_name not like "AFFY%" group by e.db_name');
$sth->execute();
$sth->bind_columns(\$name,\$count);
while($sth->fetch()){
my $change = 0;
$old_object_xref_count{$name} = $count;
if(defined($new_object_xref_count{$name})){
$change = (($new_object_xref_count{$name} - $count)/$new_object_xref_count{$name}) * 100;
if($change > 5){ # increase of 5%
print "WARNING: $name has increased by $change\% was $count now ". $new_object_xref_count{$name}. "\n" if($self->mapper->verbose);
}
elsif($change < -5){ # decrease by 5%
print "WARNING: $name has decreased by $change\% was $count now ". $new_object_xref_count{$name}. "\n" if($self->mapper->verbose);
}
}
else{
print "WARNING: xrefs $name are not in the new database but are in the old???\n" if($self->mapper->verbose);
}
}
$sth->finish;
foreach my $key (keys %new_object_xref_count){
if(!defined($old_object_xref_count{$key})){
print "WARNING: $key has ".$new_object_xref_count{$key}." xrefs in the new database but NONE in the old\n" if($self->mapper->verbose);
}
}
return;
}
sub name_change_check{
my ($self) = @_;
my %new_name; # $old_name{$gene_id} = HGNC_%name
my %id_to_stable_id;
my $official_name = $self->mapper->get_official_name;
if(!defined($official_name)){
return;
}
# print "Checking names\n";
my $sql = 'select x.label, gsi.internal_id, gsi.stable_id from object_xref ox, xref x, gene_stable_id gsi, source s where x.xref_id = ox.xref_id and ox.ensembl_object_type = "Gene" and gsi.internal_id = ox.ensembl_id and x.source_id = s.source_id and s.name like "'.$official_name.'_%"';
my $sth = $self->xref->dbc->prepare($sql);
$sth->execute();
my ($name, $gene_id, $stable_id);
$sth->bind_columns(\$name,\$gene_id, \$stable_id);
my $count = 0;
while($sth->fetch()){
$new_name{$gene_id} = $name;
$id_to_stable_id{$gene_id} = $stable_id;
$count++;
}
$sth->finish;
# print $sql."\n";
# print $count." entries found in xref database\n";
# Use synonyms as well.
my %alias;
$sql = 'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "'.$official_name.'_%" ';
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
my ($syn);
$sth->bind_columns(\$name,\$syn);
$count = 0;
while($sth->fetch()){
$alias{$syn} = $name;
}
$sth->finish;
$sql = 'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "EntrezGene"';
$sth = $self->xref->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$name,\$syn);
while($sth->fetch()){
$alias{$syn} = $name;
}
$sth->finish;
# NOTE ncRNA has higher priority
$sql = "select x.display_label, g.gene_id from gene g, xref x where g.display_xref_id = x.xref_id and biotype = 'protein_coding'";
$sth = $self->core->dbc->prepare($sql);
$sth->execute();
$sth->bind_columns(\$name,\$gene_id);
$count =0;
my $total_count=0;
while($sth->fetch()){
if(defined($new_name{$gene_id})){
$total_count++;
}
if(defined($new_name{$gene_id}) and $new_name{$gene_id} ne $name){
if(!defined($alias{$name}) or $alias{$name} ne $new_name{$gene_id}){
print STDERR "WARN: gene_id ($gene_id) ".$id_to_stable_id{$gene_id}." new = ".$new_name{$gene_id}." old = $name\n";
$count++;
}
}
}
if($total_count){
print STDERR "$count entries with different names out of $total_count protein coding gene comparisons?\n";
}
}
sub direct_stable_id_check{
my ($self) = @_;
foreach my $type (qw(gene transcript translation)){
my $sql = "select s.name, count(*) from source s, xref x, ".$type."_direct_xref gdx left join ".$type."_stable_id gsi on gdx.ensembl_stable_id = gsi.stable_id where s.source_id = x.source_id and x.xref_id = gdx.general_xref_id and gsi.stable_id is null group by s.name";
my $sth = $self->xref->dbc->prepare($sql);
$sth->execute();
my ($name, $count);
$sth->bind_columns(\$name,\$count);
my $total_count=0;
while($sth->fetch()){
print STDERR "WARNING $name has $count invalid stable ids in ".$type."_direct_xrefs\n";
$total_count += $count;
}
$sth->finish;
if($total_count){
print STDERR "USEFUL SQL: $sql\n";
}
}
}
1;