XrefMapper TestMappings
Included librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Cwd
DBI
File::Basename
IPC::Open3
XrefMapper::BasicMapper
Inherit
XrefMapper::BasicMapper
Synopsis
No synopsis!
Description
No description!
Methods
direct_stable_id_check
No description
Code
entry_number_check
No description
Code
mapper
No description
Code
name_change_check
No description
Code
new
No description
Code
unlinked_entries
No description
Code
Methods description
None available.
Methods code
direct_stable_id_checkdescriptionprevnextTop
sub direct_stable_id_check {
  my ($self) = @_;


  foreach my $type (qw(gene transcript translation)){
    
    my $sql = "select s.name, count(*) from source s, xref x, ".$type."_direct_xref gdx left join ".$type."_stable_id gsi on gdx.ensembl_stable_id = gsi.stable_id where s.source_id = x.source_id and x.xref_id = gdx.general_xref_id and gsi.stable_id is null group by s.name";
    
    my $sth = $self->xref->dbc->prepare($sql);
    $sth->execute();
    my ($name, $count);
    $sth->bind_columns(\$name,\$count);
    my $total_count=0;
    while($sth->fetch()){
      print STDERR "WARNING $name has $count invalid stable ids in ".$type."_direct_xrefs\n";
      $total_count += $count;
    }	
    $sth->finish;
    if($total_count){
      print STDERR "USEFUL SQL: $sql\n";
    }
  }

}
1;
}
entry_number_checkdescriptionprevnextTop
sub entry_number_check {
  my ($self) = @_;
  

# No point doing xrefs object_xrefs are more important and gives a better indication of wether things went okay.
my %old_object_xref_count; my %new_object_xref_count; my $sth = $self->xref->dbc->prepare('select s.name, count(*) from xref x, object_xref ox, source s where ox.xref_id = x.xref_id and x.source_id = s.source_id and ox_status = "DUMP_OUT" and s.name not like "AFFY%" group by s.name'); $sth->execute(); my ($name, $count); $sth->bind_columns(\$name,\$count); while($sth->fetch()){ $new_object_xref_count{$name} = $count; } $sth->finish; $sth = $self->core->dbc->prepare('select e.db_name, count(*) from xref x, object_xref ox, external_db e where ox.xref_id = x.xref_id and x.external_db_id = e.external_db_id and e.db_name not like "AFFY%" group by e.db_name'); $sth->execute(); $sth->bind_columns(\$name,\$count); while($sth->fetch()){ my $change = 0; $old_object_xref_count{$name} = $count; if(defined($new_object_xref_count{$name})){ $change = (($new_object_xref_count{$name} - $count)/$new_object_xref_count{$name}) * 100;
if($change > 5){ # increase of 5%
print "WARNING: $name has increased by $change\% was $count now ". $new_object_xref_count{$name}. "\n" if($self->mapper->verbose); } elsif($change < -5){ # decrease by 5%
print "WARNING: $name has decreased by $change\% was $count now ". $new_object_xref_count{$name}. "\n" if($self->mapper->verbose); } } else{ print "WARNING: xrefs $name are not in the new database but are in the old???\n" if($self->mapper->verbose); } } $sth->finish; foreach my $key (keys %new_object_xref_count){ if(!defined($old_object_xref_count{$key})){ print "WARNING: $key has ".$new_object_xref_count{$key}." xrefs in the new database but NONE in the old\n" if($self->mapper->verbose); } } return;
}
mapperdescriptionprevnextTop
sub mapper {
  my ($self, $arg) = @_;

  (defined $arg) &&
    ($self->{_mapper} = $arg );
  return $self->{_mapper};
}
name_change_checkdescriptionprevnextTop
sub name_change_check {
  my ($self) = @_;

  my %new_name; # $old_name{$gene_id} = HGNC_%name
my %id_to_stable_id; my $official_name = $self->mapper->get_official_name; if(!defined($official_name)){ return; } # print "Checking names\n";
my $sql = 'select x.label, gsi.internal_id, gsi.stable_id from object_xref ox, xref x, gene_stable_id gsi, source s where x.xref_id = ox.xref_id and ox.ensembl_object_type = "Gene" and gsi.internal_id = ox.ensembl_id and x.source_id = s.source_id and s.name like "'.$official_name.'_%"'; my $sth = $self->xref->dbc->prepare($sql); $sth->execute(); my ($name, $gene_id, $stable_id); $sth->bind_columns(\$name,\$gene_id,\$ stable_id); my $count = 0; while($sth->fetch()){ $new_name{$gene_id} = $name; $id_to_stable_id{$gene_id} = $stable_id; $count++; } $sth->finish; # print $sql."\n";
# print $count." entries found in xref database\n";
# Use synonyms as well.
my %alias; $sql = 'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "'.$official_name.'_%" '; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); my ($syn); $sth->bind_columns(\$name,\$syn); $count = 0; while($sth->fetch()){ $alias{$syn} = $name; } $sth->finish; $sql = 'select x.label, sy.synonym from xref x, synonym sy, source so where x.xref_id = sy.xref_id and so.source_id = x.source_id and so.name like "EntrezGene"'; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$name,\$syn); while($sth->fetch()){ $alias{$syn} = $name; } $sth->finish; # NOTE ncRNA has higher priority
$sql = "select x.display_label, g.gene_id from gene g, xref x where g.display_xref_id = x.xref_id and biotype = 'protein_coding'"; $sth = $self->core->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$name,\$gene_id); $count =0; my $total_count=0; while($sth->fetch()){ if(defined($new_name{$gene_id})){ $total_count++; } if(defined($new_name{$gene_id}) and $new_name{$gene_id} ne $name){ if(!defined($alias{$name}) or $alias{$name} ne $new_name{$gene_id}){ print STDERR "WARN: gene_id ($gene_id) ".$id_to_stable_id{$gene_id}." new = ".$new_name{$gene_id}." old = $name\n"; $count++; } } } if($total_count){ print STDERR "$count entries with different names out of $total_count protein coding gene comparisons?\n"; }
}
newdescriptionprevnextTop
sub new {
  my($class, $mapper) = @_;

  my $self ={};
  bless $self,$class;
  $self->core($mapper->core);
  $self->xref($mapper->xref);
  $self->mapper($mapper);
  return $self;
}
unlinked_entriesdescriptionprevnextTop
sub unlinked_entries {
  my ($self) = @_;
  
  my $failed = 0;

  my $xref_id;
  my $count;

  my $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('tests_started',now())");
  $sth_stat->execute();

  #    dependent_xref            and xref
my $count_sql = "select count(1) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null"; my $sql = "select distinct(d.master_xref_id) from dependent_xref d left join xref x on d.master_xref_id = x.xref_id where x.xref_id is null limit 10"; my $sth = $self->xref->dbc->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); $sth->finish; if($count){ $failed = 1; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; while($sth->fetch){ print STDERR "Problem with master xref $xref_id\n"; } $sth->finish; } $count_sql = "select count(1) from dependent_xref d left join xref x on d.dependent_xref_id = x.xref_id where x.xref_id is null"; $sql = "select distinct(d.dependent_xref_id) from dependent_xref d left join xref x on d.dependent_xref_id = x.xref_id where x.xref_id is null limit 10"; $sth = $self->xref->dbc->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); $sth->finish; if($count){ $failed = 1; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; while($sth->fetch){ print STDERR "Problem with dependent xref $xref_id\n"; } $sth->finish; } $count_sql = "select count(1) from primary_xref d left join xref x on d.xref_id = x.xref_id where x.xref_id is null"; $sql = "select distinct(d.xref_id) from primary_xref d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10"; $sth = $self->xref->dbc->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); $sth->finish; if($count){ $failed = 1; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; while($sth->fetch){ print STDERR "Problem with primary xref $xref_id\n"; } $sth->finish; } foreach my $type (qw(transcript translation gene)){ $count_sql = "select count(1) from ".$type."_direct_xref d left join xref x on d.general_xref_id = x.xref_id where x.xref_id is null"; $sql = "select distinct(d.general_xref_id) from ".$type."_direct_xref d left join xref x on d.general_xref_id = x.xref_id where x.xref_id is null limit 10"; $sth = $self->xref->dbc->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); $sth->finish; if($count){ $failed = 1; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; while($sth->fetch){ print STDERR "Problem with ".$type."_direct_xref $xref_id\n"; } $sth->finish; } } $count_sql = "select count(1) from synonym d left join xref x on d.xref_id = x.xref_id where x.xref_id is null"; $sql = "select distinct(d.xref_id) from synonym d left join xref x on d.xref_id = x.xref_id where x.xref_id is null limit 10"; $sth = $self->xref->dbc->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); $sth->finish; if($count){ $failed = 1; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; while($sth->fetch){ print STDERR "Problem with synonym $xref_id\n"; } $sth->finish; } $count_sql = "select count(1) from identity_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null"; $sql = "select distinct(d.object_xref_id) from identity_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null limit 10"; $sth = $self->xref->dbc->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); $sth->finish; if($count){ $failed = 1; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; while($sth->fetch){ print STDERR "Problem with object_xref $xref_id\n"; } $sth->finish; } $count_sql = "select count(1) from go_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null"; $sql = "select distinct(d.object_xref_id) from go_xref d left join object_xref o on d.object_xref_id = o.object_xref_id where o.object_xref_id is null limit 10"; $sth = $self->xref->dbc->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); $sth->finish; if($count){ $failed = 1; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; while($sth->fetch){ print STDERR "Problem with object_xref $xref_id\n"; } $sth->finish; } foreach my $type (qw(transcript translation gene)){ $count_sql = "select count(1) from gene_transcript_translation d left join ".$type."_stable_id x on d.".$type."_id = x.internal_id where x.internal_id is null and d.".$type."_id is not null"; $sql = "select distinct(d.".$type."_id) from gene_transcript_translation d left join ".$type."_stable_id x on d.".$type."_id = x.internal_id where x.internal_id is null and d.".$type."_id is not null limit 10"; $sth = $self->xref->dbc->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); $sth->finish; if($count){ $failed = 1; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; while($sth->fetch){ print STDERR "Problem with ".$type."_id $xref_id\n"; } $sth->finish; } } $count_sql = "select count(1) from xref x, source s, object_xref o left join go_xref g on o.object_xref_id = g.object_xref_id where x.xref_id = o.xref_id and s.source_id = x.source_id and s.name like 'GO' and g.object_xref_id is null"; $sql = "select distinct(o.object_xref_id) from xref x, source s, object_xref o left join go_xref g on o.object_xref_id = g.object_xref_id where x.xref_id = o.xref_id and s.source_id = x.source_id and s.name like 'GO' and g.object_xref_id is null limit 10"; $sth = $self->xref->dbc->prepare($count_sql); $sth->execute(); $sth->bind_columns(\$count); $sth->fetch(); $sth->finish; if($count){ $failed = 1; $sth = $self->xref->dbc->prepare($sql); $sth->execute(); $sth->bind_columns(\$xref_id); print STDERR "SQL QUERY: $sql\n"; while($sth->fetch){ print STDERR "Problem with object_xref $xref_id which is linked to a GO source but has no go_xref reference\n"; } $sth->finish; } if(!$failed){ $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('tests_finished',now())"); $sth_stat->execute(); } else{ $sth_stat = $self->xref->dbc->prepare("insert into process_status (status, date) values('tests_failed',now())"); $sth_stat->execute(); } $sth_stat->finish; return $failed;
}
General documentation
No general documentation available.