Cache documentation.

  Arg[1]      : String $name - a cache name (e.g. 'genes_by_id')
  Arg[2]      : String type - a cache type (e.g. "source.$slice_name")
  Arg[3]      : String $key - key of this entry (e.g. a gene dbID)
  Arg[4]      : Bio::EnsEMBL::IdMappping::TinyFeature $val - value to cache
  Example     : $cache->add('genes_by_id',
                  'source.chromosome:NCBI36:X:1:1000000:1', '1234', $tiny_gene);
  Description : Adds a TinyFeature object to a named cache.
  Return type : Bio::EnsEMBL::IdMapping::TinyFeature
  Exceptions  : thrown on wrong or missing arguments
  Caller      : internal
  Status      : At Risk
              : under development

add_list

  Arg[1]      : String $name - a cache name (e.g. 'genes_by_id')
  Arg[2]      : String type - a cache type (e.g. "source.$slice_name")
  Arg[3]      : String $key - key of this entry (e.g. a gene dbID)
  Arg[4]      : List of Bio::EnsEMBL::IdMappping::TinyFeature @val - values
                to cache
  Example     : $cache->add_list('transcripts_by_exon_id',
                  'source.chromosome:NCBI36:X:1:1000000:1', '1234',
                  $tiny_transcript1, $tiny_transcript2);
  Description : Adds a list of TinyFeature objects to a named cache.
  Return type : Listref of Bio::EnsEMBL::IdMapping::TinyFeature objects
  Exceptions  : thrown on wrong or missing arguments
  Caller      : internal
  Status      : At Risk
              : under development

build_cache_all

  Arg[1]      : String $dbtype - db type (source|target)
  Example     : my ($num_genes, $filesize) = $cache->build_cache_all('source');
  Description : Builds a cache of genes, transcripts, translations and exons
                needed by the IdMapping application and serialises the
                resulting cache object to a file. All genes across the genome
                are processed in one go. This method should be used when
                build_cache_by_seq_region can't be used due to a large number
                of toplevel seq_regions (e.g. 2x genomes).
  Return type : list of the number of genes processed and the size of the
                serialised cache file
  Exceptions  : thrown on invalid slice name
  Caller      : general
  Status      : At Risk
              : under development

build_cache_by_slice

  Arg[1]      : String $dbtype - db type (source|target)
  Arg[2]      : String $slice_name - the name of a slice (format as returned by
                Bio::EnsEMBL::Slice->name)
  Example     : my ($num_genes, $filesize) = $cache->build_cache_by_slice(
                  'source', 'chromosome:NCBI36:X:1:1000000:-1');
  Description : Builds a cache of genes, transcripts, translations and exons
                needed by the IdMapping application and serialises the resulting
                cache object to a file, one slice at a time.
  Return type : list of the number of genes processed and the size of the
                serialised cache file
  Exceptions  : thrown on invalid slice name
  Caller      : general
  Status      : At Risk
              : under development

build_cache_from_genes

  Arg[1]      : String $type - cache type
  Arg[2]      : Listref of Bio::EnsEMBL::Genes $genes - genes to build cache
                from
  Arg[3]      : Boolean $need_project - indicate if we need to project exons to
                common coordinate system
  Example     : $cache->build_cache_from_genes(
                  'source.chromosome:NCBI36:X:1:100000:1', \@genes);
  Description : Builds the cache by fetching transcripts, translations and exons
                for a list of genes from the database, and creating lightweight
                Bio::EnsEMBL::IdMapping::TinyFeature objects containing only the
                data needed by the IdMapping application. These objects are
                attached to a name cache in this cache object. Exons only need
                to be projected to a commond coordinate system if their native
                coordinate system isn't common to source and target assembly
                itself.
  Return type : int - number of genes after filtering
  Exceptions  : thrown on wrong or missing arguments
  Caller      : internal
  Status      : At Risk
              : under development

conf

  Arg[1]      : 
  Example     : 
  Description : 
  Return type : 
  Exceptions  : 
  Caller      : 
  Status      :

filter_biotypes

  Arg[1]      : Listref of Bio::EnsEMBL::Genes $genes - the genes to filter
  Example     : my @filtered = @{ $cache->filter_biotypes(\@genes) };
  Description : Filters a list of genes by biotype. Biotypes are taken from the
                IdMapping configuration parameter 'biotypes'.
  Return type : Listref of Bio::EnsEMBL::Genes (or empty list)
  Exceptions  : none
  Caller      : internal
  Status      : At Risk
              : under development

logger

  Arg[1]      : 
  Example     : 
  Description : 
  Return type : 
  Exceptions  : 
  Caller      : 
  Status      :

new

  Arg [LOGGER]: Bio::EnsEMBL::Utils::Logger $logger - a logger object
  Arg [CONF]  : Bio::EnsEMBL::Utils::ConfParser $conf - a configuration object
  Example     : my $cache = Bio::EnsEMBL::IdMapping::Cache->new(
                  -LOGGER => $logger,
                  -CONF   => $conf,
                );
  Description : constructor
  Return type : Bio::EnsEMBL::IdMapping::Cache object
  Exceptions  : thrown on wrong or missing arguments
  Caller      : general
  Status      : At Risk
              : under development

Methods code

add

sub add {

  my $self = shift;
  my $name = shift;
  my $type = shift;
  my $key = shift;
  my $val = shift;

  throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
  throw("You must provide a cache type.") unless $type;
  throw("You must provide a cache key (e.g. a gene dbID).") unless $key;

  $self->{'cache'}->{$type}->{$name}->{$key} = $val;

  return $self->{'cache'}->{$type}->{$name}->{$key};

}

add_common_cs

description

sub add_common_cs {

  my $self = shift;
  my $cs = shift;

  unless ($cs and $cs->isa('Bio::EnsEMBL::CoordSystem')) {
    throw('You must provide a CoordSystem');
  }

  my $csid = join(':', $cs->name, $cs->version);

  $self->{'instance'}->{'ccs'}->{$csid} = 1;

}

add_list

sub add_list {

  my $self = shift;
  my $name = shift;
  my $type = shift;
  my $key = shift;
  my @vals = @_;

  throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
  throw("You must provide a cache type.") unless $type;
  throw("You must provide a cache key (e.g. a gene dbID).") unless $key;

  push @{ $self->{'cache'}->{$type}->{$name}->{$key} }, @vals;

  return $self->{'cache'}->{$type}->{$name}->{$key};

}

build_cache_all

sub build_cache_all {

  my $self = shift;
  my $dbtype = shift;
  
  # set cache method (required for loading cache later)
  $self->cache_method('ALL');

  my $dba = $self->get_DBAdaptor($dbtype);
  my $ga = $dba->get_GeneAdaptor;
  
  my $genes = $ga->fetch_all;

  # find common coord_system
  my $common_cs_found = $self->find_common_coord_systems;

  # Build cache. Setting $need_project to 'CHECK' will cause
  # build_cache_from_genes() to check the coordinate system for each gene.
  my $type = "$dbtype.ALL";
  my $num_genes = $self->build_cache_from_genes($type, $genes, 'CHECK');
  undef $genes;

  # write cache to file, then flush cache to reclaim memory
  my $size = $self->write_all_to_file($type);

  return $num_genes, $size;

}

build_cache_by_slice

sub build_cache_by_slice {

  my $self = shift;
  my $dbtype = shift;
  my $slice_name = shift;
  
  # set cache method (required for loading cache later)
  $self->cache_method('BY_SEQ_REGION');

  my $dba = $self->get_DBAdaptor($dbtype);
  my $sa = $dba->get_SliceAdaptor;
  
  my $slice = $sa->fetch_by_name($slice_name);
  unless ($slice) {
    throw("Could not retrieve slice $slice_name.");
  }
  
  my $genes = $slice->get_all_Genes(undef, undef, 1);

  # find common coord_system
  my $common_cs_found = $self->find_common_coord_systems;

  # find out whether native coord_system is a common coord_system.
  # if so, you don't need to project.
  # also don't project if no common coord_system present
  my $need_project = 1;
  my $csid = join(':', $slice->coord_system_name,
                       $slice->coord_system->version);
  if ($self->is_common_cs($csid) or !$self->highest_common_cs) {
    $need_project = 0;
  }
  
  # build cache
  my $type = "$dbtype.$slice_name";
  my $num_genes = $self->build_cache_from_genes($type, $genes, $need_project);
  undef $genes;

  # write cache to file, then flush cache to reclaim memory
  my $size = $self->write_all_to_file($type);

  return $num_genes, $size;

}

build_cache_from_genes

sub build_cache_from_genes {

  my $self = shift;
  my $type = shift;
  my $genes = shift;
  my $need_project = shift;
  
  throw("You must provide a type.") unless $type;
  throw("You must provide a listref of genes.") unless (ref($genes) eq 'ARRAY');

  # biotype filter
  if ( $self->conf->param('biotypes') ) {
    $genes = $self->filter_biotypes($genes);
  }
  my $num_genes = scalar(@$genes);

  # initialise cache for the given type.
  $self->{'cache'}->{$type} = {};

  #my $i = 0;
  #my $num_genes = scalar(@$genes);
  #my $progress_id = $self->logger->init_progress($num_genes);

  # loop over genes sorted by gene location.
  # the sort will hopefully improve assembly mapper cache performance and
  # therefore speed up exon sequence retrieval
  foreach my $gene (sort { $a->start <=> $b->start } @$genes) {
    #$self->logger->log_progressbar($progress_id, ++$i, 2);
    #$self->logger->log_progress($num_genes, ++$i, 20, 3, 1);

    if ($need_project eq 'CHECK') {
      # find out whether native coord_system is a common coord_system.
      # if so, you don't need to project.
      # also don't project if no common coord_system present
      if ($self->highest_common_cs) {
        my $csid = join(':', $gene->slice->coord_system_name,
                             $gene->slice->coord_system->version);
        if ($self->is_common_cs($csid)) {
          $need_project = 0;
        }
      } else {
        $need_project = 0;
      }
    }

    # create lightweigt gene
    my $lgene = Bio::EnsEMBL::IdMapping::TinyGene->new_fast([
        $gene->dbID,
        $gene->stable_id,
        $gene->version,
        $gene->created_date,
        $gene->modified_date,
        $gene->start,
        $gene->end,
        $gene->strand,
        $gene->slice->seq_region_name,
        $gene->biotype,
        $gene->status,
        $gene->analysis->logic_name,
        ($gene->is_known ? 1 : 0),
    ]);

    # build gene caches
    $self->add('genes_by_id', $type, $gene->dbID, $lgene);
    
    # transcripts
    foreach my $tr (@{ $gene->get_all_Transcripts }) {
      my $ltr = Bio::EnsEMBL::IdMapping::TinyTranscript->new_fast([
          $tr->dbID,
          $tr->stable_id,
          $tr->version,
          $tr->created_date,
          $tr->modified_date,
          $tr->start,
          $tr->end,
          $tr->strand,
          $tr->length,
          md5_hex($tr->spliced_seq),
          ($tr->is_known ? 1 : 0),
      ]);

      $lgene->add_Transcript($ltr);

      # build transcript caches
      $self->add('transcripts_by_id', $type, $tr->dbID, $ltr);
      $self->add('genes_by_transcript_id', $type, $tr->dbID, $lgene);

      # translation (if there is one)
      if (my $tl = $tr->translation) {
        my $ltl = Bio::EnsEMBL::IdMapping::TinyTranslation->new_fast([
            $tl->dbID,
            $tl->stable_id,
            $tl->version,
            $tl->created_date,
            $tl->modified_date,
            $tr->dbID,
            $tr->translate->seq,
            ($tr->is_known ? 1 : 0),
        ]);

        $ltr->add_Translation($ltl);

        $self->add('translations_by_id', $type, $tl->dbID, $ltl);

        undef $tl;
      }

      # exons
      foreach my $exon (@{ $tr->get_all_Exons }) {
        my $lexon = Bio::EnsEMBL::IdMapping::TinyExon->new_fast([
            $exon->dbID,
            $exon->stable_id,
            $exon->version,
            $exon->created_date,
            $exon->modified_date,
            $exon->start,
            $exon->end,
            $exon->strand,
            $exon->slice->seq_region_name,
            $exon->slice->coord_system_name,
            $exon->slice->coord_system->version,
            $exon->slice->subseq($exon->start, $exon->end, $exon->strand),
            $exon->phase,
            $need_project,
        ]);

        # get coordinates in common coordinate system if needed
        if ($need_project) {
          my @seg = @{ $exon->project($self->highest_common_cs,
                                      $self->highest_common_cs_version) };

          if (scalar(@seg) == 1) {
            my $sl = $seg[0]->to_Slice;
            $lexon->common_start($sl->start);
            $lexon->common_end($sl->end);
            $lexon->common_strand($sl->strand);
            $lexon->common_sr_name($sl->seq_region_name);
          }
        }
        
        $ltr->add_Exon($lexon);

        $self->add('exons_by_id', $type, $exon->dbID, $lexon);
        $self->add_list('transcripts_by_exon_id', $type, $exon->dbID, $ltr);

        undef $exon;
      }

      undef $tr;
    }

    undef $gene;
  }

  return $num_genes;

}

cache_file

description

sub cache_file {

  my $self = shift;
  my $type = shift;

  throw("You must provide a cache type.") unless $type;

  return $self->dump_path."/$type.object_cache.ser";

}

cache_file_exists

description

sub cache_file_exists {

  my $self = shift;
  my $type = shift;

  throw("You must provide a cache type.") unless $type;

  my $cache_file = $self->cache_file($type);

  if (-e $cache_file) {
    $self->logger->info("Cache file found for $type.\n", 2);
    $self->logger->debug("Will read from $cache_file.\n", 2);
    return 1;
  } else {
    $self->logger->info("No cache file found for $type.\n", 2);
    $self->logger->info("Will build cache from db.\n", 2);
    return 0;
  }

}

cache_method

description

sub cache_method {

  my $self = shift;
  $self->{'instance'}->{'cache_method'} = shift if (@_);
  return $self->{'instance'}->{'cache_method'};

}

check_db_connection

description

sub check_db_connection {

  my $self = shift;
  my $dbtype = shift;
  
  my $err = 0;
  
  eval {
    my $dba = $self->get_DBAdaptor($dbtype);
    $dba->dbc->connect;
  };
  
  if ($@) {
    $self->logger->warning("Can't connect to $dbtype db: $@\n");
    $err++;
  } else {
    $self->logger->debug("Connection to $dbtype db ok.\n");
    $self->{'_db_conn_ok'}->{$dbtype} = 1;
  }

  return $err;

}

check_db_read_permissions

description

sub check_db_read_permissions {

  my $self = shift;
  my $dbtype = shift;

  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ($self->{'_db_conn_ok'}->{$dbtype});
  
  my $err = 0;
  my %privs = %{ $self->get_db_privs($dbtype) };
  
  unless ($privs{'SELECT'} or $privs{'ALL PRIVILEGES'}) {
    $self->logger->warning("User doesn't have read permission on $dbtype db.\n");
    $err++;
  } else {
    $self->logger->debug("Read permission on $dbtype db ok.\n");
  }

  return $err;

}

check_db_write_permissions

description

sub check_db_write_permissions {

  my $self = shift;
  my $dbtype = shift;
  
  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ($self->{'_db_conn_ok'}->{$dbtype});
  
  my $err = 0;

  unless ($self->do_upload) {
    $self->logger->debug("No uploads, so write permission on $dbtype db not required.\n");
    return $err;
  }

  my %privs = %{ $self->get_db_privs($dbtype) };

  unless ($privs{'INSERT'} or $privs{'ALL PRIVILEGES'}) {
    $self->logger->warning("User doesn't have write permission on $dbtype db.\n");
    $err++;
  } else {
    $self->logger->debug("Write permission on $dbtype db ok.\n");
  }

  return $err;

}

check_empty_tables

description

sub check_empty_tables {

  my $self = shift;
  my $dbtype = shift;
  
  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ($self->{'_db_conn_ok'}->{$dbtype});
  
  my $err = 0;
  my $c = 0;

  if ($self->conf->param('no_check_empty_tables') or !$self->do_upload) {
    $self->logger->debug("Won't check for empty stable ID and archive tables in $dbtype db.\n");
    return $err;
  }

  eval {
    my @tables = qw(
      gene_stable_id
      transcript_stable_id
      translation_stable_id
      exon_stable_id
      stable_id_event
      mapping_session
      gene_archive
      peptide_archive
    );

    my $dba = $self->get_DBAdaptor($dbtype);
    foreach my $table (@tables) {
      if ($c = $self->fetch_value_from_db($dba, "SELECT COUNT(*) FROM $table")) {
        $self->logger->warning("$table table in $dbtype db has $c entries.\n");
        $err++;
      }
    }
  };

  if ($@) {
    $self->logger->warning("Error retrieving stable ID and archive table row counts from $dbtype db: $@\n");
    $err++;
  } elsif (!$err) {
    $self->logger->debug("All stable ID and archive tables in $dbtype db are empty.\n");
  }
  return $err;

}

check_meta_entries

description

sub check_meta_entries {

  my $self = shift;
  my $dbtype = shift;
  
  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ($self->{'_db_conn_ok'}->{$dbtype});
  
  my $err = 0;
  my $assembly_default;
  my $schema_version;
  
  eval {
    my $dba = $self->get_DBAdaptor($dbtype);
    $assembly_default = $self->fetch_value_from_db($dba,
      qq(SELECT meta_value FROM meta WHERE meta_key = 'assembly.default'));
    $schema_version = $self->fetch_value_from_db($dba,
      qq(SELECT meta_value FROM meta WHERE meta_key = 'schema_version'));
  };
  
  if ($@) {
    $self->logger->warning("Error retrieving dna table row count from $dbtype db: $@\n");
    return ++$err;
  }
  
  unless ($assembly_default) {
    $self->logger->warning("No meta.assembly_default value found in $dbtype db.\n");
    $err++;
  } else {
    $self->logger->debug("meta.assembly_default value found ($assembly_default).\n");
  }

  unless ($schema_version) {
    $self->logger->warning("No meta.schema.version value found in $dbtype db.\n");
    $err++;
  } else {
    $self->logger->debug("meta.schema.version value found ($schema_version).\n");
  }

  return $err;

}

check_sequence

description

sub check_sequence {

  my $self = shift;
  my $dbtype = shift;
  
  # skip this check if db connection failed (this prevents re-throwing
  # exceptions).
  return 1 unless ($self->{'_db_conn_ok'}->{$dbtype});
  
  my $err = 0;
  my $c = 0;
  
  eval {
    my $dba = $self->get_DBAdaptor($dbtype);
    unless ($c = $self->fetch_value_from_db($dba, "SELECT COUNT(*) FROM dna")) {
      $err++;
    }
  };
  
  if ($@) {
    $self->logger->warning("Error retrieving dna table row count from $dbtype db: $@\n");
    $err++;
  } elsif ($err) {
    $self->logger->warning("No sequence found in $dbtype db.\n");
  } else {
    $self->logger->debug(ucfirst($dbtype)." db has sequence ($c entries).\n");
  }

  return $err;

}

conf

sub conf {

  my $self = shift;
  $self->{'conf'} = shift if (@_);
  return $self->{'conf'};

}

do_upload

description

sub do_upload {

  my $self = shift;

  if ($self->conf->param('dry_run') or
    ! ($self->conf->param('upload_events') or
       $self->conf->param('upload_stable_ids') or
       $self->conf->param('upload_archive'))) {
    return 0;
  } else {
    return 1;
  }

}

dump_path

description

sub dump_path {

  my $self = shift;

  $self->{'dump_path'} ||= path_append($self->conf->param('basedir'), 'cache');

  return $self->{'dump_path'};

}

fetch_value_from_db

description

sub fetch_value_from_db {

  my $self = shift;
  my $dba = shift;
  my $sql = shift;

  unless ($dba and ref($dba) and $dba->isa('Bio::EnsEMBL::DBSQL::DBAdaptor')) {
    throw("Need a Bio::EnsEMBL::DBSQL::DBAdaptor.");
  }
  unless ($sql) {
    throw("Need an SQL statement to execute.\n");
  }

  my $sth = $dba->dbc->prepare($sql);
  $sth->execute;
  my ($c) = $sth->fetchrow_array;
  return $c;

}

filter_biotypes

sub filter_biotypes {

  my $self = shift;
  my $genes = shift;

  my $filtered = [];

  foreach my $biotype ($self->conf->param('biotypes')) {
    push @$filtered, grep { $_->biotype eq $biotype } @$genes;
  }

  return $filtered;

}

find_common_coord_systems

description

sub find_common_coord_systems {

  my $self = shift;

  # get adaptors for source db
  my $s_dba = $self->get_DBAdaptor('source');
  my $s_csa = $s_dba->get_CoordSystemAdaptor;
  my $s_sa = $s_dba->get_SliceAdaptor;

  # get adaptors for target db
  my $t_dba = $self->get_DBAdaptor('target');
  my $t_csa = $t_dba->get_CoordSystemAdaptor;
  my $t_sa = $t_dba->get_SliceAdaptor;

  # find common coord_systems
  my @s_coord_systems = @{ $s_csa->fetch_all };
  my @t_coord_systems = @{ $t_csa->fetch_all };
  my $found_highest = 0;

  SOURCE:
  foreach my $s_cs (@s_coord_systems) {
    
    TARGET:
    foreach my $t_cs (@t_coord_systems) {
      if ($s_cs->name eq $t_cs->name) {

        # test for identical coord_system version
        if ($s_cs->version and ($s_cs->version ne $t_cs->version)) {
          next TARGET;
        }

        # test for at least 50% identical seq_regions
        if ($self->seq_regions_compatible($s_cs, $s_sa, $t_sa)) {
          $self->add_common_cs($s_cs);
          
          unless ($found_highest) {
            $self->highest_common_cs($s_cs->name);
            $self->highest_common_cs_version($s_cs->version);
          }

          $found_highest = 1;

          next SOURCE;
        }
      }
    }
  }
  
  return $found_highest;

}

get_DBAdaptor

description

sub get_DBAdaptor {

  my $self = shift;
  my $prefix = shift;

  unless ($self->{'_dba'}->{$prefix}) {
    # connect to database
    my $dba = new Bio::EnsEMBL::DBSQL::DBAdaptor(
            -host   => $self->conf->param("${prefix}host"),
            -port   => $self->conf->param("${prefix}port"),
            -user   => $self->conf->param("${prefix}user"),
            -pass   => $self->conf->param("${prefix}pass"),
            -dbname => $self->conf->param("${prefix}dbname"),
            -group  => $prefix,
    );
    
    # explicitely set the dnadb to itself - by default the Registry assumes
    # a group 'core' for this now
    $dba->dnadb($dba);

    $self->{'_dba'}->{$prefix} = $dba;
  }

  return $self->{'_dba'}->{$prefix};

}

get_by_key

description

sub get_by_key {

  my $self = shift;
  my $name = shift;
  my $type = shift;
  my $key = shift;

  throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
  throw("You must provide a cache type.") unless $type;
  throw("You must provide a cache key (e.g. a gene dbID).") unless $key;

  # transparently load cache from file unless already loaded
  unless ($self->{'instance'}->{'loaded'}->{"$type"}) {
    $self->read_and_merge($type);
  }

  return $self->{'cache'}->{$type}->{$name}->{$key};

}

get_by_name

description

sub get_by_name {

  my $self = shift;
  my $name = shift;
  my $type = shift;

  throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
  throw("You must provide a cache type.") unless $type;
  
  # transparently load cache from file unless already loaded
  unless ($self->{'instance'}->{'loaded'}->{$type}) {
    $self->read_and_merge($type);
  }

  return $self->{'cache'}->{$type}->{$name} || {};

}

get_count_by_name

description

sub get_count_by_name {

  my $self = shift;
  my $name = shift;
  my $type = shift;

  throw("You must provide a cache name (e.g. genes_by_id.") unless $name;
  throw("You must provide a cache type.") unless $type;
  
  # transparently load cache from file unless already loaded
  unless ($self->{'instance'}->{'loaded'}->{$type}) {
    $self->read_and_merge($type);
  }

  return scalar(keys %{ $self->get_by_name($name, $type) });

}

get_db_privs

description

sub get_db_privs {

  my $self = shift;
  my $dbtype = shift;

  my %privs = ();
  my $r;

  # get privileges from mysql db
  eval {
    my $dbc = $self->get_DBAdaptor($dbtype)->dbc;
    my $sql = qq(SHOW GRANTS FOR ).$dbc->username;
    my $sth = $dbc->prepare($sql);
    $sth->execute;
    ($r) = $sth->fetchrow_array;
    $sth->finish;
  };

  if ($@) {
    $self->logger->warning("Error obtaining privileges from $dbtype db: $@\n");
    return {};
  }

  # parse the output
  $r =~ s/GRANT (.*) ON .*/$1/i;
  foreach my $p (split(',', $r)) {
    # trim leading and trailing whitespace
    $p =~ s/^\s+//;
    $p =~ s/\s+$//;
    $privs{uc($p)} = 1;
  }

  return\% privs;

}

highest_common_cs

description

sub highest_common_cs {

  my $self = shift;
  $self->{'instance'}->{'hccs'} = shift if (@_);
  return $self->{'instance'}->{'hccs'};

}

highest_common_cs_version

description

sub highest_common_cs_version {

  my $self = shift;
  $self->{'instance'}->{'hccsv'} = shift if (@_);
  return $self->{'instance'}->{'hccsv'};

}

instance_file

description

sub instance_file {

  my $self = shift;

  return $self->dump_path."/cache_instance.ser";

}

is_common_cs

description

sub is_common_cs {

  my $self = shift;
  my $csid = shift;

  return $self->{'instance'}->{'ccs'}->{$csid};
}


1;

}

logger

sub logger {

  my $self = shift;
  $self->{'logger'} = shift if (@_);
  return $self->{'logger'};

}

merge

description

sub merge {

  my $self = shift;
  my $dbtype = shift;

  unless ($dbtype eq 'source' or $dbtype eq 'target') {
    throw("Db type must be 'source' or 'target'.");
  }

  foreach my $type (keys %{ $self->{'cache'} || {} }) {
    next unless ($type =~ /^$dbtype/);

    foreach my $name (keys %{ $self->{'cache'}->{$type} || {} }) {
    
      foreach my $key (keys %{ $self->{'cache'}->{$type}->{$name} || {} }) {
        if (defined $self->{'cache'}->{$dbtype}->{$name}->{$key}) {
          # warning("Duplicate key in cache: $name|$dbtype|$key. Skipping.\n");
        } else {
          $self->{'cache'}->{$dbtype}->{$name}->{$key} =
            $self->{'cache'}->{$type}->{$name}->{$key};
        }

        delete $self->{'cache'}->{$type}->{$name}->{$key};
      }
      
      delete $self->{'cache'}->{$type}->{$name};
    }
    
    delete $self->{'cache'}->{$type};

  }

}

new

sub new {

  my $caller = shift;
  my $class = ref($caller) || $caller;

  my ($logger, $conf, $load_instance) =
    rearrange(['LOGGER', 'CONF', 'LOAD_INSTANCE'], @_);

  unless ($logger->isa('Bio::EnsEMBL::Utils::Logger')) {
    throw("You must provide a Bio::EnsEMBL::Utils::Logger for logging.");
  }
  
  unless ($conf->isa('Bio::EnsEMBL::Utils::ConfParser')) {
    throw("You must provide configuration as a Bio::EnsEMBL::Utils::ConfParser object.");
  }
  
  my $self = {};
  bless ($self, $class);

  # initialise
  $self->logger($logger);
  $self->conf($conf);

  if ($load_instance) {
    $self->read_instance_from_file;
  }
  
  return $self;

}

read_and_merge

description

sub read_and_merge {

  my $self = shift;
  my $dbtype = shift;

  unless ($dbtype eq 'source' or $dbtype eq 'target') {
    throw("Db type must be 'source' or 'target'.");
  }

  # read cache from single or multiple files, depending on caching strategy
  my $cache_method = $self->cache_method;
  if ($cache_method eq 'ALL') {
    $self->read_from_file("$dbtype.ALL");
  } elsif ($cache_method eq 'BY_SEQ_REGION') {
    foreach my $slice_name (@{ $self->slice_names($dbtype) }) {
      $self->read_from_file("$dbtype.$slice_name");
    }
  } else {
    throw("Unknown cache method: $cache_method.");
  }

  $self->merge($dbtype);

  # flag as being loaded
  $self->{'instance'}->{'loaded'}->{$dbtype} = 1;

}

read_from_file

description

sub read_from_file {

  my $self = shift;
  my $type = shift;

  throw("You must provide a cache type.") unless $type;

  my $cache_file = $self->cache_file($type);

  if (-s $cache_file) {
    
    #$self->logger->info("Reading cache from file...\n", 0, 'stamped');
    #$self->logger->info("Cache file $cache_file.\n", 1);
    eval { $self->{'cache'}->{$type} = retrieve($cache_file); };
    if ($@) {
      throw("Unable to retrieve cache: $@");
    }
    #$self->logger->info("Done.\n", 0, 'stamped');

  } else {
    $self->logger->warning("Cache file $cache_file not found or empty.\n");
  }


  return $self->{'cache'}->{$type};

}

read_instance_from_file

description

sub read_instance_from_file {

  my $self = shift;

  my $instance_file = $self->instance_file;

  unless (-s $instance_file) {
    throw("No valid cache instance file found at $instance_file.");
  }

  eval { $self->{'instance'} = retrieve($instance_file); };
  if ($@) {
    throw("Unable to retrieve cache instance: $@");
  }

  return $self->{'instance'};

}

seq_regions_compatible

description

sub seq_regions_compatible {

  my $self = shift;
  my $cs = shift;
  my $s_sa = shift;
  my $t_sa = shift;

  unless ($cs and $cs->isa('Bio::EnsEMBL::CoordSystem')) {
    throw('You must provide a CoordSystem');
  }

  unless ($s_sa and $t_sa and $s_sa->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')
          and $t_sa->isa('Bio::EnsEMBL::DBSQL::SliceAdaptor')) {
    throw('You must provide a source and target SliceAdaptor');
  }

  my %sr_match;
  my $equal = 0;

  my $s_seq_regions = $s_sa->fetch_all($cs->name, $cs->version);
  my $t_seq_regions = $t_sa->fetch_all($cs->name, $cs->version);
  
  # sanity check to prevent divison by zero
  my $s_count = scalar(@$s_seq_regions);
  my $t_count = scalar(@$t_seq_regions);
  return(0) if ($s_count == 0 or $t_count == 0);
  
  foreach my $s_sr (@$s_seq_regions) {
    $sr_match{$s_sr->seq_region_name} = $s_sr->length;
  }

  foreach my $t_sr (@$t_seq_regions) {
    if (exists($sr_match{$t_sr->seq_region_name})) {
      $equal++;

      # return false if we have a region with same name but different length
      return(0) unless ($sr_match{$t_sr->seq_region_name} == $t_sr->length);
    }
  }

  if ($equal/$s_count > 0.5 and $equal/$t_count > 0.5) {
    return(1);
  } else {
    $self->logger->info("Only $equal seq_regions identical for ".$cs->name." ".$cs->version."\n");
    return(0);
  }

}

slice_names

description

sub slice_names {

  my $self = shift;
  my $dbtype = shift;

  throw("You must provide a db type (source|target).") unless $dbtype;

  my $dba = $self->get_DBAdaptor($dbtype);
  my $sa = $dba->get_SliceAdaptor;

  my @slice_names = ();

  if ($self->conf->param('chromosomes')) {
    # filter by chromosome
    foreach my $chr ($self->conf->param('chromosomes')) {
      my $slice = $sa->fetch_by_region('chromosome', $chr);
      push @slice_names, $slice->name;
    }
    
  } elsif ($self->conf->param('region')) {
    # filter by region (specific slice)
    # don't use SliceAdaptor->fetch_by_name() since this will fail if assembly
    # versions are different for source and target db
    my ($cs, $version, $name, $start, $end, $strand) =
      split(/:/, $self->conf->param('region'));
    my $slice = $sa->fetch_by_region($cs, $name, $start, $end);
    push @slice_names, $slice->name;

  } else {
    # fetch all genes, but do in junks to save memory
    my $ga = $dba->get_GeneAdaptor;
    foreach my $srid (@{ $ga->list_seq_region_ids }) {
      my $slice = $sa->fetch_by_seq_region_id($srid);
      push @slice_names, $slice->name;
    }
  }

  return\@ slice_names;

}

write_all_to_file

description

sub write_all_to_file {

  my $self = shift;
  my $type = shift;

  throw("You must provide a cache type.") unless $type;

  my $size = 0;
  $size += $self->write_to_file($type);
  $size += $self->write_instance_to_file;

  return parse_bytes($size);

}

write_instance_to_file

description

sub write_instance_to_file {

  my $self = shift;

  my $instance_file = $self->instance_file;

  eval { nstore($self->{'instance'}, $instance_file) };
  if ($@) {
    throw("Unable to store $instance_file: $@\n");
  }

  my $size = -s $instance_file;
  return $size;

}

write_to_file

description

sub write_to_file {

  my $self = shift;
  my $type = shift;

  throw("You must provide a cache type.") unless $type;

  unless ($self->{'cache'}->{$type}) {
    $self->logger->warning("No features found in $type. Won't write cache file.\n");
    return;
  }

  my $cache_file = $self->cache_file($type);

  eval { nstore($self->{'cache'}->{$type}, $cache_file) };
  if ($@) {
    throw("Unable to store $cache_file: $@\n");
  }

  my $size = -s $cache_file;
  return $size;

}

General documentation

LICENSE

  Copyright (c) 1999-2009 The European Bioinformatics Institute and
  Genome Research Limited.  All rights reserved.

  This software is distributed under a modified Apache license.
  For license details, please see

    /info/about/code_licence.html

CONTACT