GenomeDBAdaptor documentation.

  DEPRECATED : consensus and query sequences are not used anymore.
               Please, refer to Bio::EnsEMBL::Compara::GenomicAlignBlock
               for more details.

  Arg[1]     : Bio::EnsEMBL::Compara::GenomeDB $query_genomedb
  Arg[2]     : Bio::EnsEMBL::Compara::GenomeDB $consensus_genomedb
  Arg[3]     : int $method_link_id
  Example    :
  Description: Checks to see whether a consensus genome database has been
               analysed against the specific query genome database.
               Returns 1 if the query genomeDB is found among the
               recorded matches.  A 0 is returned if no match is found.
  Returntype : int ( 0 or 1 )
  Exceptions : none
  Caller     : Bio::EnsEMBL::Compara::GenomeDB.pm

check_for_query_db [DEPRECATED]

  DEPRECATED : consensus and query sequences are not used anymore.
               Please, refer to Bio::EnsEMBL::Compara::GenomicAlignBlock
               for more details.

  Arg[1]     : Bio::EnsEMBL::Compara::GenomeDB $consensus_genomedb
  Arg[2]     : Bio::EnsEMBL::Compara::GenomeDB $query_genomedb
  Arg[3]     : int $method_link_id
  Example    : none
  Description: Checks to see whether a query genome database has been
               analysed against the specific consensus genome database.
               Returns 1 if the consensus genomeDB is found among the
               recorded matches.  A 0 is returned if no match is
               found.
  Returntype : int ( 0 or 1 )
  Exceptions : none
  Caller     : Bio::EnsEMBL::Compara::GenomeDB.pm

create_GenomeDBs

  Arg [1]    : none
  Example    : none
  Description: Reads the genomedb table and creates an internal cache of the
               values of the table.
  Returntype : none
  Exceptions : none
  Caller     : internal
  Status      : Stable

deleteObj

  Arg         : none
  Example     : none
  Description : Called automatically by DBConnection during object destruction
                phase. Clears the cache to avoid memory leaks.
  Returntype  : none
  Exceptions  : none
  Caller      : general
  Status      : Stable

fetch_all

  Args       : none
  Example    : my $all_genome_dbs = $genome_db_adaptor->fetch_all();
  Description: gets all GenomeDBs for this compara database
  Returntype : listref Bio::EnsEMBL::Compara::GenomeDB
  Exceptions : none
  Caller     : general
  Status     : Stable

fetch_by_Slice

  Arg [1]    : Bio::EnsEMBL::Slice $slice
  Example    : $gdb = $gdba->fetch_by_Slice($slice);
  Description: Retrieves the genome db corresponding to this
               Bio::EnsEMBL::Slice object
  Returntype : Bio::EnsEMBL::Compara::GenomeDB
  Exceptions : thrown if $slice is not a Bio::EnsEMBL::Slice
  Caller     : general
  Status     : Stable

fetch_by_core_DBAdaptor

	Arg [1]     : Bio::EnsEMBL::DBSQL::DBAdaptor
	Example     : my $gdb = $gdba->fetch_by_core_DBAdaptor($core_dba);
	Description : For a given core database adaptor object, this method will
	              return the corresponding GenomeDB instance.
	Returntype  : Bio::EnsEMBL::Compara::GenomeDB
	Exceptions  : thrown if no name is found for the adaptor
	Caller      : general
	Status      : Stable

fetch_by_dbID

  Arg [1]    : int $dbid
  Example    : $genome_db = $gdba->fetch_by_dbID(1);
  Description: Retrieves a GenomeDB object via its internal identifier
  Returntype : Bio::EnsEMBL::Compara::GenomeDB
  Exceptions : none
  Caller     : general
  Status     : Stable

fetch_by_name_assembly

  Arg [1]    : string $name
  Arg [2]    : string $assembly
  Example    : $gdb = $gdba->fetch_by_name_assembly("Homo sapiens", 'NCBI36');
  Description: Retrieves a genome db using the name of the species and
               the assembly version.
  Returntype : Bio::EnsEMBL::Compara::GenomeDB
  Exceptions : thrown if GenomeDB of name $name and $assembly cannot be found
  Caller     : general
  Status      : Stable

fetch_by_registry_name

  Arg [1]    : string $name
  Example    : $gdb = $gdba->fetch_by_registry_name("human");
  Description: Retrieves a genome db using the name of the species as
               used in the registry configuration file. Any alias is
               acceptable as well.
  Returntype : Bio::EnsEMBL::Compara::GenomeDB
  Exceptions : thrown if $name is not found in the Registry configuration
  Caller     : general
  Status     : Stable

fetch_by_taxon_id

  Arg [1]    : int $taxon_id
  Example    : $gdb = $gdba->fetch_by_taxon_id(1234);
  Description: Retrieves a genome db using the NCBI taxon_id of the species.
  Returntype : Bio::EnsEMBL::Compara::GenomeDB
  Exceptions : thrown if GenomeDB of taxon_id $taxon_id cannot be found. Will
               warn if the taxon returns more than one GenomeDB (possible in
               some branches of the Taxonomy)
  Caller     : general
  Status     : Stable

get_all_db_links

  Arg[1]     : Bio::EnsEMBL::Compara::GenomeDB $query_genomedb
  Arg[2]     : int $method_link_id
  Example    : 
  Description: For the GenomeDB object passed in, check is run to
               verify which other genomes it has been analysed against
               irrespective as to whether this was as the consensus
               or query genome. Returns a reference to a list of the
               matching GenomeDB objects.
  Returntype : listref of Bio::EnsEMBL::Compara::GenomeDBs 
  Exceptions : none
  Caller     : Bio::EnsEMBL::Compara::GenomeDB.pm
  Status     : At risk

get_species_name_from_core_MetaContainer

  Arg [1]     : Bio::EnsEMBL::MetaContainer
  Example     : $gdba->get_species_name_from_core_MetaContainer($slice->adaptor->db->get_MetaContainer);
  Description : Returns the name of a species which was used to
                name the GenomeDB from a meta container. Can be
                the species binomial name or the value of the
                meta item species.sql_name
  Returntype  : Scalar string
  Exceptions  : thrown if no name is found
  Caller      : general
  Status      : Stable

store

  Arg [1]    : Bio::EnsEMBL::Compara::GenomeDB $gdb
  Example    : $gdba->store($gdb);
  Description: Stores a genome database object in the compara database if
               it has not been stored already.  The internal id of the
               stored genomeDB is returned.
  Returntype : int
  Exceptions : thrown if the argument is not a Bio::EnsEMBL::Compara::GenomeDB
  Caller     : general
  Status     : Stable

sync_with_registry

  Example    :
  Description: Synchronize all the cached genome_db objects
               db_adaptor (connections to core databases)
               with those set in Bio::EnsEMBL::Registry.
               Order of precedence is Registry.conf > ComparaConf > genome_db.locator
  Returntype : none
  Exceptions : none
  Caller     : Bio::EnsEMBL::DBSQL::DBAdaptor
  Status     : At risk

Methods code

check_for_consensus_db

# Deprecated lookup in %genome_consensus_xreflist.
#
# Arguments, in the order they are unpacked:
#   $query_gdb      - object answering dbID(); the query genome
#   $con_gdb        - object answering dbID(); the consensus genome
#   $method_link_id - joined to the consensus dbID to build the lookup key
#
# Returns 1 when the query dbID is listed under the key
# "<consensus dbID>:<method_link_id>", and 0 otherwise.
# deprecate() is called on every invocation.
sub check_for_consensus_db {
  my ($self, $query_gdb, $con_gdb, $method_link_id) = @_;

  deprecate("consensus and query sequences are not used anymore.".
              " Please, refer to Bio::EnsEMBL::Compara::GenomicAlignBlock".
              " for more details");

  my $cid = $con_gdb->dbID;
  my $qid = $query_gdb->dbID;
  my $xref_key = $cid . ":" . $method_link_id;

  # The exists() guard keeps the loop below from autovivifying an entry
  # for a key that was never recorded.
  return 0 unless exists $genome_consensus_xreflist{$xref_key};

  foreach my $known_qid (@{ $genome_consensus_xreflist{$xref_key} }) {
    return 1 if $qid == $known_qid;
  }

  return 0;
}

check_for_query_db

# Deprecated lookup in %genome_query_xreflist.
#
# Arguments, in the order they are unpacked:
#   $con_gdb        - object answering dbID(); the consensus genome
#   $query_gdb      - object answering dbID(); the query genome
#   $method_link_id - joined to the query dbID to build the lookup key
#
# Returns 1 when the consensus dbID is listed under the key
# "<query dbID>:<method_link_id>", and 0 otherwise.
# deprecate() is called on every invocation.
sub check_for_query_db {
  my ($self, $con_gdb, $query_gdb, $method_link_id) = @_;

  deprecate("consensus and query sequences are not used anymore.".
              " Please, refer to Bio::EnsEMBL::Compara::GenomicAlignBlock".
              " for more details");

  my $cid = $con_gdb->dbID;
  my $qid = $query_gdb->dbID;
  my $xref_key = $qid . ":" . $method_link_id;

  # The exists() guard keeps the loop below from autovivifying an entry
  # for a key that was never recorded.
  return 0 unless exists $genome_query_xreflist{$xref_key};

  foreach my $known_cid (@{ $genome_query_xreflist{$xref_key} }) {
    return 1 if $cid == $known_cid;
  }

  return 0;
}

create_GenomeDBs

# Rebuilds the in-memory GenomeDB cache from the genome_db table.
#
# Every row becomes a Bio::EnsEMBL::Compara::GenomeDB stored in
# $self->{'_cache'} under its genome_db_id. Once all rows are read,
# $self->{'_GenomeDB_cache'} is set to 1 and sync_with_registry() is called.
#
# The consensus/query cross-reference lists were once filled here from the
# genomic_align_genome table; that code is disabled, so only genome_db is read.
sub create_GenomeDBs {
  my ($self) = @_;

  # One row per species database known to this compara database.
  my $genome_db_sth = $self->prepare("
     SELECT genome_db_id, name, assembly, taxon_id, assembly_default, genebuild, locator
     FROM genome_db 
   ");
  $genome_db_sth->execute;

  # Drop whatever was cached before; entries are re-created row by row.
  $self->{'_cache'} = undef;

  while (my ($dbid, $name, $assembly, $taxon_id, $assembly_default, $genebuild, $locator)
           = $genome_db_sth->fetchrow_array()) {

    my $genome_db = Bio::EnsEMBL::Compara::GenomeDB->new();
    $genome_db->name($name);
    $genome_db->assembly($assembly);
    $genome_db->taxon_id($taxon_id);
    $genome_db->assembly_default($assembly_default);
    $genome_db->dbID($dbid);
    $genome_db->adaptor($self);
    $genome_db->genebuild($genebuild);
    $genome_db->locator($locator);

    $self->{'_cache'}->{$dbid} = $genome_db;
  }

  # The flag must be set before sync_with_registry(), which calls fetch_all()
  # and would otherwise re-enter this method.
  $self->{'_GenomeDB_cache'} = 1;

  $self->sync_with_registry();
}

deleteObj

# Empties the GenomeDB cache in place (the hash itself stays attached to the
# object) and then hands over to the parent class's deleteObj.
sub deleteObj {
  my ($self) = @_;

  my $cache = $self->{'_cache'};
  if ($cache) {
    # Hash-slice delete: removes every cached entry in one statement.
    delete @{$cache}{ keys %{$cache} };
  }

  $self->SUPER::deleteObj;
}


1;

}

fetch_all

# Returns a reference to a fresh array holding every cached GenomeDB.
# The cache is built first (via create_GenomeDBs) when the
# '_GenomeDB_cache' flag has not been set yet.
sub fetch_all {
  my ($self) = @_;

  $self->create_GenomeDBs unless defined $self->{'_GenomeDB_cache'};

  return [ values %{ $self->{'_cache'} } ];
}

fetch_by_Slice

# Resolves the GenomeDB for a slice by going through the core database
# adaptor that the slice's own adaptor is attached to.
#
#   $slice - must be a Bio::EnsEMBL::Slice with an adaptor
# Returns whatever fetch_by_core_DBAdaptor returns for that core adaptor.
# Throws when $slice is not a Bio::EnsEMBL::Slice or has no adaptor.
sub fetch_by_Slice {
  my ($self, $slice) = @_;

  if (!UNIVERSAL::isa($slice, "Bio::EnsEMBL::Slice")) {
    throw("[$slice] must be a Bio::EnsEMBL::Slice");
  }
  if (!$slice->adaptor) {
    throw("[$slice] must have an adaptor");
  }

  return $self->fetch_by_core_DBAdaptor($slice->adaptor()->db());
}

fetch_by_core_DBAdaptor

# Finds the GenomeDB that corresponds to a core database adaptor.
#
# The species name comes from the core MetaContainer (see
# get_species_name_from_core_MetaContainer); the assembly is the version()
# of the first coordinate system returned by the CoordSystemAdaptor.
# Both are then passed to fetch_by_name_assembly.
sub fetch_by_core_DBAdaptor {
  my ($self, $core_dba) = @_;

  my $meta_container = $core_dba->get_MetaContainer();
  my $name = $self->get_species_name_from_core_MetaContainer($meta_container);

  # Only the first coordinate system in the returned list is consulted.
  my ($first_cs) = @{ $core_dba->get_CoordSystemAdaptor->fetch_all() };
  my $assembly = $first_cs->version();

  return $self->fetch_by_name_assembly($name, $assembly);
}

fetch_by_dbID

# Looks up a cached GenomeDB by its internal identifier.
#
#   $dbid - key into the GenomeDB cache; must be defined
# Returns the cached object, or undef when nothing (or a false value) is
# cached under $dbid. Throws when $dbid is undefined.
sub fetch_by_dbID {
  my ($self, $dbid) = @_;

  throw("Must fetch by dbid") unless defined $dbid;

  # Build the cache on first use.
  $self->create_GenomeDBs unless defined $self->{'_GenomeDB_cache'};

  # A bogus dbID yields an explicit undef.
  return $self->{'_cache'}->{$dbid} || undef;
}

fetch_by_name_assembly

# Fetches a GenomeDB by species name and, optionally, assembly.
#
#   $name     - species name (required)
#   $assembly - assembly version; when undefined or empty, the row flagged
#               assembly_default = 1 for that name is used instead
# Returns the result of fetch_by_dbID for the matching genome_db_id.
# Throws when $name is missing or when no row matches.
sub fetch_by_name_assembly {
  my ($self, $name, $assembly) = @_;

  throw('name arguments are required') unless $name;

  my $sth;
  if (defined $assembly && $assembly ne '') {
    $sth = $self->prepare("SELECT genome_db_id FROM genome_db WHERE name = ? AND assembly = ?");
    $sth->execute($name, $assembly);
  }
  else {
    $sth = $self->prepare("SELECT genome_db_id FROM genome_db WHERE name = ? AND assembly_default = 1");
    $sth->execute($name);
  }

  # Only the first matching row is used.
  my ($genome_db_id) = $sth->fetchrow_array();

  unless (defined $genome_db_id) {
    throw("No GenomeDB with this name [$name] and assembly [".
        ($assembly or "--undef--")."]");
  }
  $sth->finish;

  return $self->fetch_by_dbID($genome_db_id);
}

fetch_by_registry_name

# Fetches a GenomeDB using a species name known to Bio::EnsEMBL::Registry.
#
#   $name - name handed to Registry->get_DBAdaptor($name, "core")
# Returns the result of fetch_by_core_DBAdaptor for the adaptor found.
# Throws when $name is missing or the Registry returns no core adaptor.
sub fetch_by_registry_name {
  my ($self, $name) = @_;

  throw('name arguments are required') if !$name;

  my $core_dba = Bio::EnsEMBL::Registry->get_DBAdaptor($name, "core");
  throw("Cannot connect to core database for $name!") unless $core_dba;

  return $self->fetch_by_core_DBAdaptor($core_dba);
}

fetch_by_taxon_id

# Fetches the default-assembly GenomeDB for an NCBI taxon id.
#
#   $taxon_id - taxon id matched against genome_db.taxon_id (required)
# Returns the result of fetch_by_dbID for the first matching genome_db_id.
# Throws when $taxon_id is missing or no row matches; calls warning() when
# more than one row matches, in which case the first row wins.
sub fetch_by_taxon_id {
  my ($self, $taxon_id) = @_;

  unless ($taxon_id) {
    throw('taxon_id argument is required');
  }

  my $sql = "SELECT genome_db_id FROM genome_db WHERE taxon_id = ? AND assembly_default = 1";
  my $sth = $self->prepare($sql);
  $sth->execute($taxon_id);

  # fetchrow_array() hands back one row per call, so every row has to be
  # drained to learn how many GenomeDBs share this taxon id.
  my @ids;
  while (my ($genome_db_id) = $sth->fetchrow_array()) {
    push @ids, $genome_db_id;
  }
  $sth->finish;

  if (!@ids) {
    throw("No GenomeDB with this taxon_id [$taxon_id]");
  }

  my $id = $ids[0];
  if (@ids > 1) {
    warning("taxon_id [${taxon_id}] returned more than one row. Returning the first at ID [${id}]");
  }

  return $self->fetch_by_dbID($id);
}

get_all_db_links

# Lists the GenomeDBs that share a method-link species set with $ref_gdb.
#
#   $ref_gdb        - GenomeDB whose partners are wanted
#   $method_link_id - translated to a method link type through the
#                     MethodLinkSpeciesSetAdaptor
# Returns a reference to an array of GenomeDB objects, in no particular
# order. Members whose dbID equals $ref_gdb's are skipped, and an object
# seen in several species sets is listed once.
sub get_all_db_links {
  my ($self, $ref_gdb, $method_link_id) = @_;

  my $mlss_adaptor = $self->db->get_MethodLinkSpeciesSetAdaptor;
  my $method_link_type =
      $mlss_adaptor->get_method_link_type_from_method_link_id($method_link_id);
  my $mlss_list =
      $mlss_adaptor->fetch_all_by_method_link_type_GenomeDB($method_link_type, $ref_gdb);

  # Keyed by the stringified object, so the same object is kept only once.
  my %linked_gdb_for;
  for my $mlss (@{$mlss_list}) {
    for my $candidate (@{ $mlss->species_set }) {
      next if ($candidate->dbID eq $ref_gdb->dbID);
      $linked_gdb_for{$candidate} = $candidate;
    }
  }

  return [ values %linked_gdb_for ];
}

get_species_name_from_core_MetaContainer

# Works out the species name used for a GenomeDB from a core MetaContainer.
#
# The first value stored under the meta key 'species.sql_name' is preferred;
# when there is none, the binomial of the container's Species is used.
# Throws when the resulting name is empty or undefined.
sub get_species_name_from_core_MetaContainer {
  my ($self, $meta_container) = @_;

  my $sql_names = $meta_container->list_value_by_key('species.sql_name');
  my ($species_name) = @{$sql_names};

  if (!defined $species_name) {
    $species_name = $meta_container->get_Species->binomial;
  }

  unless ($species_name) {
    throw('Species name was still empty/undefined after looking for species.sql_name and binomial name');
  }

  return $species_name;
}

store

# Stores a GenomeDB in the genome_db table, or refreshes an existing row.
#
#   $gdb - a Bio::EnsEMBL::Compara::GenomeDB
#
# A row is looked up by taxon_id, name, assembly and genebuild.
#   * No row found: a new row is inserted. If $gdb already carries a dbID,
#     the new row is renumbered to that dbID.
#   * Row found: its assembly_default and locator are updated from $gdb.
# Afterwards $gdb gets the resulting dbID and this adaptor attached.
#
# Returns the dbID of the stored row.
# Throws (via $self->throw) when $gdb is not a GenomeDB, or when name,
# assembly or taxon_id is missing. The one exception is taxon_id 0 with no
# assembly, which is accepted and stored with an empty assembly.
#
# All values reach the database through placeholders, so quotes or other
# SQL metacharacters in names and locators cannot alter the statements.
sub store {
  my ($self, $gdb) = @_;

  unless (defined $gdb && ref $gdb &&
          $gdb->isa('Bio::EnsEMBL::Compara::GenomeDB')) {
    $self->throw("Must have genomedb arg [$gdb]");
  }

  my $name = $gdb->name;
  my $assembly = $gdb->assembly;
  my $assembly_default = $gdb->assembly_default;
  my $taxon_id = $gdb->taxon_id;
  my $genebuild = $gdb->genebuild;
  my $locator = $gdb->locator;

  if ($taxon_id == 0 and !$assembly) {
    $assembly = "";
  }
  else {
    unless ($name && $assembly && $taxon_id) {
      $self->throw("genome db must have a name, assembly, and taxon_id");
    }
  }

  # An undefined field used to be interpolated as '' into the SQL text;
  # binding '' (rather than NULL) keeps the same rows matching.
  my @lookup_values = map { defined $_ ? $_ : '' }
                      ($taxon_id, $name, $assembly, $genebuild);

  my $lookup_sth = $self->prepare("
      SELECT genome_db_id
      FROM genome_db
      WHERE taxon_id = ? AND name = ?
      AND assembly = ? AND genebuild = ?
   ");
  $lookup_sth->execute(@lookup_values);

  # List context: take the single selected column of the first row.
  my ($dbID) = $lookup_sth->fetchrow_array();
  $lookup_sth->finish;

  if (!$dbID) {
    # The genome db has not been stored before, store it now.
    my $insert_sql = qq(
           INSERT into genome_db (name,assembly,taxon_id,assembly_default,genebuild,locator)
           VALUES (?,?,?,?,?,?)
        );

    my $insert_sth = $self->prepare($insert_sql);
    $insert_sth->bind_param(1, $name, SQL_VARCHAR);
    $insert_sth->bind_param(2, $assembly, SQL_VARCHAR);
    $insert_sth->bind_param(3, $taxon_id, SQL_INTEGER);
    $insert_sth->bind_param(4, $assembly_default, SQL_TINYINT);
    $insert_sth->bind_param(5, $genebuild, SQL_VARCHAR);
    $insert_sth->bind_param(6, $locator, SQL_VARCHAR);

    if ($insert_sth->execute()) {
      $dbID = $insert_sth->{'mysql_insertid'};

      # Honour a dbID the caller already assigned by renumbering the new row.
      if ($gdb->dbID) {
        my $renumber_sth = $self->prepare(
          "UPDATE genome_db SET genome_db_id = ? WHERE genome_db_id = ?"
        );
        if ($renumber_sth->execute($gdb->dbID, $dbID)) {
          $dbID = $gdb->dbID;
        }
      }
    }
  }
  else {
    # Same undef-to-'' convention as for the lookup above.
    my $new_default = defined $assembly_default ? $assembly_default : '';
    my $new_locator = defined $locator ? $locator : '';

    my $update_sth = $self->prepare(
      "UPDATE genome_db SET assembly_default = ?, locator = ? WHERE genome_db_id = ?"
    );
    $update_sth->execute($new_default, $new_locator, $dbID);
  }

  # Update the GenomeDB object so that its dbID and adaptor are set.
  $gdb->dbID($dbID);
  $gdb->adaptor($self);

  return $dbID;
}

sync_with_registry

# Aligns the core-database adaptor of every cached GenomeDB with
# Bio::EnsEMBL::Registry. Does nothing when the Registry module cannot be
# loaded.
#
# For each GenomeDB returned by fetch_all():
#   1. If it has an assembly and "<name> <assembly>" is a Registry alias,
#      the core adaptor registered under that alias is used.
#   2. Otherwise, if the plain name is a Registry alias, that core adaptor
#      is used and "<name> <assembly>" (when available) is added as an alias.
#   3. An adaptor found in step 1 or 2 replaces the GenomeDB's db_adaptor.
#   4. If none was found but the GenomeDB has its own db_adaptor, that
#      adaptor/alias is registered with the Registry instead.
sub sync_with_registry {
  my ($self) = @_;

  return unless (eval "require Bio::EnsEMBL::Registry");

  for my $gdb (@{ $self->fetch_all() }) {
    my ($core_dba, $versioned_name);

    if ($gdb->assembly) {
      $versioned_name = $gdb->name ." ". $gdb->assembly;
      if (Bio::EnsEMBL::Registry->alias_exists($versioned_name)) {
        $core_dba = Bio::EnsEMBL::Registry->get_DBAdaptor($versioned_name, 'core');
      }
    }

    if (!defined($core_dba) and Bio::EnsEMBL::Registry->alias_exists($gdb->name)) {
      $core_dba = Bio::EnsEMBL::Registry->get_DBAdaptor($gdb->name, 'core');
      Bio::EnsEMBL::Registry->add_alias($gdb->name, $versioned_name) if ($versioned_name);
    }

    if ($core_dba) {
      # Defined in the registry, so it overrides any connection already
      # set on the GenomeDB object.
      $gdb->db_adaptor($core_dba);
      next;
    }

    # Nothing in the registry: fall back to the GenomeDB's own adaptor.
    $core_dba = $gdb->db_adaptor();
    next unless defined($core_dba);

    if (Bio::EnsEMBL::Registry->alias_exists($gdb->name)) {
      Bio::EnsEMBL::Registry->add_alias($gdb->name, $versioned_name) if ($versioned_name);
    }
    else {
      Bio::EnsEMBL::Registry->add_DBAdaptor($versioned_name, 'core', $core_dba);
      Bio::EnsEMBL::Registry->add_alias($versioned_name, $gdb->name) if ($versioned_name);
    }
  }

}

General documentation

AUTHOR - Ewan Birney

This module is part of the Ensembl project
Email birney@ebi.ac.uk
Describe contact details here

APPENDIX