SeqStoreConverter BasicConverter
Package variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
DBI
Synopsis
No synopsis!
Description
No description!
Methods
add_attrib_code
No description
Code
assembly_contig_chromosome
No description
Code
assembly_contig_clone
No description
Code
assembly_contig_supercontig
No description
Code
assembly_supercontig_chromosome
No description
Code
chromosome_to_seq_region
No description
Code
clone_to_seq_region
No description
Code
contig_to_seq_region
No description
Code
copy_other_tables
No description
Code
copy_repeat_consensus
No description
Code
copy_tables
No description
Code
create_assembly
No description
Code
create_attribs
No description
Code
create_coord_systems
No description
Code
create_meta_coord
No description
Code
create_seq_regions
No description
Code
dbh
No description
Code
debug
No description
Code
force
No description
Code
get_coord_system_id
No description
Code
get_default_assembly
No description
Code
host
No description
Code
limit
No description
Code
new
No description
Code
password
No description
Code
port
No description
Code
schema
No description
Code
set_top_level
No description
Code
source
No description
Code
supercontig_to_seq_region
No description
Code
target
No description
Code
transfer_dna
No description
Code
transfer_features
No description
Code
transfer_genes
No description
Code
transfer_meta
No description
Code
transfer_prediction_transcripts
No description
Code
transfer_stable_ids
No description
Code
transfer_vega_stable_ids
No description
Code
user
No description
Code
vegaschema
No description
Code
verbose
No description
Code
Methods description
None available.
Methods code
add_attrib_codedescriptionprevnextTop
sub add_attrib_code {
  my $self = shift;
  my $dbh = $self->dbh();
  my $target = $self->target();

  # add a toplevel code to the attrib_type table if it is not there already
my $sth = $dbh->prepare("SELECT attrib_type_id " . "FROM $target.attrib_type " . "WHERE code = 'toplevel'"); $sth->execute(); if($sth->rows()) { my ($attrib_type_id) = $sth->fetchrow_array(); $sth->finish(); return $attrib_type_id; } $sth->finish(); $sth = $dbh->prepare("INSERT INTO $target.attrib_type " . "SET code = 'toplevel', " . "name = 'Top Level', " . "description = 'Top Level Non-Redundant Sequence Region'"); $sth->execute(); my $attrib_type_id = $sth->{'mysql_insertid'}; $sth->finish(); return $attrib_type_id; } 1;
}
assembly_contig_chromosomedescriptionprevnextTop
sub assembly_contig_chromosome {
  my $self = shift;

  $self->debug("Building assembly table - contig/chromosome");

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();

  $dbh->do(
	  "INSERT INTO $target.assembly " .
	  "SELECT tcm.new_id, " .	# asm_seq_region_id (old-new chromosome ID mapping)
"a.contig_id, " . # cmp_seq_region_id
"a.chr_start, " . # asm_start
"a.chr_end, " . # asm_end
"a.contig_start, " . # cmp_start
"a.contig_end, " . # cmp_end
"a.contig_ori " . # ori
"FROM $target.tmp_chr_map tcm, $source.assembly a, $source.contig c " . "WHERE tcm.old_id = a.chromosome_id " . "AND c.contig_id = a.contig_id "); # only copy assembly entries that
# refer to valid contigs (test db has
# superfluous assembly entries)
}
assembly_contig_clonedescriptionprevnextTop
sub assembly_contig_clone {
  my $self = shift;
  
  $self->debug("Building assembly table - contig/clone");

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();

  $dbh->do(
	  "INSERT INTO $target.assembly " .
	  "SELECT tcm.new_id, " .	# asm_seq_region_id (old-new clone ID mapping)
"ctg.contig_id, ". # cmp_seq_region_id
"ctg.embl_offset, " . # asm_start
"ctg.embl_offset+ctg.length-1, " . # asm_end
"1, " . # cmp_start
"ctg.length, " . # cmp_end
"1 " . # ori - contig always positively oriented on the clone
"FROM $target.tmp_cln_map tcm, " . "$source.clone cln, $source.contig ctg " . "WHERE tcm.old_id = cln.clone_id " . "AND cln.clone_id = ctg.clone_id");
}
assembly_contig_supercontigdescriptionprevnextTop
sub assembly_contig_supercontig {
  my $self = shift;

  $self->debug("Building assembly table - contig/supercontig");

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();

  $dbh->do(
	  "INSERT INTO $target.assembly " .
	  "SELECT tsm.new_id, " .	# asm_seq_region_id (superctg name-sr_id mapping)
"a.contig_id, " . # cmp_seq_region_id
"a.superctg_start, " . # asm_start
"a.superctg_end, " . # asm_end
"a.contig_start, " . # cmp_start
"a.contig_end, " . # cmp_end
"a.contig_ori " . # ori
"FROM $target.tmp_superctg_map tsm, $source.assembly a, $source.contig c ". "WHERE tsm.name = a.superctg_name " . "AND c.contig_id = a.contig_id "); # only copy assembly entries that
# refer to valid contigs (test db might
# have these)
}
assembly_supercontig_chromosomedescriptionprevnextTop
sub assembly_supercontig_chromosome {
  my $self = shift;

  $self->debug("Building assembly table - supercontig/chromosome");

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();

  $dbh->do(
	  "INSERT INTO $target.assembly " .
	  "SELECT tcm.new_id, " .	# asm_seq_region_id (chr id)
"tsm.new_id, " . # cmp_seq_region_id (supercontig id)
"min(a.chr_start), " . # asm_start
"max(a.chr_end), " . # asm_end
"min(a.superctg_start), " . # cmp_start
"max(a.superctg_end), " . # cmp_end
"a.superctg_ori " . # ori
"FROM $target.tmp_superctg_map tsm, $target.tmp_chr_map tcm, " . " $source.assembly a ". "WHERE tsm.name = a.superctg_name " . "AND tcm.old_id = a.chromosome_id " . "GROUP BY superctg_name"); } ###############################################################################
# Base class implementations of transfer methods. Can be overridden to
# create species specific behaviour
###############################################################################
}
chromosome_to_seq_regiondescriptionprevnextTop
sub chromosome_to_seq_region {
  my $self = shift;
  my $target_cs_name = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  $target_cs_name ||= "chromosome";
  my $cs_id = $self->get_coord_system_id($target_cs_name);

  $self->debug("Transforming chromosomes into $target_cs_name seq_regions");


  my $select_sth = $dbh->prepare
    ("SELECT chromosome_id, name, length FROM $source.chromosome");

  my $insert_sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
     "VALUES (?,?,?)");

  my $tmp_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");

  $select_sth->execute();

  my ($chrom_id, $name, $length);
  $select_sth->bind_columns(\$chrom_id,\$ name,\$ length);

  while ($select_sth->fetch()) {
    #insert into seq_region table
$insert_sth->execute($name, $cs_id, $length); #copy old/new mapping into temporary table
$tmp_insert_sth->execute($chrom_id, $insert_sth->{'mysql_insertid'}); } $select_sth->finish(); $insert_sth->finish(); $tmp_insert_sth->finish(); return;
}
clone_to_seq_regiondescriptionprevnextTop
sub clone_to_seq_region {
  my $self = shift;
  my $target_cs_name = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  # target coord_system will have a different ID
$target_cs_name ||= "clone"; my $cs_id = $self->get_coord_system_id($target_cs_name); $self->debug("Transforming clones into $target_cs_name seq_regions"); my $select_sth = $dbh->prepare ("SELECT cl.clone_id, CONCAT(cl.embl_acc, '.', cl.embl_version), MAX(ctg.embl_offset+ctg.length-1) FROM $source.clone cl, $source.contig ctg WHERE cl.clone_id = ctg.clone_id GROUP BY ctg.clone_id"); $select_sth->execute(); my ($clone_id, $embl_acc, $length); $select_sth->bind_columns(\$clone_id,\$ embl_acc,\$ length); my $insert_sth = $dbh->prepare ("INSERT INTO $target.seq_region (name, coord_system_id, length) " . "VALUES(?,?,?)"); my $tmp_insert_sth = $dbh->prepare ("INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?, ?)"); while ($select_sth->fetch()) { $insert_sth->execute("$embl_acc", $cs_id, $length); #store mapping of old -> new ids in temp table
$tmp_insert_sth->execute($clone_id, $insert_sth->{'mysql_insertid'}); } $select_sth->finish(); $insert_sth->finish(); $tmp_insert_sth->finish(); return;
}
contig_to_seq_regiondescriptionprevnextTop
sub contig_to_seq_region {
  my $self = shift;
  my $target_cs_name = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh     = $self->dbh();

  $target_cs_name ||= 'contig';

  $self->debug("Transforming contigs into $target_cs_name seq_regions");

  my $cs_id = $self->get_coord_system_id($target_cs_name);

  my $sth = $dbh->prepare
    ("INSERT INTO $target.seq_region " .
     "SELECT contig_id, name, $cs_id, length FROM $source.contig");

  $sth->execute();
  $sth->finish();

  return;
}
copy_other_tablesdescriptionprevnextTop
sub copy_other_tables {
  my $self = shift;

  #xref tables
$self->copy_tables("xref", "go_xref", "identity_xref", "object_xref", "external_db", "external_synonym", #marker/qtl related tables
"map", "marker", "marker_synonym", "qtl", "qtl_synonym", #misc other tables
"supporting_feature", "analysis", "exon_transcript", "interpro", "gene_description", "protein_feature");
}
copy_repeat_consensusdescriptionprevnextTop
sub copy_repeat_consensus {
  my $self = shift;

  my $source = $self->source();
  my $target = $self->target();

  my $dbh = $self->dbh();

  $self->debug("Converting repeat_consensus table.");

  $dbh->do("INSERT INTO $target.repeat_consensus " .
           "(repeat_consensus_id, repeat_name, repeat_class, repeat_type, ".
           " repeat_consensus) " .
           "SELECT repeat_consensus_id, repeat_name, repeat_class, " .
           "       '', repeat_consensus " .
           "FROM $source.repeat_consensus rc" );

  return;
}
copy_tablesdescriptionprevnextTop
sub copy_tables {
  my ($self, @tables) = @_;

  foreach my $table (@tables) {
    $self->debug("Copying $table");
    
    my $source = $self->source();
    my $target = $self->target();

    eval {
      my $sth = $self->dbh()->prepare
        ("INSERT INTO $target.$table SELECT * FROM $source.$table");
      $sth->execute();
      $sth->finish();
    };

    if($@) {
      warn("Copy of table $table failed: $@\n");
    }
  }

  return;
}
create_assemblydescriptionprevnextTop
sub create_assembly {
  my $self = shift;

  $self->assembly_contig_chromosome();
  $self->assembly_contig_clone();
  $self->assembly_contig_supercontig();

  return;
}
create_attribsdescriptionprevnextTop
sub create_attribs {
  my $self = shift;

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();

  #copy the attrib types from the mapannotation type table
$dbh->do ("INSERT INTO $target.attrib_type( attrib_type_id, code, " . " name, description ) " . "SELECT mapannotationtype_id, code, name, description " . "FROM $source.mapannotationtype " ); $dbh->do ("INSERT INTO $target.attrib_type( code, name, description ) " . "VALUES ('name', 'Name',''), ('type', 'Type of feature','')"); return; } #
# The process of actually identifying toplevel seq_regions using the info in
# the database is quite slow. Make the assumption that the coordsystem with
# lowest rank value is going to have all of the toplevel seq_regions.
#
# This method must be overridden if alternate behaviour is required.
#
}
create_coord_systemsdescriptionprevnextTop
sub create_coord_systems {
  my $self = shift;

  my $target = $self->target();
  my $dbh    = $self->dbh();

  my $ass_def = $self->get_default_assembly();

  my @coords = 
    (["chromosome" , $ass_def, "default_version"               ,1],
     ["supercontig", undef   , "default_version"               ,2],
     ["clone"      , undef   , "default_version"               ,3],
     ["contig", undef        , "default_version,sequence_level",4]);

  my @assembly_mappings =  ("chromosome:$ass_def|contig",
                            "clone|contig",
                            "supercontig|contig",
                            "supercontig|contig|clone",
                            "chromosome:$ass_def|contig|clone",
                            "chromosome:$ass_def|contig|supercontig");

  $self->debug("Building coord_system table");

  my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
                           "(name, version, attrib,rank) VALUES (?,?,?,?)");

  my %coord_system_ids;

  foreach my $cs (@coords) {
    $sth->execute(@$cs);
    $coord_system_ids{$cs->[0]} = $sth->{'mysql_insertid'};
  }
  $sth->finish();

  $self->debug("Adding assembly.mapping entries to meta table");

  $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                       "VALUES ('assembly.mapping', ?)");

  foreach my $mapping (@assembly_mappings) {
    $sth->execute($mapping);
  }

  $sth->finish();

  return;
}


#
# populates the contents of the meta_coord table
# must be executed after all of the feature tables in the target database
# have already been populated
#
}
create_meta_coorddescriptionprevnextTop
sub create_meta_coord {
  my $self = shift;

  $self->debug("Building meta_coord table");

  my $target = $self->target();
  my $dbh    = $self->dbh();

  my @feature_tables = qw(density_feature
                          dna_align_feature
                          exon
                          gene
                          karyotype
                          marker_feature
                          misc_feature
                          prediction_exon
                          prediction_transcript
                          protein_align_feature
                          repeat_feature
                          simple_feature
                          transcript);

  foreach my $ft (@feature_tables) {

    $dbh->do(qq{INSERT INTO $target.meta_coord(table_name, coord_system_id,
                                               max_length)
                SELECT '$ft', sr.coord_system_id,
                        MAX(f.seq_region_end - f.seq_region_start + 1)
                FROM $target.$ft f, $target.seq_region sr
                WHERE sr.seq_region_id = f.seq_region_id
                GROUP BY sr.coord_system_id});
  }

  # special case for assembly exception, features are created from both
# sides of table
$dbh->do(qq{INSERT INTO $target.meta_coord SELECT 'assembly_exception', sr.coord_system_id, MAX(IF(ae.seq_region_end - ae.seq_region_start > ae.exc_seq_region_end - ae.exc_seq_region_start, ae.seq_region_end - ae.seq_region_start + 1, ae.exc_seq_region_end - ae.exc_seq_region_start + 1)) FROM $target.assembly_exception ae, $target.seq_region sr WHERE sr.seq_region_id = ae.seq_region_id GROUP BY sr.coord_system_id});
}
create_seq_regionsdescriptionprevnextTop
sub create_seq_regions {
  my $self = shift;

  my $target = $self->target();
  my $dbh    = $self->dbh();

  #default behaviour is to simply copy all tables as they come
$self->contig_to_seq_region('contig'); $self->chromosome_to_seq_region(); $self->supercontig_to_seq_region(); $self->clone_to_seq_region(); return;
}
dbhdescriptionprevnextTop
sub dbh {
  my $self = shift;
  $self->{'dbh'} = shift if(@_);
  return $self->{'dbh'};
}
debugdescriptionprevnextTop
sub debug {
  my $self = shift;
  my $str = shift;
  print STDERR $str . "\n" if $self->verbose();
  return;
}
forcedescriptionprevnextTop
sub force {
  my $self = shift;
  $self->{'force'} = shift if(@_);
  return $self->{'force'};
}
get_coord_system_iddescriptionprevnextTop
sub get_coord_system_id {
  my $self = shift;
  my $cs_name = shift;
  my $cs_version = shift;

  my $target = $self->target();

  my @bind_vals = ($cs_name);
  my $sql = "SELECT cs.coord_system_id " .
            "FROM   $target.coord_system cs " .
            "WHERE  cs.name = ?";

  if($cs_version) {
    push(@bind_vals, $cs_version);
    $sql .= " AND cs.version = ?";
  }

  my $sth = $self->dbh()->prepare($sql);
  $sth->execute(@bind_vals);

  if($sth->rows() != 1) {
    die("Id for non-existant or ambiguous coord system requested " .
        "$cs_name:$cs_version");
  }

  my ($id) = $sth->fetchrow_array();

  $sth->finish();

  return $id;
}
get_default_assemblydescriptionprevnextTop
sub get_default_assembly {
  my $self = shift;

  my $source = $self->source();

  my $sth = $self->dbh->prepare
    ("SELECT meta_value FROM $source.meta WHERE meta_key='assembly.default'");
  $sth->execute();

  if(!$sth->rows() == 1) {
    die("This species has an ambiguous or non-existant assembly.default" .
        " in the meta table");
  }

  my ($result) = $sth->fetchrow_array();
  
  return $result;
}
hostdescriptionprevnextTop
sub host {
  my $self = shift;
  $self->{'host'} = shift if(@_);
  return $self->{'host'};
}
limitdescriptionprevnextTop
sub limit {
  my $self = shift;
  $self->{'limit'} = shift if(@_);
  return $self->{'limit'};
}


###############################################################################
# Utility methods
###############################################################################
}
newdescriptionprevnextTop
sub new {
  my ( $class, $user, $pass, $host, $source, $target, $schema, $vega_schema, $force, $verbose, $limit ) = @_;

  my $self = bless {}, $class;

  my $port;
  ($host, $port) = split(/:/, $host);
  $port ||= 3306;

  my $dbh = DBI->connect( "DBI:mysql:host=$host:port=$port", $user, $pass,
                          {'RaiseError' => 1});


  $self->verbose( $verbose );
  $self->dbh( $dbh );
  $self->force( $force );
  $self->source( $source );
  $self->target( $target );
  $self->schema( $schema );
  $self->vegaschema( $vega_schema);
  $self->host( $host );
  $self->password( $pass);
  $self->user($user);
  $self->port($port);
  $self->limit($limit);


  #check to see if the destination and source databases exist already.
my %dbs = map {$_->[0] => 1} @{$dbh->selectall_arrayref('show databases')}; if( !$dbs{$source} ) { die ("Source db $source does not exist" ); } if( $dbs{$target} ) { if( $force ) { $dbh->do( "drop database $target" ); } else { die("Target db $target already exists. Use -force option to overwrite."); } } $dbh->do( "create database ".$self->target() ); $self->debug("Building schema for $target from $schema"); die "Cannot open $schema" if (! -e $schema); my $cmd = "/usr/local/mysql/bin/mysql -u $user -p$pass -P $port -h $host $target < $schema"; system ($cmd); if ($vega_schema) { $self->debug("Adding vega tables for $target"); die "Cannot open vega creation script" if (! -e $vega_schema); my $cmd = "/usr/local/mysql/bin/mysql -u $user -p$pass -P $port -h $host $target < $vega_schema"; system ($cmd); } $self->debug("Creating temporary tables"); #create a temporary table to store the mapping of old ids to new ids
$dbh->do ("CREATE TEMPORARY TABLE $target.tmp_cln_map (" . "old_id INT, new_id INT, INDEX new_idx (new_id))"); #create a temp table which will store the mapping of old chromosome
#identifiers to new identifiers
$dbh->do("CREATE TEMPORARY TABLE $target.tmp_chr_map (" . " old_id INT, new_id INT,". " INDEX new_idx (new_id))"); #create a temporary table to hold old supercontig name -> new id mappings
$dbh->do("CREATE TEMPORARY TABLE $target.tmp_superctg_map (" . "name VARCHAR(255), new_id INT, ". "INDEX new_idx (new_id))"); return $self; } ###############################################################################
# Getter/Setters for converter properties
###############################################################################
}
passworddescriptionprevnextTop
sub password {
  my $self = shift;
  $self->{'password'} = shift if(@_);
  return $self->{'password'};
}
portdescriptionprevnextTop
sub port {
  my $self = shift;
  $self->{'port'} = shift if(@_);
  return $self->{'port'};
}
schemadescriptionprevnextTop
sub schema {
  my $self = shift;
  $self->{'schema'} = shift if (@_);
  return $self->{'schema'};
}
set_top_leveldescriptionprevnextTop
sub set_top_level {
  my $self = shift;

  my $target = $self->target();
  my $dbh = $self->dbh();

  my $attrib_type_id = $self->add_attrib_code();

  $self->debug("Setting toplevel attributes of seq_regions");

  my $sth = $dbh->prepare("DELETE FROM $target.seq_region_attrib " .
                          "WHERE attrib_type_id = ?");
  $sth->execute($attrib_type_id);
  $sth->finish();


  $sth = $dbh->prepare("SELECT coord_system_id FROM $target.coord_system " .
                       "ORDER BY RANK ASC LIMIT 1");
  $sth->execute();

  my ($cs_id) = $sth->fetchrow_array();

  $sth->finish();

  $sth = $dbh->prepare("INSERT INTO $target.seq_region_attrib " .
                      '(seq_region_id, attrib_type_id, value) ' .
                      "SELECT sr.seq_region_id, $attrib_type_id, 1 " .
                      "FROM $target.seq_region sr " .
                      "WHERE sr.coord_system_id = $cs_id");

  $sth->execute();
  $sth->finish();
}
sourcedescriptionprevnextTop
sub source {
  my $self = shift;
  $self->{'source'} = shift if(@_);
  return $self->{'source'};
}
supercontig_to_seq_regiondescriptionprevnextTop
sub supercontig_to_seq_region {
  my $self = shift;
  my $target_cs_name = shift || "supercontig";

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  $self->debug("Transforming supercontigs into $target_cs_name seq_regions");

  my $cs_id = $self->get_coord_system_id($target_cs_name);

  my $select_sth = $dbh->prepare
    ("SELECT superctg_name, " .
     "MAX(superctg_end) AS length " .
     "FROM $source.assembly " .
     "GROUP BY superctg_name");

  my $insert_sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
     "VALUES (?,?,?)");

  my $tmp_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_superctg_map (name, new_id) VALUES (?, ?)");

  my ($name, $length);
  $select_sth->execute();
  $select_sth->bind_columns(\$name,\$ length);

  while ($select_sth->fetch()) {
    $insert_sth->execute($name, $cs_id, $length);
    $tmp_insert_sth->execute($name, $insert_sth->{'mysql_insertid'});
  }

  $select_sth->finish();
  $insert_sth->finish();
  $tmp_insert_sth->finish();

  return;
}
targetdescriptionprevnextTop
sub target {
  my $self = shift;
  $self->{'target'} = shift if(@_);
  return $self->{'target'};
}
transfer_dnadescriptionprevnextTop
sub transfer_dna {
  my $self = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  $self->debug("Building dna table");

  $dbh->do("INSERT INTO $target.dna " .
          "SELECT c.contig_id as seq_region_id, d.sequence as sequence " .
          "FROM   $source.dna d, $source.contig c " .
          "WHERE  c.dna_id = d.dna_id");
  return;
}
transfer_featuresdescriptionprevnextTop
sub transfer_features {
  my $self = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  my $limit = '';
  if($self->limit()) {
    $limit = ' limit ' . $self->limit();
  }


  #
# Feature tables
# Note that we can just rename contig_* to set_region_* since the
# contig IDs were copied verbatim into seq_region
#
# For some reason mysql occasionally refuses to use the index on large
# tables following copies like the following.
# So: drop the indexes first and then re-add them after
# simple_feature
$self->debug("Building simple_feature table"); $dbh->do ("INSERT INTO $target.simple_feature (simple_feature_id, seq_region_id, ". " seq_region_start, seq_region_end, seq_region_strand, " . " display_label, analysis_id, score) " . "SELECT simple_feature_id, contig_id, contig_start, contig_end, " . " contig_strand, display_label, analysis_id, score " . "FROM $source.simple_feature $limit"); # repeat_feature
$self->debug("Dropping indexes on repeat_feature"); $dbh->do("ALTER TABLE $target.repeat_feature DROP INDEX seq_region_idx"); $dbh->do("ALTER TABLE $target.repeat_feature DROP INDEX repeat_idx"); $dbh->do("ALTER TABLE $target.repeat_feature DROP INDEX analysis_idx"); $self->debug("Building repeat_feature table"); $dbh->do ("INSERT INTO $target.repeat_feature (repeat_feature_id, seq_region_id, " . " seq_region_start, seq_region_end, seq_region_strand, analysis_id, " . " repeat_start, repeat_end, repeat_consensus_id, score) " . "SELECT repeat_feature_id, contig_id, contig_start, contig_end, " . " contig_strand, analysis_id, repeat_start, repeat_end, " . " repeat_consensus_id, score FROM $source.repeat_feature $limit"); $self->debug("Reading indexes on repeat_feature"); $dbh->do("ALTER TABLE $target.repeat_feature " . "ADD INDEX seq_region_idx( seq_region_id, seq_region_start)"); $dbh->do("ALTER TABLE $target.repeat_feature " . "ADD INDEX repeat_idx( repeat_consensus_id )"); $dbh->do("ALTER TABLE $target.repeat_feature " . "ADD INDEX analysis_idx(analysis_id)"); # protein_align_feature
$self->debug("Dropping indexes on protein_align_feature"); $dbh->do("ALTER TABLE $target.protein_align_feature DROP INDEX hit_idx"); $dbh->do( "ALTER TABLE $target.protein_align_feature " . "DROP INDEX seq_region_idx"); $self->debug("Building protein_align_feature table"); $dbh->do( "INSERT INTO $target.protein_align_feature " . " (protein_align_feature_id, seq_region_id, seq_region_start, " . " seq_region_end, seq_region_strand, analysis_id, hit_start, " . " hit_end, hit_name, cigar_line, evalue, perc_ident, score) " . "SELECT protein_align_feature_id, contig_id, contig_start, " . " contig_end, contig_strand, analysis_id, hit_start, " . " hit_end, hit_name, cigar_line, evalue, perc_ident, score ". "FROM $source.protein_align_feature $limit"); $self->debug("Reading indexes on protein_align_feature"); $dbh->do( qq{ALTER TABLE $target.protein_align_feature ADD index seq_region_idx( analysis_id, seq_region_id, seq_region_start, score )}); $dbh->do( "ALTER TABLE $target.protein_align_feature " . "ADD index hit_idx(hit_name)"); # dna_align_feature
$self->debug("Dropping indexes on dna_align_feature"); $dbh->do( "ALTER TABLE $target.dna_align_feature DROP INDEX seq_region_idx"); $dbh->do( "ALTER TABLE $target.dna_align_feature DROP INDEX hit_idx"); $self->debug("Building dna_align_feature table"); $dbh->do( "INSERT INTO $target.dna_align_feature " . " (dna_align_feature_id, seq_region_id, seq_region_start, ". " seq_region_end, seq_region_strand, analysis_id, " . " hit_start, hit_end, hit_name, hit_strand, cigar_line, " . " evalue, perc_ident, score) " . "SELECT dna_align_feature_id, contig_id, contig_start, " . " contig_end, contig_strand, analysis_id, hit_start, " . " hit_end, hit_name, hit_strand, cigar_line, evalue, " . " perc_ident, score FROM $source.dna_align_feature $limit"); $self->debug("Reading indexes on dna_align_feature"); $dbh->do( qq{ALTER TABLE $target.dna_align_feature ADD INDEX seq_region_idx(seq_region_id, analysis_id, seq_region_start, score)}); $dbh->do( "ALTER TABLE $target.dna_align_feature " . "ADD index hit_idx(hit_name)"); # marker_feature
$self->debug("Building marker_feature table"); $dbh->do( "INSERT INTO $target.marker_feature " . " (marker_feature_id, marker_id, seq_region_id, " . " seq_region_start, seq_region_end, analysis_id, " . " map_weight) " . "SELECT marker_feature_id, marker_id, contig_id, contig_start, ". " contig_end, analysis_id, map_weight " . "FROM $source.marker_feature $limit"); # qtl_feature
# Note this uses chromosome coords so we have to join with tmp_chr_map to
# get the mapping
$self->debug("Building qtl_feature table"); $dbh->do ("INSERT INTO $target.qtl_feature( seq_region_id, seq_region_start, " . " seq_region_end, qtl_id, analysis_id) " . "SELECT tcm.new_id, " . " q.start, q.end, q.qtl_id, q.analysis_id " . "FROM $target.tmp_chr_map tcm, $source.qtl_feature q " . "WHERE tcm.old_id = q.chromosome_id $limit"); # These tables now have seq_region_* instead of chromosome_*
$self->debug("Building karyotype table"); $dbh->do( "INSERT INTO $target.karyotype " . "SELECT null, tcm.new_id, " . " k.chr_start, k.chr_end, k.band, k.stain " . "FROM $target.tmp_chr_map tcm, $source.karyotype k " . "WHERE tcm.old_id = k.chromosome_id $limit"); $self->debug("Building marker_map_location table"); $dbh->do( "INSERT INTO $target.marker_map_location " . "SELECT mml.marker_id, mml.map_id, " . " c.name, " . " mml.marker_synonym_id, mml.position, mml.lod_score " . "FROM $source.chromosome c, $source.marker_map_location mml " . "WHERE c.chromosome_id = mml.chromosome_id $limit"); $self->debug( "Building misc_feature table" ); $dbh->do ("INSERT INTO $target.misc_feature( misc_feature_id, seq_region_id, " . " seq_region_start, seq_region_end, seq_region_strand ) " . "SELECT m.mapfrag_id, sr.seq_region_id, m.seq_start, m.seq_end, " . " m.orientation " . "FROM $source.mapfrag m, $target.seq_region sr, $source.dnafrag d " . "WHERE m.dnafrag_id = d.dnafrag_id " . "AND d.name = sr.name $limit" ); $self->debug( "Building misc_set table" ); $dbh->do ("INSERT INTO $target.misc_set( misc_set_id, code, name, description, " . " max_length ) " . "SELECT mapset_id, code, name, description, max_length " . "FROM $source.mapset ms" ); $self->debug( "Building misc_attrib table" ); $dbh->do ("INSERT INTO $target.misc_attrib( misc_feature_id, attrib_type_id, " . " value ) ". "SELECT mapfrag_id, mapannotationtype_id, value " . "FROM $source.mapannotation" ); $dbh->do ("INSERT INTO $target.misc_attrib( misc_feature_id, attrib_type_id, " . " value) " . "SELECT mf.mapfrag_id, at.attrib_type_id, mf.name " . "FROM $source.mapfrag mf, $target.attrib_type at " . "WHERE at.code = 'name'"); $dbh->do ("INSERT INTO $target.misc_attrib( misc_feature_id, attrib_type_id, " . " value) " . "SELECT mf.mapfrag_id, at.attrib_type_id, mf.type " . "FROM $source.mapfrag mf, $target.attrib_type at " . "WHERE at.code = 'type'"); $self->debug( "Building misc_feature_misc_set table" ); $dbh->do ("INSERT INTO $target.misc_feature_misc_set(misc_feature_id, misc_set_id)". "SELECT mapfrag_id, mapset_id ". "FROM $source.mapfrag_mapset $limit" ); return;
}
transfer_genesdescriptionprevnextTop
sub transfer_genes {
  my $self = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  #
# Transfer the gene table
#
$self->debug("Building gene table"); $dbh->do ("INSERT INTO $target.gene " . "SELECT g.gene_id, g.type, g.analysis_id, tcm.new_id, " . "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," . " (a.chr_start+a.contig_end-e.contig_end ))) as start, " . "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " . " (a.chr_start+a.contig_end-e.contig_start))) as end, " . " a.contig_ori*e.contig_strand as strand, " . " g.display_xref_id " . "FROM $source.transcript t, $source.exon_transcript et, " . " $source.exon e, $source.assembly a, $source.gene g, " . " $target.tmp_chr_map tcm " . "WHERE t.transcript_id = et.transcript_id " . "AND et.exon_id = e.exon_id " . "AND e.contig_id = a.contig_id " . "AND g.gene_id = t.gene_id " . "AND a.chromosome_id = tcm.old_id " . "GROUP BY g.gene_id"); #
# Transfer the transcript table
#
$self->debug("Building transcript table"); $dbh->do ("INSERT INTO $target.transcript " . "SELECT t.transcript_id, t.gene_id, tcm.new_id, " . "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," . " (a.chr_start+a.contig_end-e.contig_end ))) as start, " . "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " . " (a.chr_start+a.contig_end-e.contig_start))) as end, " . " a.contig_ori*e.contig_strand as strand, " . " t.display_xref_id " . "FROM $source.transcript t, $source.exon_transcript et, " . " $source.exon e, $source.assembly a, $target.tmp_chr_map tcm " . "WHERE t.transcript_id = et.transcript_id " . "AND et.exon_id = e.exon_id " . "AND e.contig_id = a.contig_id " . "AND a.chromosome_id = tcm.old_id " . "GROUP BY t.transcript_id"); #
# Transfer the exon table
#
$self->debug("Building exon table"); $dbh->do ("INSERT INTO $target.exon " . "SELECT e.exon_id, tcm.new_id, " . "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," . " (a.chr_start+a.contig_end-e.contig_end ))) as start, " . "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " . " (a.chr_start+a.contig_end-e.contig_start))) as end, " . " a.contig_ori*e.contig_strand as strand, " . " e.phase, e.end_phase " . "FROM $source.transcript t, $source.exon_transcript et, " . " $source.exon e, $source.assembly a, $source.gene g, " . " $target.tmp_chr_map tcm " . "WHERE t.transcript_id = et.transcript_id " . "AND et.exon_id = e.exon_id " . "AND e.contig_id = a.contig_id " . "AND g.gene_id = t.gene_id " . "AND a.chromosome_id = tcm.old_id " . "GROUP BY e.exon_id"); #
# Transfer translation table
#
$self->debug("Building translation table"); $dbh->do ("INSERT INTO $target.translation " . "SELECT tl.translation_id, ts.transcript_id, tl.seq_start, " . " tl.start_exon_id, tl.seq_end, tl.end_exon_id " . "FROM $source.transcript ts, $source.translation tl " . "WHERE ts.translation_id = tl.translation_id"); return;
}
transfer_metadescriptionprevnextTop
sub transfer_meta {
  my $self = shift;

  my $source = $self->source();
  my $target = $self->target();

  my $dbh = $self->dbh();

  $dbh->do("INSERT INTO $target.meta (meta_key, meta_value) " .
           "SELECT m.meta_key, m.meta_value FROM $source.meta m " .
           "ORDER BY meta_id");

  return;
}
transfer_prediction_transcriptsdescriptionprevnextTop
sub transfer_prediction_transcripts {
  my $self = shift;

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();
  
  # prediction_transcript / prediction_exon
$self->debug( "Building prediction_exon table" ); $dbh->do ("INSERT INTO $target.prediction_exon ". "( prediction_transcript_id, seq_region_id, seq_region_start, " . " seq_region_end, seq_region_strand, start_phase, score, p_value," . " exon_rank ) " . "SELECT prediction_transcript_id, contig_id, contig_start, contig_end, " . " contig_strand, start_phase, score, p_value, exon_rank " . "FROM $source.prediction_transcript" ); $self->debug("Building prediction_transcript table"); $dbh->do ("INSERT INTO $target.prediction_transcript ". "( prediction_transcript_id, seq_region_id, seq_region_start, " . "seq_region_end, seq_region_strand, analysis_id ) " . "SELECT prediction_transcript_id, contig_id, MIN(contig_start), " . " MAX(contig_end), contig_strand, analysis_id ". "FROM $source.prediction_transcript " . "GROUP BY prediction_transcript_id "); return;
}
transfer_stable_idsdescriptionprevnextTop
sub transfer_stable_ids {
  my $self = shift;

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();

  $self->debug("Building stable id tables");

  #
# Copy the stable id tables
# remove the unused created and modified dates from the stable ids
#
$dbh->do ("INSERT INTO $target.exon_stable_id " . " (exon_id, stable_id, version) " . "SELECT exon_id, stable_id, version " . "FROM $source.exon_stable_id" ); $dbh->do ("INSERT INTO $target.gene_stable_id " . " (gene_id, stable_id, version) " . "SELECT gene_id, stable_id, version " . "FROM $source.gene_stable_id" ); $self->copy_tables ("stable_id_event","mapping_session","transcript_stable_id", "translation_stable_id","gene_archive","peptide_archive"); return;
}
transfer_vega_stable_idsdescriptionprevnextTop
sub transfer_vega_stable_ids {
  my $self = shift;

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();

  $self->debug("Building vega_stable id tables");

# Copy the stable id tables
# add blank columns for created and modified dates for transcript and translations
$self->debug("Copying transcript_stable_id"); $dbh->do ("INSERT INTO $target.transcript_stable_id " . " (transcript_id, stable_id, version) " . "SELECT * " . "FROM $source.transcript_stable_id" ); $self->debug("Copying translation_stable_id"); $dbh->do ("INSERT INTO $target.translation_stable_id " . " (translation_id, stable_id, version) " . "SELECT * " . "FROM $source.translation_stable_id" ); # copy all other tables
$self->copy_tables ("gene_stable_id","exon_stable_id","stable_id_event", "mapping_session","gene_archive","peptide_archive"); return;
}
userdescriptionprevnextTop
sub user {
  my $self = shift;
  $self->{'user'} = shift if(@_);
  return $self->{'user'};
}
vegaschemadescriptionprevnextTop
sub vegaschema {
  my $self = shift;
  $self->{'vega_schema'} = shift if (@_);
  return $self->{'vega_schema'};
}
verbosedescriptionprevnextTop
sub verbose {
  my $self = shift;
  $self->{'verbose'} = shift if (@_);
  return $self->{'verbose'};
}
General documentation
No general documentation available.