SeqStoreConverter DanioRerio
Included librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
SeqStoreConverter::BasicConverter
Inherit
SeqStoreConverter::BasicConverter
Synopsis
No synopsis!
Description
No description!
Methods
assembly_contig_clone
No description
Code
create_assembly
No description
Code
create_coord_systems
No description
Code
create_seq_regions
No description
Code
set_top_level
No description
Code
transfer_genes
No description
Code
Methods description
None available.
Methods code
assembly_contig_clonedescriptionprevnextTop
sub assembly_contig_clone {
  my $self = shift;


  $self->debug("DanioRerio Specific: building assembly table - chunk/clone");
  #this is easy, there is simply one entire chunk for a given clone
my $source = $self->source(); my $target = $self->target(); my $dbh = $self->dbh(); $dbh->do ("INSERT INTO $target.assembly (asm_seq_region_id, cmp_seq_region_id, " . " asm_start, asm_end, cmp_start, cmp_end, ori) " . "SELECT tcm.new_id, tcm.old_id, 1, sr.length, 1, sr.length, 1 " . "FROM $target.tmp_cln_map tcm, $target.seq_region sr " . "WHERE sr.seq_region_id = tcm.new_id"); } # we need to override the transfer of the genes since danio genes can be on
# supercontigs and on chromosomes
}
create_assemblydescriptionprevnextTop
sub create_assembly {
  my $self = shift;

  #chromosomes are made of chunks
$self->assembly_contig_chromosome(); #supercontigs are made of chunks
$self->assembly_contig_supercontig(); #clones are made of chunks
$self->assembly_contig_clone(); return;
}
create_coord_systemsdescriptionprevnextTop
sub create_coord_systems {
  my $self = shift;

  $self->debug("DanioRerio Specific: creating chromosome, supercontig, clone "
               . " and chunk coordinate systems");

  my $target = $self->target();
  my $dbh    = $self->dbh();

  my $ass_def = $self->get_default_assembly();

  my @coords =
    (["chromosome" , $ass_def, "default_version", 1],
     ["supercontig", $ass_def, "default_version", 2],
     ["clone"      , undef, "default_version", 3],
     ["chunk"      , undef, "default_version,sequence_level", 4]);

  my @assembly_mappings =  ("chromosome:$ass_def|chunk",
                            "clone|chunk",
                            "supercontig:$ass_def|chunk",
                            "chromosome:$ass_def|chunk|clone",
                            "supercontig:$ass_def|chunk|clone",
                            "chromosome:$ass_def|chunk|supercontig");

  $self->debug("Building coord_system table");

  my $sth = $dbh->prepare
    ("INSERT INTO $target.coord_system (name, version, attrib, rank) " .
     "VALUES (?,?,?,?)");

  my %coord_system_ids;

  foreach my $cs (@coords) {
    $sth->execute(@$cs);
    $coord_system_ids{$cs->[0]} = $sth->{'mysql_insertid'};
  }
  $sth->finish();

  $self->debug("Adding assembly.mapping entries to meta table");

  $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                       "VALUES ('assembly.mapping', ?)");

  foreach my $mapping (@assembly_mappings) {
    $sth->execute($mapping);
  }

  $sth->finish();


  return;
}
create_seq_regionsdescriptionprevnextTop
sub create_seq_regions {
  my $self = shift;

  my $source = $self->source();
  my $target = $self->target();
  my $dbh    = $self->dbh();


  #
# Turn all of the contents of the contig table into 'chunks' and
# give them arbitrary names like chunk1, chunk2. Keep old internal
# ids for conveneience.
#
$self->debug("DanioRerio Specific: creating chunk seq_regions"); my $sth = $dbh->prepare ("INSERT INTO $target.seq_region (seq_region_id, name, coord_system_id, " . " length) ". "SELECT ctg.contig_id, concat('chunk', ctg.contig_id), " . " cs.coord_system_id, ctg.length " . "FROM $source.contig ctg, $target.coord_system cs " . "WHERE cs.name = 'chunk'"); $sth->execute(); $sth->finish(); my $insert_sth = $dbh->prepare ("INSERT INTO $target.seq_region (name, coord_system_id, length) " . "VALUES (?,?,?)"); my $tmp_chr_insert_sth = $dbh->prepare ("INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)"); my $tmp_supercontig_insert_sth = $dbh->prepare ("INSERT INTO $target.tmp_superctg_map (name, new_id) VALUES (?,?)"); my $tmp_clone_insert_sth = $dbh->prepare ("INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?,?)"); #
# create a temporary table to hold the ids of all 'toplevel'
# seq_regions. Keep the old chromosome_id, and the new seq_region_id
#
$dbh->do ("CREATE TEMPORARY TABLE $target.tmp_toplevel_map " . "(old_id INT, new_id INT, INDEX new_idx(new_id), INDEX old_idx(old_id))"); my $tmp_toplevel_insert_sth = $dbh->prepare ("INSERT INTO $target.tmp_toplevel_map (old_id, new_id) VALUES (?,?)"); #
# Turn real clones into clones
#
$self->debug("DanioRerio Specific: creating clone seq_regions"); my $select_sth = $dbh->prepare ("SELECT ctg.contig_id, ctg.name, ctg.length " . "FROM $source.contig ctg " . "WHERE ctg.name not like 'ctg%' and ctg.name not like 'NA%'"); my $cs_id = $self->get_coord_system_id('clone'); $select_sth->execute(); my ($old_id, $name, $length); $select_sth->bind_columns(\$old_id,\$ name,\$ length); while ($select_sth->fetch()) { #insert into seq_region table
$insert_sth->execute($name, $cs_id, $length); #copy old/new mapping into temporary table
$tmp_clone_insert_sth->execute($old_id, $insert_sth->{'mysql_insertid'}); } $select_sth->finish(); #
# Turn real chromosomes into chromosomes
#
$self->debug("DanioRerio Specific: creating chromosome seq_regions"); $select_sth = $dbh->prepare ("SELECT chr.chromosome_id, chr.name, chr.length " . "FROM $source.chromosome chr " . "WHERE length(chr.name) <= 2"); $cs_id = $self->get_coord_system_id('chromosome'); $select_sth->execute(); $select_sth->bind_columns(\$old_id,\$ name,\$ length); my %chr_id_added; while ($select_sth->fetch()) { #insert into seq_region table
$insert_sth->execute($name, $cs_id, $length); #copy old/new mapping into temporary table
my $new_id = $insert_sth->{'mysql_insertid'}; $tmp_chr_insert_sth->execute($old_id, $new_id); $tmp_toplevel_insert_sth->execute($old_id, $new_id); $chr_id_added{$old_id} = 1; } $select_sth->finish(); #
# Turn supercontigs into supercontigs
#
$self->debug("DanioRerio Specific: creating supercontig seq_regions"); $select_sth = $dbh->prepare ("SELECT a.chromosome_id, a.superctg_name, " . " MAX(a.chr_end) - MIN(a.chr_start) + 1 " . "FROM $source.assembly a, $target.coord_system cs " . "GROUP BY a.superctg_name"); $select_sth->execute(); $select_sth->bind_columns(\$old_id,\$ name,\$ length); $cs_id = $self->get_coord_system_id('supercontig'); while ($select_sth->fetch()) { #insert into seq_region table
$insert_sth->execute($name, $cs_id, $length); #copy old/new mapping into temporary table
my $new_id = $insert_sth->{'mysql_insertid'}; $tmp_supercontig_insert_sth->execute($name,$new_id); if(!$chr_id_added{$old_id}) { $chr_id_added{$old_id} = 1; $tmp_toplevel_insert_sth->execute($old_id, $new_id); } } $select_sth->finish(); $tmp_chr_insert_sth->finish(); $tmp_supercontig_insert_sth->finish(); $tmp_clone_insert_sth->finish(); $tmp_toplevel_insert_sth->finish(); $insert_sth->finish();
}
set_top_leveldescriptionprevnextTop
sub set_top_level {
  my $self = shift;

  my $target = $self->target();
  my $dbh = $self->dbh();

  my $attrib_type_id = $self->add_attrib_code();

  $self->debug("DanioRerio Specific: Setting toplevel attributes of " .
               "seq_regions");

  my $sth = $dbh->prepare("DELETE FROM $target.seq_region_attrib " .
                          "WHERE attrib_type_id = ?");
  $sth->execute($attrib_type_id);
  $sth->finish();

  $sth = $dbh->prepare("INSERT INTO $target.seq_region_attrib " .
                       '            (seq_region_id, attrib_type_id, value) ' .
                       "SELECT toplev.new_id, $attrib_type_id, 1 " .
                       "FROM   $target.tmp_toplevel_map toplev ");

  $sth->execute();
  $sth->finish();
}



1;
}
transfer_genesdescriptionprevnextTop
sub transfer_genes {
  my $self = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  #
# Transfer the gene table
#
$self->debug("DanioRerio Specific: Building gene table"); # first transfer genes on chromosomes
$dbh->do ("INSERT INTO $target.gene " . "SELECT g.gene_id, g.type, g.analysis_id, toplev.new_id, " . "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," . " (a.chr_start+a.contig_end-e.contig_end ))) as start, " . "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " . " (a.chr_start+a.contig_end-e.contig_start))) as end, " . " a.contig_ori*e.contig_strand as strand, " . " g.display_xref_id " . "FROM $source.transcript t, $source.exon_transcript et, " . " $source.exon e, $source.assembly a, $source.gene g, " . " $target.tmp_toplevel_map toplev " . "WHERE t.transcript_id = et.transcript_id " . "AND et.exon_id = e.exon_id " . "AND e.contig_id = a.contig_id " . "AND g.gene_id = t.gene_id " . "AND a.chromosome_id = toplev.old_id " . "GROUP BY g.gene_id"); #
# Transfer the transcript table
#
$self->debug("DanioRerio Specific: Building transcript table "); $dbh->do ("INSERT INTO $target.transcript " . "SELECT t.transcript_id, t.gene_id, toplev.new_id, " . "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," . " (a.chr_start+a.contig_end-e.contig_end ))) as start, " . "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " . " (a.chr_start+a.contig_end-e.contig_start))) as end, " . " a.contig_ori*e.contig_strand as strand, " . " t.display_xref_id " . "FROM $source.transcript t, $source.exon_transcript et, " . " $source.exon e, $source.assembly a, " . " $target.tmp_toplevel_map toplev " . "WHERE t.transcript_id = et.transcript_id " . "AND et.exon_id = e.exon_id " . "AND e.contig_id = a.contig_id " . "AND a.chromosome_id = toplev.old_id " . "GROUP BY t.transcript_id"); #
# Transfer the exon table
#
$self->debug("DanioRerio Specific: Building exon table "); $dbh->do ("INSERT INTO $target.exon " . "SELECT e.exon_id, toplev.new_id, " . "MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," . " (a.chr_start+a.contig_end-e.contig_end ))) as start, " . "MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " . " (a.chr_start+a.contig_end-e.contig_start))) as end, " . " a.contig_ori*e.contig_strand as strand, " . " e.phase, e.end_phase " . "FROM $source.transcript t, $source.exon_transcript et, " . " $source.exon e, $source.assembly a, $source.gene g, " . " $target.tmp_toplevel_map toplev " . "WHERE t.transcript_id = et.transcript_id " . "AND et.exon_id = e.exon_id " . "AND e.contig_id = a.contig_id " . "AND g.gene_id = t.gene_id " . "AND a.chromosome_id = toplev.old_id " . "GROUP BY e.exon_id"); #
# Transfer translation table
#
$self->debug("Building translation table"); $dbh->do ("INSERT INTO $target.translation " . "SELECT tl.translation_id, ts.transcript_id, tl.seq_start, " . " tl.start_exon_id, tl.seq_end, tl.end_exon_id " . "FROM $source.transcript ts, $source.translation tl " . "WHERE ts.translation_id = tl.translation_id"); return;
}
General documentation
No general documentation available.