Raw content of SeqStoreConverter::DanioRerio
use strict;
use warnings;
use SeqStoreConverter::BasicConverter;
package SeqStoreConverter::DanioRerio;
use vars qw(@ISA);
@ISA = qw(SeqStoreConverter::BasicConverter);
sub create_coord_systems {
my $self = shift;
$self->debug("DanioRerio Specific: creating chromosome, supercontig, clone "
. " and chunk coordinate systems");
my $target = $self->target();
my $dbh = $self->dbh();
my $ass_def = $self->get_default_assembly();
my @coords =
(["chromosome" , $ass_def, "default_version", 1],
["supercontig", $ass_def, "default_version", 2],
["clone" , undef, "default_version", 3],
["chunk" , undef, "default_version,sequence_level", 4]);
my @assembly_mappings = ("chromosome:$ass_def|chunk",
"clone|chunk",
"supercontig:$ass_def|chunk",
"chromosome:$ass_def|chunk|clone",
"supercontig:$ass_def|chunk|clone",
"chromosome:$ass_def|chunk|supercontig");
$self->debug("Building coord_system table");
my $sth = $dbh->prepare
("INSERT INTO $target.coord_system (name, version, attrib, rank) " .
"VALUES (?,?,?,?)");
my %coord_system_ids;
foreach my $cs (@coords) {
$sth->execute(@$cs);
$coord_system_ids{$cs->[0]} = $sth->{'mysql_insertid'};
}
$sth->finish();
$self->debug("Adding assembly.mapping entries to meta table");
$sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
"VALUES ('assembly.mapping', ?)");
foreach my $mapping (@assembly_mappings) {
$sth->execute($mapping);
}
$sth->finish();
return;
}
sub create_seq_regions {
my $self = shift;
my $source = $self->source();
my $target = $self->target();
my $dbh = $self->dbh();
#
# Turn all of the contents of the contig table into 'chunks' and
# give them arbitrary names like chunk1, chunk2. Keep old internal
# ids for conveneience.
#
$self->debug("DanioRerio Specific: creating chunk seq_regions");
my $sth = $dbh->prepare
("INSERT INTO $target.seq_region (seq_region_id, name, coord_system_id, " .
" length) ".
"SELECT ctg.contig_id, concat('chunk', ctg.contig_id), " .
" cs.coord_system_id, ctg.length " .
"FROM $source.contig ctg, $target.coord_system cs " .
"WHERE cs.name = 'chunk'");
$sth->execute();
$sth->finish();
my $insert_sth = $dbh->prepare
("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
"VALUES (?,?,?)");
my $tmp_chr_insert_sth = $dbh->prepare
("INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");
my $tmp_supercontig_insert_sth = $dbh->prepare
("INSERT INTO $target.tmp_superctg_map (name, new_id) VALUES (?,?)");
my $tmp_clone_insert_sth = $dbh->prepare
("INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?,?)");
#
# create a temporary table to hold the ids of all 'toplevel'
# seq_regions. Keep the old chromosome_id, and the new seq_region_id
#
$dbh->do
("CREATE TEMPORARY TABLE $target.tmp_toplevel_map " .
"(old_id INT, new_id INT, INDEX new_idx(new_id), INDEX old_idx(old_id))");
my $tmp_toplevel_insert_sth = $dbh->prepare
("INSERT INTO $target.tmp_toplevel_map (old_id, new_id) VALUES (?,?)");
#
# Turn real clones into clones
#
$self->debug("DanioRerio Specific: creating clone seq_regions");
my $select_sth = $dbh->prepare
("SELECT ctg.contig_id, ctg.name, ctg.length " .
"FROM $source.contig ctg " .
"WHERE ctg.name not like 'ctg%' and ctg.name not like 'NA%'");
my $cs_id = $self->get_coord_system_id('clone');
$select_sth->execute();
my ($old_id, $name, $length);
$select_sth->bind_columns(\$old_id, \$name, \$length);
while ($select_sth->fetch()) {
#insert into seq_region table
$insert_sth->execute($name, $cs_id, $length);
#copy old/new mapping into temporary table
$tmp_clone_insert_sth->execute($old_id, $insert_sth->{'mysql_insertid'});
}
$select_sth->finish();
#
# Turn real chromosomes into chromosomes
#
$self->debug("DanioRerio Specific: creating chromosome seq_regions");
$select_sth = $dbh->prepare
("SELECT chr.chromosome_id, chr.name, chr.length " .
"FROM $source.chromosome chr " .
"WHERE length(chr.name) <= 2");
$cs_id = $self->get_coord_system_id('chromosome');
$select_sth->execute();
$select_sth->bind_columns(\$old_id, \$name, \$length);
my %chr_id_added;
while ($select_sth->fetch()) {
#insert into seq_region table
$insert_sth->execute($name, $cs_id, $length);
#copy old/new mapping into temporary table
my $new_id = $insert_sth->{'mysql_insertid'};
$tmp_chr_insert_sth->execute($old_id, $new_id);
$tmp_toplevel_insert_sth->execute($old_id, $new_id);
$chr_id_added{$old_id} = 1;
}
$select_sth->finish();
#
# Turn supercontigs into supercontigs
#
$self->debug("DanioRerio Specific: creating supercontig seq_regions");
$select_sth = $dbh->prepare
("SELECT a.chromosome_id, a.superctg_name, " .
" MAX(a.chr_end) - MIN(a.chr_start) + 1 " .
"FROM $source.assembly a, $target.coord_system cs " .
"GROUP BY a.superctg_name");
$select_sth->execute();
$select_sth->bind_columns(\$old_id, \$name, \$length);
$cs_id = $self->get_coord_system_id('supercontig');
while ($select_sth->fetch()) {
#insert into seq_region table
$insert_sth->execute($name, $cs_id, $length);
#copy old/new mapping into temporary table
my $new_id = $insert_sth->{'mysql_insertid'};
$tmp_supercontig_insert_sth->execute($name,$new_id);
if(!$chr_id_added{$old_id}) {
$chr_id_added{$old_id} = 1;
$tmp_toplevel_insert_sth->execute($old_id, $new_id);
}
}
$select_sth->finish();
$tmp_chr_insert_sth->finish();
$tmp_supercontig_insert_sth->finish();
$tmp_clone_insert_sth->finish();
$tmp_toplevel_insert_sth->finish();
$insert_sth->finish();
}
sub create_assembly {
my $self = shift;
#chromosomes are made of chunks
$self->assembly_contig_chromosome();
#supercontigs are made of chunks
$self->assembly_contig_supercontig();
#clones are made of chunks
$self->assembly_contig_clone();
return;
}
sub assembly_contig_clone {
my $self = shift;
$self->debug("DanioRerio Specific: building assembly table - chunk/clone");
#this is easy, there is simply one entire chunk for a given clone
my $source = $self->source();
my $target = $self->target();
my $dbh = $self->dbh();
$dbh->do
("INSERT INTO $target.assembly (asm_seq_region_id, cmp_seq_region_id, " .
" asm_start, asm_end, cmp_start, cmp_end, ori) " .
"SELECT tcm.new_id, tcm.old_id, 1, sr.length, 1, sr.length, 1 " .
"FROM $target.tmp_cln_map tcm, $target.seq_region sr " .
"WHERE sr.seq_region_id = tcm.new_id");
}
# we need to override the transfer of the genes since danio genes can be on
# supercontigs and on chromosomes
sub transfer_genes {
my $self = shift;
my $target = $self->target();
my $source = $self->source();
my $dbh = $self->dbh();
#
# Transfer the gene table
#
$self->debug("DanioRerio Specific: Building gene table");
# first transfer genes on chromosomes
$dbh->do
("INSERT INTO $target.gene " .
"SELECT g.gene_id, g.type, g.analysis_id, toplev.new_id, " .
"MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
" (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
"MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
" (a.chr_start+a.contig_end-e.contig_start))) as end, " .
" a.contig_ori*e.contig_strand as strand, " .
" g.display_xref_id " .
"FROM $source.transcript t, $source.exon_transcript et, " .
" $source.exon e, $source.assembly a, $source.gene g, " .
" $target.tmp_toplevel_map toplev " .
"WHERE t.transcript_id = et.transcript_id " .
"AND et.exon_id = e.exon_id " .
"AND e.contig_id = a.contig_id " .
"AND g.gene_id = t.gene_id " .
"AND a.chromosome_id = toplev.old_id " .
"GROUP BY g.gene_id");
#
# Transfer the transcript table
#
$self->debug("DanioRerio Specific: Building transcript table ");
$dbh->do
("INSERT INTO $target.transcript " .
"SELECT t.transcript_id, t.gene_id, toplev.new_id, " .
"MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
" (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
"MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
" (a.chr_start+a.contig_end-e.contig_start))) as end, " .
" a.contig_ori*e.contig_strand as strand, " .
" t.display_xref_id " .
"FROM $source.transcript t, $source.exon_transcript et, " .
" $source.exon e, $source.assembly a, " .
" $target.tmp_toplevel_map toplev " .
"WHERE t.transcript_id = et.transcript_id " .
"AND et.exon_id = e.exon_id " .
"AND e.contig_id = a.contig_id " .
"AND a.chromosome_id = toplev.old_id " .
"GROUP BY t.transcript_id");
#
# Transfer the exon table
#
$self->debug("DanioRerio Specific: Building exon table ");
$dbh->do
("INSERT INTO $target.exon " .
"SELECT e.exon_id, toplev.new_id, " .
"MIN(IF (a.contig_ori=1,(e.contig_start+a.chr_start-a.contig_start)," .
" (a.chr_start+a.contig_end-e.contig_end ))) as start, " .
"MAX(IF (a.contig_ori=1,(e.contig_end+a.chr_start-a.contig_start), " .
" (a.chr_start+a.contig_end-e.contig_start))) as end, " .
" a.contig_ori*e.contig_strand as strand, " .
" e.phase, e.end_phase " .
"FROM $source.transcript t, $source.exon_transcript et, " .
" $source.exon e, $source.assembly a, $source.gene g, " .
" $target.tmp_toplevel_map toplev " .
"WHERE t.transcript_id = et.transcript_id " .
"AND et.exon_id = e.exon_id " .
"AND e.contig_id = a.contig_id " .
"AND g.gene_id = t.gene_id " .
"AND a.chromosome_id = toplev.old_id " .
"GROUP BY e.exon_id");
#
# Transfer translation table
#
$self->debug("Building translation table");
$dbh->do
("INSERT INTO $target.translation " .
"SELECT tl.translation_id, ts.transcript_id, tl.seq_start, " .
" tl.start_exon_id, tl.seq_end, tl.end_exon_id " .
"FROM $source.transcript ts, $source.translation tl " .
"WHERE ts.translation_id = tl.translation_id");
return;
}
sub set_top_level {
my $self = shift;
my $target = $self->target();
my $dbh = $self->dbh();
my $attrib_type_id = $self->add_attrib_code();
$self->debug("DanioRerio Specific: Setting toplevel attributes of " .
"seq_regions");
my $sth = $dbh->prepare("DELETE FROM $target.seq_region_attrib " .
"WHERE attrib_type_id = ?");
$sth->execute($attrib_type_id);
$sth->finish();
$sth = $dbh->prepare("INSERT INTO $target.seq_region_attrib " .
' (seq_region_id, attrib_type_id, value) ' .
"SELECT toplev.new_id, $attrib_type_id, 1 " .
"FROM $target.tmp_toplevel_map toplev ");
$sth->execute();
$sth->finish();
}
1;