# Raw content of SeqStoreConverter::GallusGallus
use strict;
use warnings;
use SeqStoreConverter::BasicConverter;
package SeqStoreConverter::GallusGallus;
use vars qw(@ISA);
@ISA = qw(SeqStoreConverter::BasicConverter);
#
# Fills the target coord_system table with the chromosome, supercontig and
# contig coordinate systems and stores the assembly mapping strings in the
# target meta table under the 'assembly.mapping' key.
#
# Takes no arguments other than the converter itself; returns nothing.
#
sub create_coord_systems {
    my $self = shift;

    $self->debug("GallusGallus Specific: creating coord systems");

    my $target  = $self->target();
    my $dbh     = $self->dbh();
    my $ass_def = $self->get_default_assembly();

    # Each row is (name, version, attrib, rank). The contig row has no
    # version and is the only one flagged as sequence_level.
    my @coords =
      (["chromosome" , $ass_def, "default_version",                1],
       ["supercontig", $ass_def, "default_version",                2],
       ["contig"     , undef   , "default_version,sequence_level", 3]);

    # Written verbatim as 'assembly.mapping' values in the meta table.
    my @assembly_mappings =
      ("chromosome:$ass_def|contig",
       "supercontig:$ass_def|contig",
       "chromosome:$ass_def|contig|supercontig:$ass_def");

    $self->debug("Building coord_system table");

    my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
                            "(name, version, attrib, rank) VALUES (?,?,?,?)");
    foreach my $cs (@coords) {
        $sth->execute(@$cs);
    }
    $sth->finish();

    $self->debug("Adding assembly.mapping entries to meta table");

    $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                         "VALUES ('assembly.mapping', ?)");
    foreach my $mapping (@assembly_mappings) {
        $sth->execute($mapping);
    }
    $sth->finish();

    return;
}
#
# Creates the seq_regions for this species by running the contig,
# chromosome and supercontig conversions, in that order.
#
# Returns whatever the final supercontig conversion returns.
#
sub create_seq_regions {
    my $self = shift;

    # The message lists only what this method actually creates; it makes
    # no clone seq_regions.
    $self->debug("GallusGallus Specific: creating contig, " .
                 "chromosome and supercontig seq_regions");

    $self->contig_to_seq_region();
    $self->chromosome_to_seq_region();
    $self->supercontig_to_seq_region();
}
#
# Overridden to trim contig names: the seq_region name is the part of the
# contig name that precedes its second dot.
#
# The optional argument names the target coord system and defaults to
# 'contig'.
#
sub contig_to_seq_region {
    my $self           = shift;
    my $target_cs_name = shift;

    my $target = $self->target();
    my $source = $self->source();
    my $dbh    = $self->dbh();

    $target_cs_name ||= 'contig';

    $self->debug("GallusGallus Specific: Transforming contigs into " .
                 "$target_cs_name seq_regions");

    my $cs_id = $self->get_coord_system_id($target_cs_name);

    # SUBSTRING_INDEX(name, '.', 2) drops the second dot and everything
    # after it. A name with fewer than two dots is copied whole rather
    # than being cut at its first dot or reduced to an empty string.
    my $sth = $dbh->prepare
      ("INSERT INTO $target.seq_region " .
       "SELECT contig_id, SUBSTRING_INDEX(name, '.', 2), $cs_id, length " .
       "FROM $source.contig");

    $sth->execute();
    $sth->finish();
}
#
# Overridden so that leftover garbage in the source chromosome table is
# not used: only chromosomes that are joined to a row of the source
# assembly table become seq_regions.
#
# The optional argument names the target coord system and defaults to
# "chromosome". For every chromosome copied, the old chromosome_id is
# stored in the target tmp_chr_map table next to the insert id reported
# for the new seq_region row. Returns nothing.
#
sub chromosome_to_seq_region {
    my ($self, $target_cs_name) = @_;

    my $target = $self->target();
    my $source = $self->source();
    my $dbh    = $self->dbh();

    $target_cs_name = "chromosome" unless $target_cs_name;
    my $cs_id = $self->get_coord_system_id($target_cs_name);

    $self->debug("GallusGallus Specific: Transforming chromosomes into $target_cs_name seq_regions");

    # The join against the assembly table filters out chromosomes that
    # are not part of the assembly.
    my $select_sql =
        "SELECT c.chromosome_id, c.name, c.length "
      . "FROM $source.chromosome c, $source.assembly a "
      . "WHERE c.chromosome_id = a.chromosome_id group by c.chromosome_id";
    my $seq_region_sql =
        "INSERT INTO $target.seq_region (name, coord_system_id, length) "
      . "VALUES (?,?,?)";
    my $id_map_sql =
        "INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)";

    my $fetch_chromosomes = $dbh->prepare($select_sql);
    my $add_seq_region    = $dbh->prepare($seq_region_sql);
    my $add_id_mapping    = $dbh->prepare($id_map_sql);

    $fetch_chromosomes->execute();

    while (my ($old_id, $chr_name, $chr_length) =
             $fetch_chromosomes->fetchrow_array()) {
        # create the seq_region row for this chromosome
        $add_seq_region->execute($chr_name, $cs_id, $chr_length);

        # remember which new id replaced the old chromosome_id
        $add_id_mapping->execute($old_id, $add_seq_region->{'mysql_insertid'});
    }

    $_->finish() for ($fetch_chromosomes, $add_seq_region, $add_id_mapping);

    return;
}
#
# Loads the assembly data: the contig-to-chromosome assembly first, then
# the contig-to-supercontig assembly.
#
# Returns whatever the supercontig step returns.
#
sub create_assembly {
    my ($self) = @_;

    $self->debug("GallusGallus Specific: loading assembly data");

    $self->assembly_contig_chromosome();
    return $self->assembly_contig_supercontig();
}
1;