# Raw content of SeqStoreConverter::CaenorhabditisBriggsae
use strict;
use warnings;
use SeqStoreConverter::BasicConverter;
package SeqStoreConverter::CaenorhabditisBriggsae;
use vars qw(@ISA);
@ISA = qw(SeqStoreConverter::BasicConverter);
# Populate the target coord_system table and register the assembly mappings
# for C. briggsae.
#
# Three coordinate systems are inserted, ranked 1 to 3: scaffold (versioned
# with the default assembly), clone, and contig (flagged as sequence_level).
# The mappings between them are then stored as 'assembly.mapping' rows in
# the target meta table.
#
# Arguments : none (called as a method on the converter)
# Returns   : nothing
sub create_coord_systems {
    my $self = shift;

    $self->debug("CaenorhabditisBriggsae Specific: creating clone, scaffold," .
                 " and contig coordinate systems");

    my $target  = $self->target();
    my $dbh     = $self->dbh();
    my $ass_def = $self->get_default_assembly();

    # Each row is [name, version, attrib, rank], in placeholder order.
    my @coords = (
        ["scaffold", $ass_def, "default_version",                1],
        ['clone',    undef,    'default_version',                2],
        ["contig",   undef,    "default_version,sequence_level", 3],
    );

    my @assembly_mappings = (
        "scaffold:$ass_def|contig",
        "clone|contig",
        "scaffold:$ass_def|contig|clone",
    );

    $self->debug("Building coord_system table");

    my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
                            "(name, version, attrib, rank) VALUES (?,?,?,?)");

    # The generated coord_system ids are not needed here: later steps look
    # them up by name, so nothing is captured from the insert.
    foreach my $cs (@coords) {
        $sth->execute(@$cs);
    }
    $sth->finish();

    $self->debug("Adding assembly.mapping entries to meta table");

    $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                         "VALUES ('assembly.mapping', ?)");

    foreach my $mapping (@assembly_mappings) {
        $sth->execute($mapping);
    }
    $sth->finish();

    return;
}
# Build all seq_regions for C. briggsae by delegating, in order, to
# contig_to_seq_region, clone_to_seq_region and finally
# chromosome_to_seq_region with the 'scaffold' coordinate system.
#
# Returns whatever the final chromosome_to_seq_region call returns.
sub create_seq_regions {
    my ($self) = @_;

    my $message = "CaenorhabditisBriggsae Specific: creating contig, "
                . "clone, contig and scaffold seq_regions";
    $self->debug($message);

    $self->contig_to_seq_region();
    $self->clone_to_seq_region();

    return $self->chromosome_to_seq_region('scaffold');
}
# Copy every row of the source chromosome table into the target seq_region
# table under the requested coordinate system, and record each
# old chromosome_id -> new seq_region id pair in the target tmp_chr_map table.
#
# Arg [1]  : (optional) string - name of the target coordinate system;
#            "chromosome" is used when the argument is false
# Returns  : nothing
sub chromosome_to_seq_region {
    my ($self, $target_cs_name) = @_;

    my $target = $self->target();
    my $source = $self->source();
    my $dbh    = $self->dbh();

    $target_cs_name = "chromosome" unless $target_cs_name;
    my $cs_id = $self->get_coord_system_id($target_cs_name);

    $self->debug("CaenorhabditisBriggsae Specific: Transforming " .
                 "chromosomes into $target_cs_name seq_regions");

    # For consistency with mart and v19 the chromosome name has to stay the
    # same for now, so the leading 'cb25.' is NOT stripped: an earlier
    # version of this query selected substring(name,6) instead of name.
    my $read_sth = $dbh->prepare(
        "SELECT chromosome_id,name,length FROM $source.chromosome");

    my $region_sth = $dbh->prepare(
        "INSERT INTO $target.seq_region (name, coord_system_id, length) " .
        "VALUES (?,?,?)");

    my $map_sth = $dbh->prepare(
        "INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");

    $read_sth->execute();

    my ($old_id, $chr_name, $chr_length);
    $read_sth->bind_columns(\$old_id, \$chr_name, \$chr_length);

    while ($read_sth->fetch()) {
        # create the seq_region row ...
        $region_sth->execute($chr_name, $cs_id, $chr_length);

        # ... and remember which new id replaced the old chromosome id
        my $new_id = $region_sth->{'mysql_insertid'};
        $map_sth->execute($old_id, $new_id);
    }

    foreach my $sth ($read_sth, $region_sth, $map_sth) {
        $sth->finish();
    }

    return;
}
# Load the assembly data for C. briggsae: first the contig/chromosome
# relationships (assembly_contig_chromosome), then the contig/clone ones
# (assembly_contig_clone).
#
# Returns whatever the final assembly_contig_clone call returns.
sub create_assembly {
    my ($self) = @_;

    $self->debug("CaenorhabditisBriggsae Specific: loading assembly data");

    $self->assembly_contig_chromosome();

    return $self->assembly_contig_clone();
}
#
# Load the contig/clone rows of the target assembly table.
#
# This overrides the assembly contig clone method because the briggsae
# database does not have any embl_offsets. The position of a contig on its
# clone is instead read from the contig name, whose second and third
# '.'-separated fields are used as the clone start and end.
#
# Only contigs whose name does not start with 'c' (i.e. those with proper
# accessions) and whose clone has an entry in the target tmp_cln_map table
# are loaded. Dies if the span encoded in the name disagrees with the contig
# length stored in the source contig table.
#
sub assembly_contig_clone {
    my ($self) = @_;

    my $target = $self->target();
    my $source = $self->source();
    my $dbh    = $self->dbh();

    $self->debug("CaenorhabditisBriggsae Specific: loading contig/clone " .
                 "assembly relationship");

    my $insert_sql = join '',
        "INSERT INTO $target.assembly ",
        "set asm_seq_region_id = ?, ",
        " asm_start = ?, ",
        " asm_end = ?, ",
        " cmp_seq_region_id = ?, ",
        " cmp_start = ?, ",
        " cmp_end = ?, ",
        " ori = ?";
    my $assembly_sth = $dbh->prepare($insert_sql);

    # contigs that belong to a converted clone, with their own id and length
    # and the new (target) id of the clone
    my $select_sql = join '',
        "SELECT ctg.name, ctg.contig_id, ctg.length, cln.new_id ",
        "FROM $source.contig ctg, $target.tmp_cln_map cln ",
        "WHERE ctg.name not like 'c%' ",   # only contigs w/ proper accessions
        "AND ctg.clone_id = cln.old_id";
    my $contig_sth = $dbh->prepare($select_sql);

    $contig_sth->execute();

    my ($contig_name, $contig_id, $contig_length, $clone_region_id);
    $contig_sth->bind_columns(\$contig_name, \$contig_id,
                              \$contig_length, \$clone_region_id);

    while ($contig_sth->fetch()) {
        # name fields: <ignored>.<start on clone>.<end on clone>
        my (undef, $clone_start, $clone_end) = split(/\./, $contig_name);
        my $clone_span = $clone_end - $clone_start + 1;

        die("Contig len $contig_length != Clone len $clone_span")
            if $clone_span != $contig_length;

        # the contig is placed in full (1..length) on the forward strand
        $assembly_sth->execute($clone_region_id, $clone_start, $clone_end,
                               $contig_id, 1, $contig_length, 1);
    }

    $contig_sth->finish();
    $assembly_sth->finish();

    return;
}
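#
# Override clone_to_seq_region to provide briggsae specific behaviour.
# The contig_to_seq_region override that follows is commented out, so the
# contig_to_seq_region implementation inherited through @ISA
# (SeqStoreConverter::BasicConverter) is the one that runs.
#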
# sub contig_to_seq_region {
# my $self = shift;
# my $target_cs_name = shift;
# my $target = $self->target();
# my $source = $self->source();
# my $dbh = $self->dbh();
# $target_cs_name ||= 'contig';
# $self->debug("CaenorhabditisBriggsae Specific: Transforming contigs into " .
# "$target_cs_name seq_regions");
# my $cs_id = $self->get_coord_system_id($target_cs_name);
# #There are two types of contigs in briggsae:
# #
# # cosmids/clones
# #
# my $sth = $dbh->prepare
# ("INSERT INTO $target.seq_region " .
# "SELECT contig_id, name, $cs_id, length " .
# "FROM $source.contig " .
# "WHERE name not like 'c%'");
# $sth->execute();
# $sth->finish();
# #
# # WGS contigs
# #
# $sth = $dbh->prepare
# ("INSERT INTO $target.seq_region " .
# "SELECT ctg.contig_id, cln.name, $cs_id, length " .
# "FROM $source.contig ctg, $source.clone cln " .
# "WHERE ctg.clone_id = cln.clone_id " .
# "AND ctg.name like 'c%'");
# $sth->execute();
# $sth->finish();
# return;
# }
# Create one seq_region per properly accessioned clone and record each
# old clone_id -> new seq_region id pair in the target tmp_cln_map table.
#
# We don't want to make clones out of the WGS contigs, only out of the clones
# with proper embl accessions (embl_acc not like 'c%'). The embl_offset is
# not set in the briggsae 17/18/19 databases, so the length of a clone is
# deduced from the names of its contigs: it is the highest end coordinate
# (third '.'-separated field of the contig name) seen among them.
#
# Rows are read ordered by clone_id and consecutive rows sharing the same
# "accession.version" string are treated as one clone. When the query
# returns no rows at all, nothing is inserted.
#
# Arg [1]  : (optional) string - name of the target coordinate system;
#            "clone" is used when the argument is false
# Returns  : nothing
sub clone_to_seq_region {
    my $self           = shift;
    my $target_cs_name = shift;

    my $target = $self->target();
    my $source = $self->source();
    my $dbh    = $self->dbh();

    # the id of the coordinate system is looked up by name in the target db
    $target_cs_name ||= "clone";
    my $cs_id = $self->get_coord_system_id($target_cs_name);

    $self->debug("CaenorhabditisBriggsae Specific:Transforming clones " .
                 "into $target_cs_name seq_regions");

    # The statement text (including its line breaks) is kept exactly as it
    # has always been sent to the server.
    my $select_sth = $dbh->prepare(join("\n",
        "SELECT cl.clone_id,",
        "CONCAT(cl.embl_acc, '.', cl.embl_version),",
        "ctg.name",
        "FROM $source.clone cl, $source.contig ctg",
        "WHERE cl.clone_id = ctg.clone_id",
        "AND cl.embl_acc not like 'c%'",
        "ORDER BY cl.clone_id"));

    $select_sth->execute();

    my ($clone_id, $embl_acc, $ctg_name);
    $select_sth->bind_columns(\$clone_id, \$embl_acc, \$ctg_name);

    # state of the clone currently being accumulated
    my $highest_end      = undef;
    my $current_clone    = undef;
    my $current_clone_id = undef;

    my $insert_sth = $dbh->prepare
        ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
         "VALUES(?,?,?)");

    my $tmp_insert_sth = $dbh->prepare
        ("INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?, ?)");

    # Writes the accumulated clone and its old -> new id mapping.
    my $store_clone = sub {
        $insert_sth->execute($current_clone, $cs_id, $highest_end);
        $tmp_insert_sth->execute($current_clone_id,
                                 $insert_sth->{'mysql_insertid'});
    };

    while ($select_sth->fetch()) {
        # extract the end position of the contig
        my (undef, undef, $ctg_end) = split(/\./, $ctg_name);

        if (!defined($current_clone)) {
            $current_clone    = $embl_acc;
            $current_clone_id = $clone_id;
            $highest_end      = $ctg_end;
        }

        if ($current_clone ne $embl_acc) {
            # started new clone, store last one
            $store_clone->();

            $current_clone    = $embl_acc;
            $current_clone_id = $clone_id;
            $highest_end      = $ctg_end;
        } elsif ($ctg_end > $highest_end) {
            # same clone, adjust end if end of contig is highest yet seen
            $highest_end = $ctg_end;
        }
    }

    # Insert the last clone, but only if at least one row was read:
    # with an empty result set there is no clone to store, and writing one
    # would create a seq_region with an undefined name and length.
    $store_clone->() if defined $current_clone_id;

    $select_sth->finish();
    $insert_sth->finish();
    $tmp_insert_sth->finish();

    return;
}
1;