SeqStoreConverter CaenorhabditisBriggsae
Included libraries | Package variables | General documentation | Methods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
SeqStoreConverter::BasicConverter
Inherit
SeqStoreConverter::BasicConverter
Synopsis
No synopsis!
Description
No description!
Methods
assembly_contig_clone
No description
Code
chromosome_to_seq_region
No description
Code
clone_to_seq_region
No description
Code
create_assembly
No description
Code
create_coord_systems
No description
Code
create_seq_regions
No description
Code
Methods description
None available.
Methods code
assembly_contig_clonedescriptionprevnextTop
# Load the contig -> clone rows of the target assembly table.
#
# One assembly row is inserted for every source contig whose name does not
# start with 'c' and whose clone has an entry in the target tmp_cln_map
# table.  The position of the contig on its clone is read from the second
# and third '.'-separated fields of the contig name.  Dies when the length
# implied by those two fields differs from the stored contig length.
#
# Takes no arguments besides the converter object; returns nothing.
sub assembly_contig_clone {
  my ($self) = @_;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  $self->debug("CaenorhabditisBriggsae Specific: loading contig/clone " .
               "assembly relationship");

  # Insert statement for a single assembly row.
  my $asm_sql = join(' ',
                     "INSERT INTO $target.assembly",
                     "set asm_seq_region_id = ?,",
                     "    asm_start = ?,",
                     "    asm_end   = ?,",
                     "    cmp_seq_region_id = ?,",
                     "    cmp_start = ?,",
                     "    cmp_end   = ?,",
                     "    ori       = ?");
  my $asm_sth = $dbh->prepare($asm_sql);

  # Contigs that belong to a mapped clone, together with the new clone id.
  # The name filter keeps only contigs with proper accessions.
  my $ctg_sql =
      "SELECT ctg.name, ctg.contig_id, ctg.length, cln.new_id "
    . "FROM $source.contig ctg, $target.tmp_cln_map cln "
    . "WHERE ctg.name not like 'c%' "
    . "AND ctg.clone_id = cln.old_id";
  my $ctg_sth = $dbh->prepare($ctg_sql);

  $ctg_sth->execute();
  $ctg_sth->bind_columns(\my ($ctg_name, $ctg_id, $ctg_len, $cln_id));

  while ($ctg_sth->fetch()) {
    # Second and third name fields are the start and end on the clone.
    my (undef, $cln_start, $cln_end) = split(/\./, $ctg_name);
    my $cln_len = $cln_end - $cln_start + 1;

    die("Contig len $ctg_len != Clone len $cln_len")
      unless $cln_len == $ctg_len;

    # The whole contig (1..length) is placed on the clone, forward strand.
    $asm_sth->execute($cln_id, $cln_start, $cln_end,
                      $ctg_id, 1, $ctg_len, 1);
  }

  $ctg_sth->finish();
  $asm_sth->finish();

  return;
}
#
# Override contig_to_seq_region and clone_to_seq_region to provide
# briggsae specific behaviour
#
# sub contig_to_seq_region {
# my $self = shift;
# my $target_cs_name = shift;
# my $target = $self->target();
# my $source = $self->source();
# my $dbh = $self->dbh();
# $target_cs_name ||= 'contig';
# $self->debug("CaenorhabditisBriggsae Specific: Transforming contigs into " .
# "$target_cs_name seq_regions");
# my $cs_id = $self->get_coord_system_id($target_cs_name);
# #There are two types of contigs in briggsae:
# #
# # cosmids/clones
# #
# my $sth = $dbh->prepare
# ("INSERT INTO $target.seq_region " .
# "SELECT contig_id, name, $cs_id, length " .
# "FROM $source.contig " .
# "WHERE name not like 'c%'");
# $sth->execute();
# $sth->finish();
# #
# # WGS contigs
# #
# $sth = $dbh->prepare
# ("INSERT INTO $target.seq_region " .
# "SELECT ctg.contig_id, cln.name, $cs_id, length " .
# "FROM $source.contig ctg, $source.clone cln " .
# "WHERE ctg.clone_id = cln.clone_id " .
# "AND ctg.name like 'c%'");
# $sth->execute();
# $sth->finish();
# return;
#
}
chromosome_to_seq_regiondescriptionprevnextTop
# Copy every row of the source chromosome table into the target seq_region
# table under the given coordinate system, recording the old -> new id
# mapping in the target tmp_chr_map table.
#
# Arguments:
#   $target_cs_name - name of the target coordinate system; defaults to
#                     "chromosome" when omitted or false.
#
# Returns nothing.
sub chromosome_to_seq_region {
  my ($self, $target_cs_name) = @_;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  my $cs_name = $target_cs_name || "chromosome";
  my $cs_id   = $self->get_coord_system_id($cs_name);

  $self->debug("CaenorhabditisBriggsae Specific: Transforming " .
               "chromosomes into $cs_name seq_regions");

  # For consistency with mart and v19 the chromosome name is copied
  # unchanged for now; the earlier variant that stripped the leading
  # 'cb25.' via substring(name,6) is deliberately not used.
  my $select_sth = $dbh->prepare
    ("SELECT chromosome_id,name,length FROM $source.chromosome");

  my $insert_sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
     "VALUES (?,?,?)");

  my $tmp_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_chr_map (old_id, new_id) VALUES (?, ?)");

  $select_sth->execute();
  $select_sth->bind_columns(\my ($chrom_id, $name, $length));

  while ($select_sth->fetch()) {
    # New seq_region row first ...
    $insert_sth->execute($name, $cs_id, $length);

    # ... then remember which new id the old chromosome id maps to.
    my $new_id = $insert_sth->{'mysql_insertid'};
    $tmp_insert_sth->execute($chrom_id, $new_id);
  }

  $_->finish() for ($select_sth, $insert_sth, $tmp_insert_sth);

  return;
}
clone_to_seq_regiondescriptionprevnextTop
# Create one target seq_region per source clone and record the old -> new
# clone id mapping in the target tmp_cln_map table.
#
# Clones are not made out of the WGS contigs, only out of clones whose
# embl_acc does not start with 'c'.  The source comments note that
# embl_offset is not set in the briggsae 17/18/19 databases, so the clone
# length is deduced from the contig names: it is the highest third
# '.'-separated name field seen among the contigs of a clone.
#
# Arguments:
#   $target_cs_name - name of the target coordinate system; defaults to
#                     "clone" when omitted or false.
#
# Returns nothing.  When the query yields no rows nothing is inserted.
sub clone_to_seq_region {
  my $self = shift;
  my $target_cs_name = shift;

  my $target = $self->target();
  my $source = $self->source();
  my $dbh    = $self->dbh();

  # target coord_system will have a different ID
  $target_cs_name ||= "clone";
  my $cs_id = $self->get_coord_system_id($target_cs_name);

  $self->debug("CaenorhabditisBriggsae Specific:Transforming clones " .
               "into $target_cs_name seq_regions");

  # Rows arrive grouped by clone because of the ORDER BY.
  my $select_sth = $dbh->prepare
    ("SELECT cl.clone_id, CONCAT(cl.embl_acc, '.', cl.embl_version), " .
     "ctg.name FROM $source.clone cl, $source.contig ctg " .
     "WHERE cl.clone_id = ctg.clone_id AND cl.embl_acc not like 'c%' " .
     "ORDER BY cl.clone_id");

  $select_sth->execute();

  my ($clone_id, $embl_acc, $ctg_name);
  $select_sth->bind_columns(\$clone_id, \$embl_acc, \$ctg_name);

  my $insert_sth = $dbh->prepare
    ("INSERT INTO $target.seq_region (name, coord_system_id, length) " .
     "VALUES(?,?,?)");

  my $tmp_insert_sth = $dbh->prepare
    ("INSERT INTO $target.tmp_cln_map (old_id, new_id) VALUES (?, ?)");

  # Store one finished clone and its old -> new id mapping.
  my $store_clone = sub {
    my ($name, $old_id, $length) = @_;
    $insert_sth->execute($name, $cs_id, $length);
    $tmp_insert_sth->execute($old_id, $insert_sth->{'mysql_insertid'});
    return;
  };

  my ($current_clone, $current_clone_id, $highest_end);

  while ($select_sth->fetch()) {
    # extract the end position of the contig
    my (undef, undef, $ctg_end) = split(/\./, $ctg_name);

    if (defined($current_clone) && $current_clone ne $embl_acc) {
      # started new clone, store last one
      $store_clone->($current_clone, $current_clone_id, $highest_end);
      undef $current_clone;
    }

    if (!defined($current_clone)) {
      # first contig of a clone
      $current_clone    = $embl_acc;
      $current_clone_id = $clone_id;
      $highest_end      = $ctg_end;
    }
    elsif ($ctg_end > $highest_end) {
      # same clone, adjust end if end of contig is highest yet seen
      $highest_end = $ctg_end;
    }
  }

  # insert the last clone -- but only if the query returned any rows at
  # all, otherwise an undefined name/length would be written.
  $store_clone->($current_clone, $current_clone_id, $highest_end)
    if defined($current_clone);

  $select_sth->finish();
  $insert_sth->finish();
  $tmp_insert_sth->finish();

  return;
}
1;
}
create_assemblydescriptionprevnextTop
# Load the assembly data: first the contig -> chromosome relationships,
# then the contig -> clone relationships.  The value of the second call
# is passed back to the caller.
sub create_assembly {
  my ($self) = @_;

  $self->debug("CaenorhabditisBriggsae Specific: loading assembly data");

  $self->assembly_contig_chromosome();

  return $self->assembly_contig_clone();
}




#
# Override the assembly contig clone method because the briggsae database
# does not have any embl_offsets
#
}
create_coord_systemsdescriptionprevnextTop
# Fill the target coord_system table with the scaffold, clone and contig
# coordinate systems and add the matching 'assembly.mapping' entries to the
# target meta table.
#
# The scaffold coordinate system is versioned with the value returned by
# get_default_assembly(); clone and contig have no version.
#
# Takes no arguments besides the converter object; returns nothing.
sub create_coord_systems {
  my $self = shift;

  $self->debug("CaenorhabditisBriggsae Specific: creating clone, scaffold," .
              " and contig coordinate systems");

  my $target = $self->target();
  my $dbh    = $self->dbh();

  my $ass_def = $self->get_default_assembly();

  # One row per coordinate system: [name, version, attrib, rank]
  my @coords =
    (["scaffold" , $ass_def,   "default_version", 1     ],
     ['clone'      , undef   , 'default_version', 2     ],
     ["contig"     , undef   , "default_version,sequence_level", 3]);

  my @assembly_mappings =  ("scaffold:$ass_def|contig",
                            "clone|contig",
                            "scaffold:$ass_def|contig|clone");

  $self->debug("Building coord_system table");

  my $sth = $dbh->prepare("INSERT INTO $target.coord_system " .
                           "(name, version, attrib, rank) VALUES (?,?,?,?)");

  # The generated ids are not needed here, so they are not collected.
  foreach my $cs (@coords) {
    $sth->execute(@$cs);
  }
  $sth->finish();

  $self->debug("Adding assembly.mapping entries to meta table");

  $sth = $dbh->prepare("INSERT INTO $target.meta(meta_key, meta_value) " .
                       "VALUES ('assembly.mapping', ?)");

  foreach my $mapping (@assembly_mappings) {
    $sth->execute($mapping);
  }

  $sth->finish();

  return;
}
create_seq_regionsdescriptionprevnextTop
# Create the seq_regions for this species: contigs first, then clones,
# then the source chromosomes, which are stored under the 'scaffold'
# coordinate system.
sub create_seq_regions {
  my $self = shift;

  # The message previously listed "contig" twice.
  $self->debug("CaenorhabditisBriggsae Specific: creating contig, " .
               "clone and scaffold seq_regions");

  $self->contig_to_seq_region();
  $self->clone_to_seq_region();
  $self->chromosome_to_seq_region('scaffold');
}
General documentation
No general documentation available.