Raw content of dbSNP::GenericChromosome
use strict;
use warnings;
#object that contains the specific methods to dump data when there are chromosome coordinates from dbSNP (not contigs, as usual).
#So far, this is the case for rat and chicken
package dbSNP::GenericChromosome;
use dbSNP::GenericContig;
use vars qw(@ISA);
use ImportUtils qw(debug load dumpSQL create_and_load);
@ISA = ('dbSNP::GenericContig');
sub variation_feature{
my $self = shift;
### TBD not sure if variations with map_weight > 1 or 2 should be
### imported.
debug("Dumping seq_region data");
#only take toplevel coordinates
dumpSQL($self->{'dbCore'}->dbc()->db_handle, qq{SELECT sr.seq_region_id,
if (sr.name like "E%", CONCAT("LG",sr.name),sr.name) ##add LG for chicken
FROM seq_region_attrib sra, attrib_type at, seq_region sr
WHERE sra.attrib_type_id=at.attrib_type_id
AND at.code="toplevel"
AND sr.seq_region_id = sra.seq_region_id
});
debug("Loading seq_region data");
create_and_load($self->{'dbVariation'}, "tmp_seq_region", "seq_region_id", "name *");
debug("Dumping SNPLoc data");
my ($tablename1,$tablename2,$row);
print "assembly_version is ",$self->{'assembly_version'},"\n";
my ($assembly_version) = $self->{'assembly_version'} =~ /^[a-zA-Z]+(\d+)\.*.*$/;
$assembly_version=1;
my $sth = $self->{'dbSNP'}->prepare(qq{SHOW TABLES LIKE
'$self->{'dbSNP_version'}\_SNPContigLoc\_$assembly_version\__'});
$sth->execute();
while($row = $sth->fetchrow_arrayref()) {
$tablename1 = $row->[0];
}
my $sth1 = $self->{'dbSNP'}->prepare(qq{SHOW TABLES LIKE
'$self->{'dbSNP_version'}\_ContigInfo\_$assembly_version\__'});
$sth1->execute();
while($row = $sth1->fetchrow_arrayref()) {
$tablename2 = $row->[0];
}
print "table_name1 is $tablename1 table_name2 is $tablename2\n";
#my $tablename = $self->{'species_prefix'} . 'SNPContigLoc';
dumpSQL($self->{'dbSNP'}, qq{SELECT t1.snp_id, t2.contig_acc,t1.lc_ngbr+2,t1.rc_ngbr,
IF(t2.group_term like "ref_%",t2.contig_chr,t2.contig_label),
IF(t1.loc_type = 3, t1.phys_pos_from+2, t1.phys_pos_from+1),
IF(t1.loc_type = 3, t1.phys_pos_from+1, t1.phys_pos_from+length(t1.allele)),
IF(t1.orientation, -1, 1)
FROM $tablename1 t1, $tablename2 t2
WHERE t1.ctg_id = t2.ctg_id
#AND t2.group_term like "ref_%"
$self->{'limit'}});
debug("Loading SNPLoc data");
create_and_load($self->{'dbVariation'}, "tmp_contig_loc_chrom", "snp_id i*", "ctg *", "ctg_start i", "ctg_end i", "chr *", "start i", "end i", "strand i");
debug("Creating genotyped variations");
#creating the temporary table with the genotyped variations
$self->{'dbVariation'}->do(qq{CREATE TABLE tmp_genotyped_var SELECT DISTINCT variation_id FROM tmp_individual_genotype_single_bp});
$self->{'dbVariation'}->do(qq{CREATE UNIQUE INDEX variation_idx ON tmp_genotyped_var (variation_id)});
$self->{'dbVariation'}->do(qq{INSERT IGNORE INTO tmp_genotyped_var SELECT DISTINCT variation_id FROM individual_genotype_multiple_bp});
debug("Creating tmp_variation_feature_chrom data");
dumpSQL($self->{'dbVariation'},qq{SELECT v.variation_id, ts.seq_region_id,
tcl.start,tcl.end,
tcl.strand, v.name, v.source_id, v.validation_status
FROM variation v, tmp_contig_loc_chrom tcl, tmp_seq_region ts
WHERE v.snp_id = tcl.snp_id
AND tcl.end>1
AND tcl.chr = ts.name
});
create_and_load($self->{'dbVariation'},'tmp_variation_feature_chrom',"variation_id *","seq_region_id", "seq_region_start", "seq_region_end", "seq_region_strand", "variation_name", "source_id", "validation_status");
debug("Creating tmp_variation_feature_ctg data");
dumpSQL($self->{'dbVariation'},qq{SELECT v.variation_id, ts.seq_region_id,
tcl.ctg_start,tcl.ctg_end,
tcl.strand, v.name, v.source_id, v.validation_status
FROM variation v, tmp_contig_loc_chrom tcl, tmp_seq_region ts
WHERE v.snp_id = tcl.snp_id
AND (tcl.start = 1 or tcl.end=1)
AND tcl.ctg = ts.name
});
create_and_load($self->{'dbVariation'},'tmp_variation_feature_ctg',"variation_id *","seq_region_id", "seq_region_start", "seq_region_end", "seq_region_strand", "variation_name", "source_id", "validation_status");
debug("Dumping data into variation_feature table");
$self->{'dbVariation'}->do(qq{INSERT INTO variation_feature (variation_id, seq_region_id,seq_region_start, seq_region_end, seq_region_strand,variation_name, flags, source_id, validation_status)
SELECT tvf.variation_id, tvf.seq_region_id, tvf.seq_region_start, tvf.seq_region_end, tvf.seq_region_strand,tvf.variation_name,IF(tgv.variation_id,'genotyped',NULL), tvf.source_id, tvf.validation_status
FROM tmp_variation_feature_chrom tvf LEFT JOIN tmp_genotyped_var tgv ON tvf.variation_id = tgv.variation_id
});
debug("Dumping data into variation_feature table");
$self->{'dbVariation'}->do(qq{INSERT INTO variation_feature (variation_id, seq_region_id,seq_region_start, seq_region_end, seq_region_strand,variation_name, flags, source_id, validation_status)
SELECT tvf.variation_id, tvf.seq_region_id, tvf.seq_region_start, tvf.seq_region_end, tvf.seq_region_strand,tvf.variation_name,NULL, tvf.source_id, tvf.validation_status
FROM tmp_variation_feature_chrom tvf
});
debug("Dumping data into variation_feature table");
$self->{'dbVariation'}->do(qq{INSERT INTO variation_feature (variation_id, seq_region_id,seq_region_start, seq_region_end, seq_region_strand,variation_name, flags, source_id, validation_status)
SELECT tvf.variation_id, tvf.seq_region_id, tvf.seq_region_start, tvf.seq_region_end, tvf.seq_region_strand,tvf.variation_name,NULL, tvf.source_id, tvf.validation_status
FROM tmp_variation_feature_ctg tvf
});
#$self->{'dbVariation'}->do("DROP TABLE tmp_contig_loc");
#$self->{'dbVariation'}->do("DROP TABLE tmp_seq_region");
#$self->{'dbVariation'}->do("DROP TABLE tmp_genotyped_var");
#$self->{'dbVariation'}->do("DROP TABLE tmp_variation_feature_chrom");
#$self->{'dbVariation'}->do("DROP TABLE tmp_variation_feature_ctg");
#for the chicken, delete 13,000 SNPs that cannot be mapped to EnsEMBL coordinate
if ($self->{'dbCore'}->species =~ /gga/i){
$self->{'dbVariation'}->do("DELETE FROM variation_feature WHERE seq_region_end = -1");
}
}
1;