None available.
# Stores HGVbase identifiers as synonyms of the variations already loaded.
#
# Steps:
#   1. uncompress dbSNP/rs_hgvbase.txt.gz into <tmpdir>/<tmpfile>;
#   2. call create_and_load for the scratch table tmp_rs_hgvbase
#      (presumably this is what reads the temporary file - confirm against
#      the helper's definition);
#   3. insert an 'HGVbase' (version 15) row into the source table;
#   4. copy every HGVbase ID whose rsID matches a variation name into
#      variation_synonym;
#   5. drop the scratch table.
#
# Reads $self->{'tmpdir'}, $self->{'tmpfile'} and $self->{'dbVariation'}.
# Dies if the gunzip command fails, so that a stale or truncated temporary
# file is never loaded.
sub dump_HGVbaseIDs
{
    my $self = shift;

    my $tmp_path = $self->{'tmpdir'} . "/" . $self->{'tmpfile'};
    my $command  = "gunzip -c dbSNP/rs_hgvbase.txt.gz > " . $tmp_path;
    # The output redirection needs a shell, so the single-string form of
    # system is kept; its exit status is checked instead of being ignored.
    system($command) == 0
        or die "Could not uncompress dbSNP/rs_hgvbase.txt.gz ('$command' failed, status $?)\n";

    debug("Loading HGVbase data");
    create_and_load($self->{'dbVariation'},"tmp_rs_hgvbase","rsID *","HGVbaseID");

    $self->{'dbVariation'}->do(qq{INSERT INTO source (name,version) values ('HGVbase',15)
});
    # Read the auto-increment id straight after the INSERT that produced it,
    # before any other call gets a chance to touch the handle.
    my $source_id = $self->{'dbVariation'}->{'mysql_insertid'};

    debug("Adding HGVbaseIDs to synonym table");
    $self->{'dbVariation'}->do(qq{INSERT INTO variation_synonym (variation_id,source_id,name)
SELECT v.variation_id, $source_id, trh.HGVbaseID
FROM variation v, tmp_rs_hgvbase trh
WHERE v.name = trh.rsID
});

    # The scratch table is only needed for the join above.
    $self->{'dbVariation'}->do(qq{DROP TABLE tmp_rs_hgvbase
});
}
} |
# Adds TSC identifiers from dbSNP as synonyms of existing variations.
#
# A 'TSC' (version 1) row is inserted into the source table and its
# auto-increment id is embedded in the query handed to dumpSQL, which
# selects the rs name, that source id and the loc_snp_id of every SubSNP
# whose loc_snp_id starts with 'TSC'. create_and_load is then called for
# the scratch table tmp_rs_TSC, the rows are joined against variation by
# name into variation_synonym (INSERT IGNORE), and the scratch table is
# dropped.
sub dump_TSCIDs
{
    my ($self) = @_;
    my $variation_db = $self->{'dbVariation'};

    # The source row has to exist first: its id becomes a column of the dump.
    $variation_db->do(qq{INSERT INTO source (name,version) values ('TSC',1)
});
    my $tsc_source_id = $variation_db->{'mysql_insertid'};

    debug("Dumping TSC information from dbSNP");
    my $tsc_query = qq{SELECT concat('rs',ss.snp_id), $tsc_source_id, s.loc_snp_id
FROM SubSNP s, SNPSubSNPLink ss
WHERE ss.subsnp_id = s.subsnp_id
AND s.loc_snp_id like 'TSC%'
};
    dumpSQL($self->{'dbSNP'}, $tsc_query);

    debug("Loading TSC ids into temporary table");
    create_and_load($variation_db, "tmp_rs_TSC", "rsID *", "source_id", "TSCid");

    # Match the dumped rs names against variation names to get variation ids.
    my $synonym_insert = qq{ INSERT IGNORE INTO variation_synonym (variation_id, source_id, name)
SELECT v.variation_id, trt.source_id, trt.TSCid
FROM variation v, tmp_rs_TSC trt
WHERE v.name = trt.rsID
};
    $variation_db->do($synonym_insert);

    $variation_db->do(qq{DROP TABLE tmp_rs_TSC
});
}
} |
# Imports mitochondrial SNPs from the RefSNP.tb and ContigHit.tb flat files
# into the variation database.
#
# Arguments:
#   $data_dir - optional; directory that contains RefSNP.tb and
#               ContigHit.tb. Defaults to /ecs2/scratch4/yuan/hum/MT_35,
#               the location that used to be hard-coded.
#
# A 'mitomap.com' row is inserted into the source table. For every SNP
# collected by read_RefSNP / read_ContigHit one row is then written to
# variation, one to flanking_sequence, one per allele (the 'alleles' value
# split on '/') to allele, and one to variation_feature.
#
# The validation status is computed per SNP. Previously a single variable
# was shared by all loop iterations, so it was undefined (producing invalid
# SQL) until the first 'by-other-pop' SNP and remained 4 for every SNP
# after it.
sub dump_mitocondrialSNPs
{
    my $self     = shift;
    my $data_dir = shift;
    $data_dir = '/ecs2/scratch4/yuan/hum/MT_35' unless defined $data_dir;

    my %mitoSNPs;
    my %seq_region_id_for;   # region name => seq_region_id, filled lazily
    my $slice_adaptor = $self->{'dbCore'}->get_SliceAdaptor();

    $self->{'dbVariation'}->do(qq{INSERT INTO source (name) values ('mitomap.com')
});
    my $source_id = $self->{'dbVariation'}->dbh()->{'mysql_insertid'};

    $self->read_RefSNP(\%mitoSNPs, $data_dir . '/RefSNP.tb');
    $self->read_ContigHit(\%mitoSNPs, $data_dir . '/ContigHit.tb');

    foreach my $snp (keys %mitoSNPs){
        # 4 is the value this code has always stored for 'by-other-pop'.
        # NOTE(review): NULL is used for every other status on the assumption
        # that validation_status is nullable - confirm against the schema.
        my $file_status = $mitoSNPs{$snp}{'status'};
        my $status = (defined $file_status && $file_status eq 'by-other-pop') ? 4 : 'NULL';

        $self->{'dbVariation'}->do(qq{INSERT INTO variation (source_id,name,validation_status) VALUES ($source_id, "$mitoSNPs{$snp}{'name'}", $status);
});
        # Auto-increment id of the variation row that was just inserted.
        my $variation_id = $self->{'dbVariation'}->dbh()->{'mysql_insertid'};

        # Look each region up through the slice adaptor only once.
        my $region_name = $mitoSNPs{$snp}{'region'};
        if (!exists $seq_region_id_for{$region_name}){
            my $slice = $slice_adaptor->fetch_by_region('toplevel',$region_name);
            $seq_region_id_for{$region_name} = $slice_adaptor->get_seq_region_id($slice);
        }
        my $seq_region_id = $seq_region_id_for{$region_name};

        # Flanking sequence.
        $self->{'dbVariation'}->do(qq{INSERT INTO flanking_sequence (variation_id,seq_region_id,seq_region_strand,up_seq,down_seq)
VALUES ($variation_id,$seq_region_id,$mitoSNPs{$snp}{'strand'},"$mitoSNPs{$snp}{'up_seq'}",
"$mitoSNPs{$snp}{'down_seq'}")
});

        # One allele row per '/'-separated allele.
        foreach my $allele (split /\//,$mitoSNPs{$snp}{'alleles'}){
            $self->{'dbVariation'}->do(qq{INSERT INTO allele (variation_id, allele) VALUES ($variation_id,"$allele")
});
        }

        # Finally the variation_feature row.
        $self->{'dbVariation'}->do(qq{INSERT INTO variation_feature (variation_id, seq_region_id,
seq_region_start, seq_region_end, seq_region_strand, variation_name,source_id,validation_status)
VALUES ($variation_id, $seq_region_id, $mitoSNPs{$snp}{'start'}, $mitoSNPs{$snp}{'end'},
$mitoSNPs{$snp}{'strand'}, "$mitoSNPs{$snp}{'name'}",$source_id, $status)
});
    }
}
# Reads the RefSNP.tb file from Yuan's directory /ecs2/scratch4/yuan/hum/MT_35. Important: the format of the data and the
# location of the file might change in each release.
# Reads a tab-separated ContigHit file and records the mapping of each SNP.
#
# Arguments:
#   $snps          - hash reference keyed by SNP id; updated in place.
#   $file_location - path of the ContigHit file.
#
# For every line, column 0 is taken as the SNP id and the following keys
# are stored under $snps->{<id>}:
#   region - the NC_<digits> part of column 7 when it contains
#            "MT_NC_<digits>", otherwise undef
#   strand - column 9
#   start  - column 10
#   end    - column 11
# Lines without a SNP id (e.g. blank lines) are skipped.
#
# Dies if the file cannot be opened.
sub read_ContigHit
{
    my $self = shift;
    my $snps = shift;
    my $file_location = shift;

    # "or" rather than "||": with "||" the die was attached to the file name
    # (always true), so a failed open went unnoticed.
    open(my $in, '<', $file_location)
        or die "Could not open file with mitochondrial SNP information at $file_location: $!\n";

    while (my $row = <$in>){
        chomp $row;
        my @line = split /\t/, $row;
        my ($snp_id, $region, $strand, $start, $end) = @line[0, 7, 9, 10, 11];
        next unless defined $snp_id && length $snp_id;

        # Only trust the capture when this very match succeeded; a failed
        # match leaves $1 holding whatever an earlier match captured.
        my $region_name;
        if (defined $region && $region =~ /MT_(NC_\d+)/){
            $region_name = $1;
        }

        $snps->{$snp_id}->{'region'} = $region_name;
        $snps->{$snp_id}->{'strand'} = $strand;
        $snps->{$snp_id}->{'start'} = $start;
        $snps->{$snp_id}->{'end'} = $end;
    }
    close $in;
}
1; } |
# Reads a tab-separated RefSNP file and records the description of each SNP.
#
# Arguments:
#   $snps          - hash reference keyed by SNP id; updated in place.
#   $file_location - path of the RefSNP file.
#
# For every line, column 0 is taken as the SNP id and the following keys
# are stored under $snps->{<id>}:
#   name     - column 1
#   alleles  - column 5
#   up_seq   - column 6
#   down_seq - column 7
#   status   - column 13
# Lines without a SNP id (e.g. blank lines) are skipped.
#
# Dies if the file cannot be opened.
sub read_RefSNP
{
    my $self = shift;
    my $snps = shift;
    my $file_location = shift;

    # "or" rather than "||": with "||" the die was attached to the file name
    # (always true), so a missing file was silently treated as empty.
    open(my $in, '<', $file_location)
        or die "file with mitochondrial SNP information doesn't exist at $file_location: $!\n";

    while (my $row = <$in>){
        chomp $row;
        my @line = split /\t/, $row;
        my ($snp_id, $snp_name, $alleles, $up_seq, $down_seq, $status) = @line[0, 1, 5, 6, 7, 13];
        next unless defined $snp_id && length $snp_id;

        $snps->{$snp_id}->{'name'} = $snp_name;
        $snps->{$snp_id}->{'alleles'} = $alleles;
        $snps->{$snp_id}->{'up_seq'} = $up_seq;
        $snps->{$snp_id}->{'down_seq'} = $down_seq;
        $snps->{$snp_id}->{'status'} = $status;
    }
    close $in;
}
} |