Bio::EnsEMBL::ExternalData::Glovar
GlovarSNPAdaptor
Toolbar
Summary
Bio::EnsEMBL::ExternalData::Glovar::GlovarSNPAdaptor -
Object adaptor for Glovar SNPs
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
$glodb = Bio::EnsEMBL::ExternalData::Glovar::DBAdaptor->new(
-user => 'ensro',
-pass => 'secret',
-dbname => 'snp',
-host => 'go_host',
-driver => 'Oracle'
);
my $glovar_adaptor = $glodb->get_GlovarSNPAdaptor;
my $snps = $glovar_adaptor->fetch_all_by_clone_accession('AL100005', 'AL100005', 1, 10000);
Description
This module is an entry point into a glovar database. It allows you to retrieve
SNPs from Glovar.
Methods
Methods description
Arg[1] : (optional) consequence experiment id Example : $glovar_adaptor->consequence_ext(2046); Description : getter/setter for the consequence experiment (coding_sequence.design_entry in the glovar db) Return type : String - consequence experiment id Exceptions : none Caller : general |
Arg[1] : none Example : my @coord_systems = $glovar_adaptor->coordinate_systems; Description : This method returns a list of coordinate systems which are implemented by this class. A minimum of one valid coordinate system must be implemented. Valid coordinate systems are: 'SLICE', 'ASSEMBLY', 'CONTIG', and 'CLONE'. Return type : list of strings Exceptions : none Caller : internal |
Arg[1] : String - Variation ID Example : my $variation = $glovar_adaptor->fetch_SNP_by_id($id); Description : retrieve variations from Glovar by ID Return type : Listref of Bio::EnsEMBL::Variation::VariationFeature objects. Exceptions : none Caller : $self |
Arg[1] : clone internal ID Arg[2] : clone embl accession Arg[3] : clone start coordinate Arg[4] : clone end coordinate Example : @list = @{$glovar_adaptor->fetch_all_by_clone_accession('AL100005', 'AL100005', 1, 10000)}; Description: Retrieves a list of SNPs on a clone in clone coordinates. Returntype : Listref of Bio::EnsEMBL::Variation::VariationFeature objects Exceptions : none Caller : $self->fetch_all_by_Clone |
Arg[1] : Bio::EnsEMBL::SNP object Example : $glovar_adaptor->get_DBLinks($snp, '104567'); Description : adds external database links to snp object Return type : none Exceptions : none Caller : $self |
Arg[1] : Bio::EnsEMBL::Variation::Variation object Example : $glovar_adaptor->get_consequences($var); Description : Adds a TranscriptVariation object to the variation Return type : none Exceptions : none Caller : $self |
Arg[1] : Bio::EnsEMBL::Variation::Variation object Example : $self->get_population_genotypes($var); Description : Adds PopulationGenotype objects to a variation by fetching population genotype info from the db Return type : none Exceptions : thrown if no Bio::EnsEMBL::Variation::Variation is supplied Caller : internal |
Example : my $version = $glovar_adaptor->get_source_version; Description : This method is just for compatibility with Bio::EnsEMBL::Variation API. Return type : undef Exceptions : none Caller : general |
Arg[1] : none Example : my $track_name = $snp_adaptor->track_name; Description : returns the track name Return type : String - track name Exceptions : none Caller : Bio::EnsEMBL::Slice, Bio::EnsEMBL::ExternalData::ExternalFeatureAdaptor |
Methods code
sub consequence_exp
{ my ($self, $exp) = @_;
if ($exp) {
$self->{'consequence_exp'} = $exp;
}
return $self->{'consequence_exp'}; } |
sub coordinate_systems
{ return ('CLONE'); } |
sub fetch_SNP_by_id
{ my ($self, $id) = @_;
my $dnadb = Bio::EnsEMBL::Registry->get_DNAAdaptor($ENV{'ENSEMBL_SPECIES'}, 'glovar');
unless ($dnadb) {
warn "ERROR: No dnadb attached to Glovar.\n";
return;
}
my $q1 = qq(
SELECT
distinct(ss.id_snp) as internal_id,
ss.default_name as id_default,
ms.position as snp_start,
ms.end_position as snp_end,
ms.is_revcomp as snp_strand,
scd.description as validated,
ss.alleles as alleles,
svd.description as snpclass,
cd.chromosome as chr_name,
sseq.id_sequence as nt_id,
sseq.database_seqnname as seq_name,
sseq.database_source as database_source
FROM
snp_sequence sseq,
mapped_snp ms,
snp,
snpvartypedict svd,
snp_confirmation_dict scd,
snp_summary ss,
chromosomedict cd
WHERE ss.default_name = ?
AND sseq.id_sequence = ms.id_sequence
AND ms.id_snp = ss.id_snp
AND ss.id_snp = snp.id_snp
AND snp.var_type = svd.id_dict
AND ss.confirmation_status = scd.id_dict
AND sseq.chromosome = cd.id_dict
AND sseq.is_current = 1
);
my @snps = ();
my $sth1;
eval {
$sth1 = $self->prepare($q1);
$sth1->execute($id);
};
if ($@){
warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
return([]);
}
my (@seq_nt, @seq_clone);
while (my $r = $sth1->fetchrow_hashref()) {
return([]) unless keys %{$r};
if ($r->{'SEQ_NAME'} =~ /^NT/) {
push @seq_nt, $r;
} else {
push @seq_clone, $r;
}
}
if (@seq_nt > 1 or @seq_clone > 1) {
warning(
"More than one mapping of SNP to NT_contig ("
. join(", ", map { $_->{'SEQ_NAME'} } @seq_nt)
. ") or clone ("
. join(", ", map { $_->{'SEQ_NAME'} } @seq_clone)
. ")."
);
}
my $varfeat;
SEQ:
foreach my $row (@seq_clone, @seq_nt) {
$row->{'SNP_END'} ||= $row->{'SNP_START'};
my $snp_start = $row->{'SNP_START'};
my $id_seq = $row->{'NT_ID'};
my $q2 = qq(
SELECT
cs.database_seqname as embl_acc,
csm.start_coordinate as clone_start,
csm.end_coordinate as clone_end,
csm.contig_orientation as clone_strand
FROM clone_seq cs,
clone_seq_map csm
WHERE csm.id_sequence = '$id_seq'
AND cs.id_cloneseq = csm.id_cloneseq
AND (csm.start_coordinate < $snp_start)
AND (csm.end_coordinate > $snp_start)
);
my $sth2;
eval {
$sth2 = $self->prepare($q2);
$sth2->execute();
};
if ($@){
warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
return([]);
}
my $j;
CLONE:
while (my ($embl_acc, $clone_start, $clone_end, $clone_strand) = $sth2->fetchrow_array()) {
$j++;
next CLONE if ($varfeat);
my $clone = $dnadb->get_SliceAdaptor->fetch_by_region('clone', $embl_acc);
next CLONE unless ($clone);
my ($start, $end);
if ($clone_strand == 1) {
$start = $row->{'SNP_START'} - $clone_start + 1;
$end = $row->{'SNP_END'} - $clone_start + 1;
} else {
$start = $clone_end - $row->{'SNP_END'} + 1;
$end = $clone_end - $row->{'SNP_START'} + 1;
}
next CLONE if ($start < 0);
$varfeat = Bio::EnsEMBL::Variation::VariationFeature->new(-dbID => $row->{'INTERNAL_ID'});
$varfeat->slice($clone);
$varfeat->start($start);
$varfeat->end($end);
$varfeat->strand($row->{'SNP_STRAND'}*$clone_strand);
$varfeat = $varfeat->transform('chromosome');
}
warn "WARNING: Multiple clones ($j) returned" if ($j > 1);
next SEQ unless $varfeat;
my $var = Bio::EnsEMBL::Variation::Variation->new(-dbID => $row->{'INTERNAL_ID'});
$varfeat->variation_name($id);
$var->name($id);
$varfeat->seq_region_name($row->{'CHR_NAME'});
$varfeat->source('Glovar');
$var->source('Glovar');
$var->adaptor($self);
$varfeat->allele_string($row->{'ALLELES'});
foreach my $al (split(/\|/, $row->{'ALLELES'})) {
my $allele = Bio::EnsEMBL::Variation::Allele->new(-allele => $al);
$var->add_Allele($allele);
}
my $slice = $dnadb->get_SliceAdaptor->fetch_by_region(
'chromosome',
$row->{'CHR_NAME'},
$varfeat->start - 25,
$varfeat->end + 25,
);
$slice = $slice->invert if ($row->{'SNP_STRAND'} == -1);
my $seq = $slice->seq;
my $up_end = 25;
$up_end++ if (($row->{'SNPCLASS'} eq "SNP - indel") && ($varfeat->start ne $varfeat->end));
$var->five_prime_flanking_seq(substr($seq, 0, $up_end));
$var->three_prime_flanking_seq(substr($seq, 26));
$self->get_consequences($varfeat);
$self->get_DBLinks($var);
$varfeat->add_validation_state($VSTATE_MAP{$row->{'VALIDATED'}});
$var->add_validation_state($VSTATE_MAP{$row->{'VALIDATED'}});
$varfeat->variation($var);
}
$varfeat ? (return [$varfeat]) : (return([])); } |
sub fetch_all_by_clone_accession
{ my ($self, $embl_acc, $embl_version, $cl_start, $cl_end) = @_;
my @cloneinfo = $self->fetch_clone_by_accession($embl_acc);
return([]) unless (@cloneinfo);
my ($nt_name, $id_seq, $clone_start, $clone_end, $clone_strand) = @cloneinfo;
my ($q_start, $q_end);
if ($clone_strand == 1) {
$q_start = $clone_start + $cl_start - 1;
$q_end = $clone_start + $cl_end + 1;
} else{
$q_start = $clone_end - $cl_end - 1;
$q_end = $clone_end - $cl_start + 1;
}
my $q2 = qq(
SELECT
distinct(sgc.id_snp) as id_snp,
ss.id_snp as internal_id,
ss.default_name as id_default,
ms.position as snp_start,
ms.end_position as snp_end,
ms.is_revcomp as snp_strand,
scd.description as validated,
ss.alleles as alleles,
svd.description as snpclass,
ss.is_private as private,
ptd.description as pos_type,
cs.design_entry as expt_id,
sgc_dict.description as consequence
FROM
mapped_snp ms,
snp,
snpvartypedict svd,
snp_confirmation_dict scd,
snp_summary ss
LEFT JOIN (
snp_gene_consequence sgc
INNER JOIN
coding_sequence cs on sgc.id_codingseq = cs.id_codingseq
AND cs.design_entry = ?
INNER JOIN
sgc_dict on sgc.consequence = sgc_dict.id_dict
) on sgc.id_snp = ss.id_snp
LEFT JOIN
position_type_dict ptd on sgc.position_description = ptd.id_dict
WHERE ms.id_sequence = ?
AND ms.id_snp = ss.id_snp
AND ss.id_snp = snp.id_snp
AND snp.var_type = svd.id_dict
AND ss.confirmation_status = scd.id_dict
AND ms.position BETWEEN $q_start AND $q_end
);
my $sth;
eval {
$sth = $self->prepare($q2);
$sth->execute($self->consequence_exp, $id_seq);
};
if ($@){
warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
return([]);
}
my (%varfeats, %cons);
while (my $row = $sth->fetchrow_hashref()) {
return([]) unless keys %{$row};
warn "WARNING: private data!" if $row->{'PRIVATE'};
next unless $row->{'SNP_STRAND'};
my ($start, $end);
$row->{'SNP_END'} ||= $row->{'SNP_START'};
if ($clone_strand == 1) {
$start = $row->{'SNP_START'} - $clone_start + 1;
$end = $row->{'SNP_END'} - $clone_start + 1;
} else {
$start = $clone_end - $row->{'SNP_END'} + 1;
$end = $clone_end -$row->{'SNP_START'} + 1;
}
my $strand = $row->{'SNP_STRAND'}*$clone_strand;
my $key = join(":", $row->{'ID_DEFAULT'}, $start, $end, $strand);
my $consequence_type = $CONSEQUENCE_TYPE_MAP{$row->{'POS_TYPE'}." ".$row->{'CONSEQUENCE'}} || '_';
my $cons_rank = $Bio::EnsEMBL::Variation::VariationFeature::CONSEQUENCE_TYPES{uc($consequence_type)} || 99;
if ((!$cons{$key}) or ($cons_rank < $cons{$key})) {
my $varfeat = Bio::EnsEMBL::Variation::VariationFeature->new_fast(
{
'dbID' => $row->{'INTERNAL_ID'},
'adaptor' => $self,
'variation_name' => $row->{'ID_DEFAULT'},
'start' => $start,
'end' => $end,
'strand' => $strand,
'allele_string' => $row->{'ALLELES'},
'source' => 'Glovar',
'validation_code' => [ $VSTATE_MAP{$row->{'VALIDATED'}} ],
'consequence_type' => [ $consequence_type ],
});
my $var = Bio::EnsEMBL::Variation::Variation->new(
-dbID => $row->{'INTERNAL_ID'},
-ADAPTOR => $self,
-NAME => $row->{'ID_DEFAULT'},
-SOURCE => 'Glovar',
);
$self->get_DBLinks($var);
$varfeat->variation($var);
$cons{$key} = $cons_rank;
$varfeats{$key} = $varfeat;
}
}
return [values %varfeats]; } |
sub get_DBLinks
{ my ($self, $var) = @_;
my $q = qq(
SELECT
snp_name.SNP_NAME as NAME,
snpnametypedict.DESCRIPTION as TYPE
FROM
snp_name,
snpnametypedict
WHERE snp_name.ID_SNP = ?
AND snp_name.SNP_NAME_TYPE = snpnametypedict.ID_DICT
);
my $sth;
eval {
$sth = $self->prepare($q);
$sth->execute($var->dbID);
};
if ($@){
warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
return;
}
while (my $xref = $sth->fetchrow_hashref()) {
$var->add_synonym($xref->{'TYPE'}, $xref->{'NAME'});
} } |
sub get_consequences
{ my ($self, $varfeat) = @_;
my $q = qq(
SELECT
ptd.description as pos_type,
sgc_dict.description as consequence,
cs.name as transcript_stable_id,
sgc.transcript_position as cdna_start
FROM
coding_sequence cs,
snp_gene_consequence sgc
LEFT JOIN
position_type_dict ptd on sgc.position_description = ptd.id_dict
LEFT JOIN
sgc_dict on sgc.consequence = sgc_dict.id_dict
WHERE sgc.id_snp = ?
AND sgc.id_codingseq = cs.id_codingseq
AND cs.design_entry = ?
);
my $sth;
eval {
$sth = $self->prepare($q);
$sth->execute($varfeat->{'dbID'}, $self->consequence_exp);
};
if ($@){
warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
return;
}
while (my $row = $sth->fetchrow_hashref) {
my $consequence_type = $CONSEQUENCE_TYPE_MAP{$row->{'POS_TYPE'}." ".$row->{'CONSEQUENCE'}};
my $key = join(":", $row->{'TRANSCRIPT_STABLE_ID'}, $row->{'CDNA_START'}, $consequence_type);
my $found_tvar;
foreach my $oldtvar (@{ $varfeat->get_all_TranscriptVariations || [] }) {
if (join(":", $oldtvar->transcript->stable_id,
$oldtvar->cdna_start,
$oldtvar->consequence_type
) eq $key) {
$found_tvar = 1;
}
}
unless ($found_tvar) {
$varfeat->add_consequence_type( [$consequence_type] );
my $trans = Bio::EnsEMBL::Transcript->new(
-STABLE_ID => $row->{'TRANSCRIPT_STABLE_ID'},
);
my $tvar = Bio::EnsEMBL::Variation::TranscriptVariation->new_fast({
transcript => $trans,
cdna_start => $row->{'CDNA_START'},
cdna_end => $row->{'CDNA_START'},
consequence_type => [ $consequence_type ],
});
$varfeat->add_TranscriptVariation($tvar);
}
} } |
sub get_population_genotypes
{ my ($self, $var) = @_;
unless ($var->isa('Bio::EnsEMBL::Variation::Variation')) {
throw("You must provide a Bio::EnsEMBL::Variation::Variation object.");
}
my $q = qq(
SELECT
sv.var_string as allele,
af.frequency as frequency,
p.description as pop_name,
ao.sample_size as sample_size
FROM
snp_variation sv,
allele_frequency af,
assay_outcome ao,
population p
WHERE sv.id_snp = ?
AND sv.id_var = af.id_var
AND af.id_outcome = ao.id_outcome
AND ao.id_pop = p.id_pop
);
my $sth;
eval {
$sth = $self->prepare($q);
$sth->execute($var->dbID);
};
if ($@){
warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
return;
}
my $pop;
while (my $row = $sth->fetchrow_hashref) {
$pop->{$row->{'POP_NAME'}}->{'sample_size'} = $row->{'SAMPLE_SIZE'};
push @{ $pop->{$row->{'POP_NAME'}}->{'frequency'} }, $row->{'FREQUENCY'};
push @{ $pop->{$row->{'POP_NAME'}}->{'alleles'} }, $row->{'ALLELE'};
}
foreach my $p (keys %$pop) {
my $population = Bio::EnsEMBL::Variation::Population->new(
-dbID => $p,
-NAME => $p,
-SIZE => $pop->{$p}->{'size'},
);
my ($allele1, $allele2) = @{ $pop->{$p}->{'alleles'} };
my ($freq1, $freq2) = @{ $pop->{$p}->{'frequency'} };
my $pop_genotype = Bio::EnsEMBL::Variation::PopulationGenotype->new(
-dbID => $p,
-ALLELE1 => $allele1,
-ALLELE2 => $allele1,
-FREQUENCY => $freq1**2,
);
$pop_genotype->population($population);
$var->add_PopulationGenotype($pop_genotype);
my $pop_genotype = Bio::EnsEMBL::Variation::PopulationGenotype->new(
-dbID => $p,
-ALLELE1 => $allele2,
-ALLELE2 => $allele2,
-FREQUENCY => $freq2**2,
);
$pop_genotype->population($population);
$var->add_PopulationGenotype($pop_genotype);
my $pop_genotype = Bio::EnsEMBL::Variation::PopulationGenotype->new(
-dbID => $p,
-ALLELE1 => $allele1,
-ALLELE2 => $allele2,
-FREQUENCY => $freq1*$freq2*2,
);
$pop_genotype->population($population);
$var->add_PopulationGenotype($pop_genotype);
} } |
sub get_source_version
{ return undef; } |
sub track_name
{ return("GlovarSNP");
}
1; } |
General documentation