Bio::EnsEMBL::ExternalData::Glovar GlovarSNPAdaptor
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::ExternalData::Glovar::GlovarSNPAdaptor -
Object adaptor for Glovar SNPs
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::ExternalData::Glovar::GlovarAdaptor
Bio::EnsEMBL::Registry
Bio::EnsEMBL::Transcript
Bio::EnsEMBL::Utils::Eprof qw ( eprof_start eprof_end eprof_dump )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Bio::EnsEMBL::Variation::Allele
Bio::EnsEMBL::Variation::Population
Bio::EnsEMBL::Variation::PopulationGenotype
Bio::EnsEMBL::Variation::TranscriptVariation
Bio::EnsEMBL::Variation::Variation
Bio::EnsEMBL::Variation::VariationFeature
Inherit
Bio::EnsEMBL::ExternalData::Glovar::GlovarAdaptor
Synopsis
$glodb = Bio::EnsEMBL::ExternalData::Glovar::DBAdaptor->new(
-user => 'ensro',
-pass => 'secret',
-dbname => 'snp',
-host => 'go_host',
-driver => 'Oracle'
);
my $glovar_adaptor = $glodb->get_GlovarSNPAdaptor;
my $snps = $glovar_adaptor->fetch_all_by_clone_accession('AL100005', 'AL100005', 1, 10000);
Description
This module is an entry point into a glovar database. It allows you to retrieve
SNPs from Glovar.
Methods
consequence_expDescriptionCode
coordinate_systemsDescriptionCode
fetch_SNP_by_idDescriptionCode
fetch_all_by_clone_accessionDescriptionCode
get_DBLinksDescriptionCode
get_consequencesDescriptionCode
get_population_genotypesDescriptionCode
get_source_versionDescriptionCode
track_nameDescriptionCode
Methods description
consequence_expcode    nextTop
  Arg[1]      : (optional) consequence experiment id
Example : $glovar_adaptor->consequence_ext(2046);
Description : getter/setter for the consequence experiment
(coding_sequence.design_entry in the glovar db)
Return type : String - consequence experiment id
Exceptions : none
Caller : general
coordinate_systemscodeprevnextTop
  Arg[1]      : none
Example : my @coord_systems = $glovar_adaptor->coordinate_systems;
Description : This method returns a list of coordinate systems which are
implemented by this class. A minimum of one valid coordinate
system must be implemented. Valid coordinate systems are:
'SLICE', 'ASSEMBLY', 'CONTIG', and 'CLONE'.
Return type : list of strings
Exceptions : none
Caller : internal
fetch_SNP_by_idcodeprevnextTop
  Arg[1]      : String - Variation ID
Example : my $variation = $glovar_adaptor->fetch_SNP_by_id($id);
Description : retrieve variations from Glovar by ID
Return type : Listref of Bio::EnsEMBL::Variation::VariationFeature objects.
Exceptions : none
Caller : $self
fetch_all_by_clone_accessioncodeprevnextTop
  Arg[1]      : clone internal ID
Arg[2] : clone embl accession
Arg[3] : clone start coordinate
Arg[4] : clone end coordinate
Example : @list = @{$glovar_adaptor->fetch_all_by_clone_accession('AL100005', 'AL100005', 1, 10000)};
Description: Retrieves a list of SNPs on a clone in clone coordinates.
Returntype : Listref of Bio::EnsEMBL::Variation::VariationFeature objects
Exceptions : none
Caller : $self->fetch_all_by_Clone
get_DBLinkscodeprevnextTop
  Arg[1]      : Bio::EnsEMBL::SNP object
Example : $glovar_adaptor->get_DBLinks($snp, '104567');
Description : adds external database links to snp object
Return type : none
Exceptions : none
Caller : $self
get_consequencescodeprevnextTop
  Arg[1]      : Bio::EnsEMBL::Variation::Variation object
Example : $glovar_adaptor->get_consequences($var);
Description : Adds a TranscriptVariation object to the variation
Return type : none
Exceptions : none
Caller : $self
get_population_genotypescodeprevnextTop
  Arg[1]      : Bio::EnsEMBL::Variation::Variation object
Example : $self->get_population_genotypes($var);
Description : Adds PopulationGenotype objects to a variation by fetching
population genotype info from the db
Return type : none
Exceptions : thrown if no Bio::EnsEMBL::Variation::Variation is supplied
Caller : internal
get_source_versioncodeprevnextTop
  Example     : my $version = $glovar_adaptor->get_source_version;
Description : This method is just for compatibility with
Bio::EnsEMBL::Variation API.
Return type : undef
Exceptions : none
Caller : general
track_namecodeprevnextTop
  Arg[1]      : none
Example : my $track_name = $snp_adaptor->track_name;
Description : returns the track name
Return type : String - track name
Exceptions : none
Caller : Bio::EnsEMBL::Slice,
Bio::EnsEMBL::ExternalData::ExternalFeatureAdaptor
Methods code
consequence_expdescriptionprevnextTop
sub consequence_exp {
    my ($self, $exp) = @_;
    if ($exp) {
        $self->{'consequence_exp'} = $exp;
    }
    return $self->{'consequence_exp'};
}
coordinate_systemsdescriptionprevnextTop
sub coordinate_systems {
    return ('CLONE');
}
fetch_SNP_by_iddescriptionprevnextTop
sub fetch_SNP_by_id {
    my ($self, $id) = @_;
    #&eprof_start('fetch_snp_by_id');
my $dnadb = Bio::EnsEMBL::Registry->get_DNAAdaptor($ENV{'ENSEMBL_SPECIES'}, 'glovar'); unless ($dnadb) { warn "ERROR: No dnadb attached to Glovar.\n"; return; } ## SNP query
my $q1 = qq( SELECT distinct(ss.id_snp) as internal_id, ss.default_name as id_default, ms.position as snp_start, ms.end_position as snp_end, ms.is_revcomp as snp_strand, scd.description as validated, ss.alleles as alleles, svd.description as snpclass, cd.chromosome as chr_name, sseq.id_sequence as nt_id, sseq.database_seqnname as seq_name, sseq.database_source as database_source FROM snp_sequence sseq, mapped_snp ms, snp, snpvartypedict svd, snp_confirmation_dict scd, snp_summary ss, chromosomedict cd WHERE ss.default_name = ? AND sseq.id_sequence = ms.id_sequence AND ms.id_snp = ss.id_snp AND ss.id_snp = snp.id_snp AND snp.var_type = svd.id_dict AND ss.confirmation_status = scd.id_dict AND sseq.chromosome = cd.id_dict AND sseq.is_current = 1 ); my @snps = (); my $sth1; eval { $sth1 = $self->prepare($q1); $sth1->execute($id); }; if ($@){ warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@"); return([]); } # loop over all SNP mappings found and pick one; mappings to clones
# take preference over mappings to NT_contigs
my (@seq_nt, @seq_clone); while (my $r = $sth1->fetchrow_hashref()) { return([]) unless keys %{$r}; if ($r->{'SEQ_NAME'} =~ /^NT/) { push @seq_nt, $r; } else { push @seq_clone, $r; } } # if more than one NT or clone mapping has been returned, something is
# wrong with snp_sequence.is_current, so print a warning
if (@seq_nt > 1 or @seq_clone > 1) { warning( "More than one mapping of SNP to NT_contig (" . join(", ", map { $_->{'SEQ_NAME'} } @seq_nt) . ") or clone (" . join(", ", map { $_->{'SEQ_NAME'} } @seq_clone) . ")." ); } # loop over all mappings returned; try clones first; you're done once
# you've managed to transform to chromosomal coords successfully
my $varfeat; SEQ: foreach my $row (@seq_clone, @seq_nt) { $row->{'SNP_END'} ||= $row->{'SNP_START'}; my $snp_start = $row->{'SNP_START'}; my $id_seq = $row->{'NT_ID'}; # get clone the SNP is on
my $q2 = qq( SELECT cs.database_seqname as embl_acc, csm.start_coordinate as clone_start, csm.end_coordinate as clone_end, csm.contig_orientation as clone_strand FROM clone_seq cs, clone_seq_map csm WHERE csm.id_sequence = '$id_seq' AND cs.id_cloneseq = csm.id_cloneseq AND (csm.start_coordinate < $snp_start) AND (csm.end_coordinate > $snp_start) ); my $sth2; eval { $sth2 = $self->prepare($q2); $sth2->execute(); }; if ($@){ warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@"); return([]); } my $j; CLONE: while (my ($embl_acc, $clone_start, $clone_end, $clone_strand) = $sth2->fetchrow_array()) { $j++; ## map to chromosome
# you might get more than one clone back from Glovar, since it
# contains the whole clones, not only the golden parts; if you
# managed to transform to chromosome, you already had the right
# one, so skip
next CLONE if ($varfeat); # get clone from core db
# if no clone was found, you've picked the wrong snp_sequence
# try the next one
my $clone = $dnadb->get_SliceAdaptor->fetch_by_region('clone', $embl_acc); next CLONE unless ($clone); # calculate clone coordinates for SNP
my ($start, $end); if ($clone_strand == 1) { $start = $row->{'SNP_START'} - $clone_start + 1; $end = $row->{'SNP_END'} - $clone_start + 1; } else { $start = $clone_end - $row->{'SNP_END'} + 1; $end = $clone_end - $row->{'SNP_START'} + 1; } next CLONE if ($start < 0); $varfeat = Bio::EnsEMBL::Variation::VariationFeature->new(-dbID => $row->{'INTERNAL_ID'}); $varfeat->slice($clone); $varfeat->start($start); $varfeat->end($end); $varfeat->strand($row->{'SNP_STRAND'}*$clone_strand); $varfeat = $varfeat->transform('chromosome'); } warn "WARNING: Multiple clones ($j) returned" if ($j > 1); # try next snp_sequence if you couldn't transform this one
next SEQ unless $varfeat; my $var = Bio::EnsEMBL::Variation::Variation->new(-dbID => $row->{'INTERNAL_ID'}); $varfeat->variation_name($id); $var->name($id); $varfeat->seq_region_name($row->{'CHR_NAME'}); $varfeat->source('Glovar'); $var->source('Glovar'); $var->adaptor($self); # alleles
$varfeat->allele_string($row->{'ALLELES'}); # temporary hack; change sql query to get data from snp_variation,
# allele_frequency, population
foreach my $al (split(/\|/, $row->{'ALLELES'})) { my $allele = Bio::EnsEMBL::Variation::Allele->new(-allele => $al); $var->add_Allele($allele); } # get flanking sequence from core
my $slice = $dnadb->get_SliceAdaptor->fetch_by_region( 'chromosome', $row->{'CHR_NAME'}, $varfeat->start - 25, $varfeat->end + 25, ); $slice = $slice->invert if ($row->{'SNP_STRAND'} == -1); my $seq = $slice->seq; # determine end of upstream sequence depending on range type (in-dels
# of type "between", i.e. start !== end, are actually inserts)
my $up_end = 25; $up_end++ if (($row->{'SNPCLASS'} eq "SNP - indel") && ($varfeat->start ne $varfeat->end)); $var->five_prime_flanking_seq(substr($seq, 0, $up_end)); $var->three_prime_flanking_seq(substr($seq, 26)); # consequences and DBLinks
$self->get_consequences($varfeat); $self->get_DBLinks($var); # validation state
$varfeat->add_validation_state($VSTATE_MAP{$row->{'VALIDATED'}}); $var->add_validation_state($VSTATE_MAP{$row->{'VALIDATED'}}); # population genotypes
# $self->get_population_genotypes($var);
# add variation to variationFeature
$varfeat->variation($var); } #&eprof_end('fetch_snp_by_id');
#&eprof_dump(\*STDERR);
$varfeat ? (return [$varfeat]) : (return([]));
}
fetch_all_by_clone_accessiondescriptionprevnextTop
sub fetch_all_by_clone_accession {
    my ($self, $embl_acc, $embl_version, $cl_start, $cl_end) = @_;

    #&eprof_start('clone_sql');
# get info on clone
my @cloneinfo = $self->fetch_clone_by_accession($embl_acc); return([]) unless (@cloneinfo); my ($nt_name, $id_seq, $clone_start, $clone_end, $clone_strand) = @cloneinfo; ## temporary hack for SNP density script: skip vega-specific clones
#unless ($nt_name =~ /^NT/) {
# warn "WARNING: Skipping vega-specific clone ($embl_acc).\n";
# return ([]);
#}
# now get the SNPs on this clone
# get only features in the desired region of the clone
my ($q_start, $q_end); if ($clone_strand == 1) { $q_start = $clone_start + $cl_start - 1; $q_end = $clone_start + $cl_end + 1; } else{ $q_start = $clone_end - $cl_end - 1; $q_end = $clone_end - $cl_start + 1; } my $q2 = qq( SELECT distinct(sgc.id_snp) as id_snp, ss.id_snp as internal_id, ss.default_name as id_default, ms.position as snp_start, ms.end_position as snp_end, ms.is_revcomp as snp_strand, scd.description as validated, ss.alleles as alleles, svd.description as snpclass, ss.is_private as private, ptd.description as pos_type, cs.design_entry as expt_id, sgc_dict.description as consequence FROM mapped_snp ms, snp, snpvartypedict svd, snp_confirmation_dict scd, snp_summary ss LEFT JOIN ( snp_gene_consequence sgc INNER JOIN coding_sequence cs on sgc.id_codingseq = cs.id_codingseq AND cs.design_entry = ? INNER JOIN sgc_dict on sgc.consequence = sgc_dict.id_dict ) on sgc.id_snp = ss.id_snp LEFT JOIN position_type_dict ptd on sgc.position_description = ptd.id_dict WHERE ms.id_sequence = ? AND ms.id_snp = ss.id_snp AND ss.id_snp = snp.id_snp AND snp.var_type = svd.id_dict AND ss.confirmation_status = scd.id_dict AND ms.position BETWEEN $q_start AND $q_end ); my $sth; eval { $sth = $self->prepare($q2); $sth->execute($self->consequence_exp, $id_seq); }; if ($@){ warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@"); return([]); } my (%varfeats, %cons); while (my $row = $sth->fetchrow_hashref()) { return([]) unless keys %{$row}; warn "WARNING: private data!" if $row->{'PRIVATE'}; ## filter SNPs without strand (this is gruft in mapped_snp)
next unless $row->{'SNP_STRAND'}; ## calculate coords depending on clone orientation
my ($start, $end); $row->{'SNP_END'} ||= $row->{'SNP_START'}; if ($clone_strand == 1) { $start = $row->{'SNP_START'} - $clone_start + 1; $end = $row->{'SNP_END'} - $clone_start + 1; } else { $start = $clone_end - $row->{'SNP_END'} + 1; $end = $clone_end -$row->{'SNP_START'} + 1; } my $strand = $row->{'SNP_STRAND'}*$clone_strand; my $key = join(":", $row->{'ID_DEFAULT'}, $start, $end, $strand); my $consequence_type = $CONSEQUENCE_TYPE_MAP{$row->{'POS_TYPE'}." ".$row->{'CONSEQUENCE'}} || '_'; my $cons_rank = $Bio::EnsEMBL::Variation::VariationFeature::CONSEQUENCE_TYPES{uc($consequence_type)} || 99; if ((!$cons{$key}) or ($cons_rank < $cons{$key})) { # VariationFeature
my $varfeat = Bio::EnsEMBL::Variation::VariationFeature->new_fast( { 'dbID' => $row->{'INTERNAL_ID'}, 'adaptor' => $self, 'variation_name' => $row->{'ID_DEFAULT'}, 'start' => $start, 'end' => $end, 'strand' => $strand, 'allele_string' => $row->{'ALLELES'}, 'source' => 'Glovar', 'validation_code' => [ $VSTATE_MAP{$row->{'VALIDATED'}} ], 'consequence_type' => [ $consequence_type ], }); # add minimal Variation object (needed for DBLinks)
my $var = Bio::EnsEMBL::Variation::Variation->new( -dbID => $row->{'INTERNAL_ID'}, -ADAPTOR => $self, -NAME => $row->{'ID_DEFAULT'}, -SOURCE => 'Glovar', ); $self->get_DBLinks($var); $varfeat->variation($var); $cons{$key} = $cons_rank; $varfeats{$key} = $varfeat; } } #&eprof_end('clone_sql');
#&eprof_dump(\*STDERR);
return [values %varfeats];
}
get_DBLinksdescriptionprevnextTop
sub get_DBLinks {
    my ($self, $var) = @_;
    my $q = qq(
        SELECT
                snp_name.SNP_NAME               as NAME,
                snpnametypedict.DESCRIPTION     as TYPE
        FROM    
                snp_name,
                snpnametypedict
        WHERE   snp_name.ID_SNP = ?
        AND     snp_name.SNP_NAME_TYPE = snpnametypedict.ID_DICT
    );
    my $sth;
    eval {
        $sth = $self->prepare($q);
        $sth->execute($var->dbID);
    }; 
    if ($@){
        warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
        return;
    }

    while (my $xref = $sth->fetchrow_hashref()) {
        $var->add_synonym($xref->{'TYPE'}, $xref->{'NAME'});
    }
}
get_consequencesdescriptionprevnextTop
sub get_consequences {
    my ($self, $varfeat) = @_;
    my $q = qq(
        SELECT
                ptd.description         as pos_type,
                sgc_dict.description    as consequence,
                cs.name                 as transcript_stable_id,
                sgc.transcript_position as cdna_start
        FROM    
                coding_sequence cs,
                snp_gene_consequence sgc
        LEFT JOIN
                position_type_dict ptd on sgc.position_description = ptd.id_dict
        LEFT JOIN
                sgc_dict on sgc.consequence = sgc_dict.id_dict
        WHERE   sgc.id_snp = ?
        AND     sgc.id_codingseq = cs.id_codingseq
        AND     cs.design_entry = ?
    );
    my $sth;
    eval {
        $sth = $self->prepare($q);
        $sth->execute($varfeat->{'dbID'}, $self->consequence_exp);
    }; 
    if ($@){
        warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
        return;
    }

    while (my $row = $sth->fetchrow_hashref) {
        # add consequence
my $consequence_type = $CONSEQUENCE_TYPE_MAP{$row->{'POS_TYPE'}." ".$row->{'CONSEQUENCE'}}; my $key = join(":", $row->{'TRANSCRIPT_STABLE_ID'}, $row->{'CDNA_START'}, $consequence_type); # only add consequence once (workaround for duplicates in db)
my $found_tvar; foreach my $oldtvar (@{ $varfeat->get_all_TranscriptVariations || [] }) { if (join(":", $oldtvar->transcript->stable_id, $oldtvar->cdna_start, $oldtvar->consequence_type ) eq $key) { $found_tvar = 1; } } unless ($found_tvar) { $varfeat->add_consequence_type( [$consequence_type] ); # add TranscriptVariation object
my $trans = Bio::EnsEMBL::Transcript->new( -STABLE_ID => $row->{'TRANSCRIPT_STABLE_ID'}, ); my $tvar = Bio::EnsEMBL::Variation::TranscriptVariation->new_fast({ transcript => $trans, cdna_start => $row->{'CDNA_START'}, cdna_end => $row->{'CDNA_START'}, consequence_type => [ $consequence_type ], }); $varfeat->add_TranscriptVariation($tvar); } }
}
get_population_genotypesdescriptionprevnextTop
sub get_population_genotypes {
    my ($self, $var) = @_;
    unless ($var->isa('Bio::EnsEMBL::Variation::Variation')) {
        throw("You must provide a Bio::EnsEMBL::Variation::Variation object.");
    }
    my $q = qq(
        SELECT
                sv.var_string       as allele,
                af.frequency        as frequency,
                p.description       as pop_name,
                ao.sample_size      as sample_size
        FROM
                snp_variation sv,
                allele_frequency af,
                assay_outcome ao,
                population p
        WHERE   sv.id_snp = ?
        AND     sv.id_var = af.id_var
        AND     af.id_outcome = ao.id_outcome
        AND     ao.id_pop = p.id_pop
    );
    my $sth;
    eval {
        $sth = $self->prepare($q);
        $sth->execute($var->dbID);
    }; 
    if ($@){
        warn("ERROR: SQL failed in " . (caller(0))[3] . "\n$@");
        return;
    }

    my $pop;
    while (my $row = $sth->fetchrow_hashref) {
        # collect genotypes by population
$pop->{$row->{'POP_NAME'}}->{'sample_size'} = $row->{'SAMPLE_SIZE'}; push @{ $pop->{$row->{'POP_NAME'}}->{'frequency'} }, $row->{'FREQUENCY'}; push @{ $pop->{$row->{'POP_NAME'}}->{'alleles'} }, $row->{'ALLELE'}; } foreach my $p (keys %$pop) { my $population = Bio::EnsEMBL::Variation::Population->new( -dbID => $p, -NAME => $p, -SIZE => $pop->{$p}->{'size'}, ); # add PopulationGenotype for each genotype
# genotype frequencies are calculated by combinatorics from allele
# frequencies, which is biologically not correct.
# ToDo: find the right data in the database
my ($allele1, $allele2) = @{ $pop->{$p}->{'alleles'} }; my ($freq1, $freq2) = @{ $pop->{$p}->{'frequency'} }; # homozygous genotypes
my $pop_genotype = Bio::EnsEMBL::Variation::PopulationGenotype->new( -dbID => $p, -ALLELE1 => $allele1, -ALLELE2 => $allele1, -FREQUENCY => $freq1**2, ); $pop_genotype->population($population); $var->add_PopulationGenotype($pop_genotype); my $pop_genotype = Bio::EnsEMBL::Variation::PopulationGenotype->new( -dbID => $p, -ALLELE1 => $allele2, -ALLELE2 => $allele2, -FREQUENCY => $freq2**2, ); $pop_genotype->population($population); $var->add_PopulationGenotype($pop_genotype); # heterozygous genotype
my $pop_genotype = Bio::EnsEMBL::Variation::PopulationGenotype->new( -dbID => $p, -ALLELE1 => $allele1, -ALLELE2 => $allele2, -FREQUENCY => $freq1*$freq2*2, ); $pop_genotype->population($population); $var->add_PopulationGenotype($pop_genotype); }
}
get_source_versiondescriptionprevnextTop
sub get_source_version {
    return undef;
}
track_namedescriptionprevnextTop
sub track_name {
    return("GlovarSNP");
}

1;
}
General documentation
AUTHORSTop
Tony Cox <avc@sanger.ac.uk>
Patrick Meidl <pm2@sanger.ac.uk>
CONTACTTop
Post questions to the EnsEMBL development list ensembl-dev@ebi.ac.uk