Bio::EnsEMBL::ExternalData::SangerSNP VariationAdaptor
SummaryIncluded librariesPackage variablesSynopsisGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::ExternalData::SangerSNP::VariationAdaptor
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis
Bio::EnsEMBL::DBSQL::BaseAdaptor
Bio::EnsEMBL::External::ExternalFeatureAdaptor
Bio::EnsEMBL::ExternalData::Variation
Bio::EnsEMBL::SNP
Bio::EnsEMBL::Variation::Variation
Bio::EnsEMBL::Variation::VariationFeature
Inherit
Bio::EnsEMBL::DBSQL::BaseAdaptor Bio::EnsEMBL::External::ExternalFeatureAdaptor
Synopsis
A SNP adaptor which sits over the Sanger SNP database. Provides a means of
getting SNPs out of the Sanger SNP database as
Bio::EnsEMBL::Variation::VariationFeature objects.
Description
No description!
Methods
coordinate_systems
No description
Code
fetch_all_by_chr_start_end
No description
Code
fetch_all_by_dbID
No description
Code
fetch_by_dbID_position_range
No description
Code
Methods description
None available.
Methods code
coordinate_systemsdescriptionprevnextTop
sub coordinate_systems {
  return ("ASSEMBLY");
}
fetch_all_by_chr_start_enddescriptionprevnextTop
sub fetch_all_by_chr_start_end {
  my ($self,$chr,$start,$end) = @_;

  my $assembly = $self->ensembl_db->get_CoordSystemAdaptor->fetch_all->[0]->version();
  
  (my $assembly_name = $assembly) =~ s/[0-9]*$//;
  (my $assembly_version = $assembly) =~ s/[A-Z,a-z]*([0-9]*)$/$1/;

  my $query = qq  {
SELECT MAPPED_SNP.ID_SNP,  
          (MAPPED_SNP.POSITION + SEQ_SEQ_MAP.START_COORDINATE -1) AS snppos,
          (MAPPED_SNP.END_POSITION + SEQ_SEQ_MAP.START_COORDINATE -1) AS snpendpos,
          (MAPPED_SNP.IS_REVCOMP * SEQ_SEQ_MAP.CONTIG_ORIENTATION) AS snpstrand,
           CHROM_SEQ.DATABASE_SEQNAME as chrname,
           SNP_SUMMARY.ALLELES,
           SNP_SUMMARY.DEFAULT_NAME
FROM     DATABASE_DICT,
         CHROM_SEQ,
         SEQ_SEQ_MAP,
         MAPPED_SNP,
         SNP_SUMMARY
WHERE     DATABASE_DICT.DATABASE_NAME = '$assembly_name'
    AND   DATABASE_DICT.DATABASE_VERSION = '$assembly_version'
    AND   CHROM_SEQ.DATABASE_SOURCE = DATABASE_DICT.ID_DICT
    AND   CHROM_SEQ.IS_CURRENT = 1
    AND   CHROM_SEQ.DATABASE_SEQNAME='$chr'
    AND   CHROM_SEQ.ID_CHROMSEQ = SEQ_SEQ_MAP.ID_CHROMSEQ
    AND   MAPPED_SNP.ID_SEQUENCE =SEQ_SEQ_MAP.SUB_SEQUENCE
    AND   SNP_SUMMARY.ID_SNP = MAPPED_SNP.ID_SNP
    AND   MAPPED_SNP.IGNORE_REASON IS NULL
    AND   MAPPED_SNP.IS_REVCOMP IS NOT NULL
    AND   (MAPPED_SNP.POSITION + SEQ_SEQ_MAP.START_COORDINATE -1) BETWEEN $start AND $end
ORDER BY MAPPED_SNP.ID_SNP, SNPPOS
  };

  my $sth = $self->prepare($query);

  # print $sth->{Statement} . "\n";
$sth->execute; # print "Query finished\n";
my @snps; # Naughty but should speed things up a bit
my $cur_snp_id = -1; my $snp; my %ids; my $hashref; while ($hashref = $sth->fetchrow_hashref) { if ($hashref->{SNPSTRAND} != 1 && $hashref->{SNPSTRAND} != -1) { print STDERR "Got non 1 or -1 strand for " . $hashref->{ID_SNP} . "\n"; } my $start; my $end; if ($hashref->{SNPPOS} >= $hashref->{SNPENDPOS} || ($hashref->{ALLELES} =~ /-/ && abs($hashref->{SNPPOS}-$hashref->{SNPENDPOS})==1)) { $start = $hashref->{SNPENDPOS}; $end = $hashref->{SNPPOS}; } else { $start = $hashref->{SNPPOS}; $end = $hashref->{SNPENDPOS}; } if (exists($ids{$hashref->{ID_SNP} . ":" .$start})) { print STDERR "Warning: Skipping duplicate for " . $hashref->{ID_SNP} . " at $start\n"; next; } my $varfeat = Bio::EnsEMBL::Variation::VariationFeature->new_fast( { 'dbID' => $hashref->{ID_SNP}, 'adaptor' => $self, 'variation_name' => $hashref->{DEFAULT_NAME}, 'start' => $start, 'end' => $end, 'strand' => $hashref->{SNPSTRAND}, 'allele_string' => $hashref->{ALLELES}, 'source' => 'SangerSNP', }); $varfeat->slice($self->ensembl_db->get_SliceAdaptor->fetch_by_region('chromosome', $hashref->{CHRNAME})); # add minimal Variation object
my $var = Bio::EnsEMBL::Variation::Variation->new( -dbID => $hashref->{'ID_SNP'}, -ADAPTOR => $self, -NAME => $hashref->{'DEFAULT_NAME'}, -SOURCE => 'Glovar', ); # my %snp_hash;
# if ($hashref->{SNPPOS} >= $hashref->{SNPENDPOS} ||
# ($hashref->{ALLELES} =~ /-/ && abs($hashref->{SNPPOS}-$hashref->{SNPENDPOS})==1)) {
# $snp_hash{_gsf_start} = $hashref->{SNPENDPOS};
# $snp_hash{_gsf_end} = $hashref->{SNPPOS};
# } else {
# $snp_hash{_gsf_start} = $hashref->{SNPPOS};
# $snp_hash{_gsf_end} = $hashref->{SNPENDPOS};
# }
# if ($hashref->{SNPSTRAND} != 1 && $hashref->{SNPSTRAND} != -1) {
# print STDERR "Got non 1 or -1 strand\n";
# }
push @snps,$varfeat; $ids{$hashref->{ID_SNP} . ":" .$start} = 1; } return\@ snps;
}
fetch_all_by_dbIDdescriptionprevnextTop
sub fetch_all_by_dbID {
  my ($self,$dbID) = @_;

  my $assembly = $self->ensembl_db->assembly_type;
  
  (my $assembly_name = $assembly) =~ s/[0-9]*$//;
  (my $assembly_version = $assembly) =~ s/[A-Z,a-z]*([0-9]*)$/$1/;

  my $query = qq  {
SELECT DISTINCT MAPPED_SNP.ID_SNP,  
          (MAPPED_SNP.POSITION + SEQ_SEQ_MAP.START_COORDINATE -1) AS snppos,
          (MAPPED_SNP.END_POSITION + SEQ_SEQ_MAP.START_COORDINATE -1) AS snpendpos,
          (MAPPED_SNP.IS_REVCOMP * SEQ_SEQ_MAP.CONTIG_ORIENTATION) AS snpstrand,
           CHROM_SEQ.DATABASE_SEQNAME as chrname,
           SNP_SUMMARY.ALLELES,
           SNP_SUMMARY.DEFAULT_NAME
FROM     DATABASE_DICT,
         CHROM_SEQ,
         SEQ_SEQ_MAP,
         MAPPED_SNP,
         SNP_SUMMARY
WHERE     DATABASE_DICT.DATABASE_NAME = '$assembly_name'
    AND   DATABASE_DICT.DATABASE_VERSION = '$assembly_version'
    AND   CHROM_SEQ.DATABASE_SOURCE = DATABASE_DICT.ID_DICT
    AND   CHROM_SEQ.IS_CURRENT = 1
    AND   CHROM_SEQ.ID_CHROMSEQ = SEQ_SEQ_MAP.ID_CHROMSEQ
    AND   MAPPED_SNP.ID_SEQUENCE =SEQ_SEQ_MAP.SUB_SEQUENCE
    AND   SNP_SUMMARY.ID_SNP = MAPPED_SNP.ID_SNP
    AND   MAPPED_SNP.IS_REVCOMP IS NOT NULL
    AND   SNP_SUMMARY.ID_SNP = $dbID
ORDER BY MAPPED_SNP.ID_SNP, SNPPOS
  };

  my $sth = $self->prepare($query);

  #print $sth->{Statement} . "\n";
$sth->execute; # print "Query finished\n";
my @snps; # Naughty but should speed things up a bit
my $cur_snp_id = -1; my $snp; my %ids; my $hashref; while ($hashref = $sth->fetchrow_hashref) { my $start; my $end; if ($hashref->{SNPPOS} >= $hashref->{SNPENDPOS} || ($hashref->{ALLELES} =~ /-/ && abs($hashref->{SNPPOS}-$hashref->{SNPENDPOS})==1)) { $start = $hashref->{SNPENDPOS}; $end = $hashref->{SNPPOS}; } else { $start = $hashref->{SNPPOS}; $end = $hashref->{SNPENDPOS}; } my $varfeat = Bio::EnsEMBL::Variation::VariationFeature->new_fast( { 'dbID' => $hashref->{ID_SNP}, 'adaptor' => $self, 'variation_name' => $hashref->{DEFAULT_NAME}, 'start' => $start, 'end' => $end, 'strand' => $hashref->{SNPSTRAND}, 'allele_string' => $hashref->{ALLELES}, 'source' => 'SangerSNP', }); $varfeat->slice($self->ensembl_db->get_SliceAdaptor->fetch_by_region('chromosome',$hashref->{CHRNAME})); # add minimal Variation object
my $var = Bio::EnsEMBL::Variation::Variation->new( -dbID => $hashref->{'ID_SNP'}, -ADAPTOR => $self, -NAME => $hashref->{'DEFAULT_NAME'}, -SOURCE => 'Glovar', ); push @snps,$varfeat; } return\@ snps; } 1;
}
fetch_by_dbID_position_rangedescriptionprevnextTop
sub fetch_by_dbID_position_range {
  my ($self,$dbID,$range_chr,$range_start,$range_end) = @_;

  my $assembly = $self->ensembl_db->assembly_type;
  
  (my $assembly_name = $assembly) =~ s/[0-9]*$//;
  (my $assembly_version = $assembly) =~ s/[A-Z,a-z]*([0-9]*)$/$1/;

  my $query = qq  {
SELECT DISTINCT MAPPED_SNP.ID_SNP,  
          (MAPPED_SNP.POSITION + SEQ_SEQ_MAP.START_COORDINATE -1) AS snppos,
          (MAPPED_SNP.END_POSITION + SEQ_SEQ_MAP.START_COORDINATE -1) AS snpendpos,
          (MAPPED_SNP.IS_REVCOMP * SEQ_SEQ_MAP.CONTIG_ORIENTATION) AS snpstrand,
           CHROM_SEQ.DATABASE_SEQNAME as chrname,
           SNP_SUMMARY.ALLELES,
           SNP_SUMMARY.DEFAULT_NAME
FROM     DATABASE_DICT,
         CHROM_SEQ,
         SEQ_SEQ_MAP,
         MAPPED_SNP,
         SNP_SUMMARY
WHERE     DATABASE_DICT.DATABASE_NAME = '$assembly_name'
    AND   DATABASE_DICT.DATABASE_VERSION = '$assembly_version'
    AND   CHROM_SEQ.DATABASE_SOURCE = DATABASE_DICT.ID_DICT
    AND   CHROM_SEQ.IS_CURRENT = 1
    AND   CHROM_SEQ.ID_CHROMSEQ = SEQ_SEQ_MAP.ID_CHROMSEQ
    AND   MAPPED_SNP.ID_SEQUENCE =SEQ_SEQ_MAP.SUB_SEQUENCE
    AND   SNP_SUMMARY.ID_SNP = MAPPED_SNP.ID_SNP
    AND   MAPPED_SNP.IS_REVCOMP IS NOT NULL
    AND   SNP_SUMMARY.ID_SNP = $dbID
ORDER BY MAPPED_SNP.ID_SNP, SNPPOS
  };

  my $sth = $self->prepare($query);

  #print $sth->{Statement} . "\n";
$sth->execute; # print "Query finished\n";
my @snps; # Naughty but should speed things up a bit
my $cur_snp_id = -1; my $snp; my %ids; my $hashref; while ($hashref = $sth->fetchrow_hashref) { my $start; my $end; if ($hashref->{SNPPOS} >= $hashref->{SNPENDPOS} || ($hashref->{ALLELES} =~ /-/ && abs($hashref->{SNPPOS}-$hashref->{SNPENDPOS})==1)) { $start = $hashref->{SNPENDPOS}; $end = $hashref->{SNPPOS}; } else { $start = $hashref->{SNPPOS}; $end = $hashref->{SNPENDPOS}; } if ($hashref->{CHRNAME} ne $range_chr || $start < $range_start || $start > $range_end) { #print "Outside range ($range_chr,$range_start,$range_end) for id $dbID " . $hashref->{CHRNAME} . " $start $end\n";
next; } #print "In range ($range_chr,$range_start,$range_end) for id $dbID " . $hashref->{CHRNAME} . " $start $end\n";
my $varfeat = Bio::EnsEMBL::Variation::VariationFeature->new_fast( { 'dbID' => $hashref->{ID_SNP}, 'adaptor' => $self, 'variation_name' => $hashref->{DEFAULT_NAME}, 'start' => $start, 'end' => $end, 'strand' => $hashref->{SNPSTRAND}, 'allele_string' => $hashref->{ALLELES}, 'source' => 'SangerSNP', }); $varfeat->slice($self->ensembl_db->get_SliceAdaptor->fetch_by_region('chromosome',$hashref->{CHRNAME})); # add minimal Variation object
my $var = Bio::EnsEMBL::Variation::Variation->new( -dbID => $hashref->{'ID_SNP'}, -ADAPTOR => $self, -NAME => $hashref->{'DEFAULT_NAME'}, -SOURCE => 'Glovar', ); push @snps,$varfeat; } if (scalar(@snps) > 1) { print STDERR "Got multiple vars for $dbID - only returning 1\n"; } return $snps[0];
}
General documentation
CONTACTTop
Post questions to the EnsEMBL developer list: <ensembl-dev@ebi.ac.uk>
APPENDIXTop