Raw content of Bio::EnsEMBL::ExternalData::SNPSQL::SNPAdaptor # EnsEMBL Gene reading writing adaptor for mySQL # # Copyright EMBL-EBI 2002 # # Author: Heikki Lehvaslaiho # # Date : 09.08.2002 # =head1 NAME Bio::EnsEMBL::ExternalData::SNPSQL::SNPAdaptor =head1 SYNOPSIS A SNP adaptor which sits over a SNP database. Provides a means of getting SNPs out of a SNP database as Bio::EnsEMBL::ExternalData::Variation objects. =head1 CONTACT Post questions to the EnsEMBL developer list: <ensembl-dev@ebi.ac.uk> =head1 APPENDIX =cut use strict; package Bio::EnsEMBL::ExternalData::SNPSQL::SNPAdaptor; use Bio::EnsEMBL::DBSQL::BaseAdaptor; use Bio::EnsEMBL::ExternalData::Variation; use Bio::EnsEMBL::ExternalData::Population; use Bio::EnsEMBL::ExternalData::Frequency; use Bio::EnsEMBL::SNP; use Bio::EnsEMBL::Utils::Eprof qw( eprof_start eprof_end); use Bio::EnsEMBL::External::ExternalFeatureAdaptor; use vars '@ISA'; @ISA = qw(Bio::EnsEMBL::DBSQL::BaseAdaptor Bio::EnsEMBL::External::ExternalFeatureAdaptor ); =head2 fetch_attributes_only Arg [1] : int refsnp_id Arg [2] : (optional) string source Example : none Description: Retrieves a snp objcet from the SNP database but does not populate the location information. This is necessary given the current state of the snp database because location information has to be retrieved differently for different species! Returntype : Bio::EnsEMBL::SNP Exceptions : none Caller : snpview =cut sub fetch_attributes_only{ my $self = shift; my $refsnp_id = shift; my $source = shift || 'dbSNP'; my $sth = $self->prepare(' SELECT refsnp.internal_id, refsnp.snpclass, refsnp.snptype, refsnp.observed, refsnp.seq5, refsnp.seq3, refsnp.het, refsnp.hetse, refsnp.validated, refsnp.mapweight, refsnp.hapmap_snp, ds.version FROM RefSNP refsnp, DataSource ds WHERE refsnp.id = ? AND ds.id = refsnp.datasource AND ds.datasource = ?'); $sth->execute("$refsnp_id", $source); $sth->rows || $self->throw("snp with refsnp_id/src [$refsnp_id/$source]" . "not found in database"); my ($dbID, $snp_class, $snp_type, $alleles, $seq5, $seq3, $het, $hetse, $confirmed, $mapweight, $hapmap_snp, $source_version) = $sth->fetchrow_array; $sth->finish; # use the right vocabulary for the SNP status if ($confirmed eq 'no-info') { $confirmed = "suspected"; } else { $confirmed =~ s/-/ /; $confirmed = "proven $confirmed"; } # the allele separator should be '|' $alleles =~ s/\//\|/g; #prune flank sequences to 25 nt $seq5 = substr($seq5, -25, 25); $seq3 = substr($seq3, 0, 25); #add Ns to length of 25; $seq3 .= 'N' x ( 25 - length $seq3 ) if length($seq3) < 25 ; $seq5 = ('N' x ( 25 - length $seq5 ) ). $seq5 if length($seq5) < 25 ; my $snp = Bio::EnsEMBL::SNP->new; $snp->dbID($dbID); $snp->source_tag($source); $snp->source_version($source_version); $snp->status($confirmed); $snp->alleles($alleles); $snp->upStreamSeq($seq5); $snp->dnStreamSeq($seq3); $snp->score($mapweight); $snp->het($het); $snp->hetse($hetse); $snp->hapmap_snp($hapmap_snp); #DBLink my $link = new Bio::Annotation::DBLink; $link->database('dbSNP'); $link->primary_id($refsnp_id); $snp->add_DBLink($link); #get alternative IDs $sth = $self->prepare(" SELECT subsnp.handle, subsnp.altid FROM SubSNP as subsnp WHERE subsnp.internal_id = ?"); $sth->execute($dbID); while(my ($handle, $altid) = $sth->fetchrow_array) { my $link = new Bio::Annotation::DBLink; $link->database($handle); $link->primary_id($altid); #add dbXref to Variation $snp->add_DBLink($link); } # Add Genotypes (Variation objects) to the SNP # fc1 & jws 2004 eval { foreach my $genotype (@{$self->fetch_genotype_by_SNP_id($snp->id)}){ $snp->add_genotype($genotype); } }; # Add Population and Frequency (allele frequency) objects to the SNP foreach my $pop (@{$self->fetch_pops_by_SNP_id($snp->id)}) { my $freqs = undef; eval { $freqs = $self->fetch_freqs_by_pop_SNP_id($pop->pop_id, $snp->id); }; next if $@; next unless @$freqs; foreach my $freq (@$freqs) { $pop->add_frequency($freq); } $snp->add_population($pop); } return $snp; } =head2 fetch_by_SNP_id Arg [1] : int $refsnpid The refsnp identifier of the snp to retrieve Arg [2] : string $source The source string of the snp to retrieve Example : @snps = @{$snp_adaptor->fetch_by_SNP_id($refsnpid, 'dbSNP')}; Description: Retreives a snp via its refsnp identifier and datasource. One variation object is returned per mapped location of the snp. Returntype : Bio::EnsEMBL::Variation Exceptions : none Caller : internal =cut sub fetch_by_SNP_id { my ($self, $refsnpid, $source) = @_; unless($refsnpid && $source) { die("Both refsnpid and source arguments are required"); } my $sth = $self->prepare(' SELECT STRAIGHT_JOIN refsnp.internal_id, refsnp.id, hit.acc, hit.version, hit.start, hit.end, hit.type, hit.strand, refsnp.snpclass, refsnp.snptype, refsnp.observed, refsnp.seq5, refsnp.seq3, refsnp.het, refsnp.hetse, refsnp.validated, refsnp.mapweight, ds.datasource FROM RefSNP refsnp, DataSource ds left join Hit hit on hit.internal_id = refsnp.internal_id WHERE ds.id = refsnp.datasource AND refsnp.id = ? AND ds.datasource = ?'); $sth->execute("$refsnpid", $source); $sth->rows || $self->throw("$source $refsnpid not in database or not mapped to contig"); my $arr; my @variations = (); while ($arr = $sth->fetchrow_arrayref) { my ($internal_id, $dbsnp_id, $acc, $ver, $begin, $end, $postype, $strand, $class, $type, $alleles, $seq5, $seq3, $het, $hetse, $confirmed, $mapweight, $source ) = @$arr; #snp info not valid $self->throw("SNP withdrawn. Reason: $type ") if ($type && $type ne 'notwithdrawn'); # use the right vocabulary for the SNP status if ($confirmed eq 'no-info') { $confirmed = "suspected"; } else { $confirmed =~ s/-/ /; $confirmed = "proven $confirmed"; } # the allele separator should be '|' $alleles =~ s/\//\|/g; #prune flank sequences to 25 nt $seq5 = substr($seq5, -25, 25); $seq3 = substr($seq3, 0, 25); #add Ns to length of 25; $seq3 .= 'N' x ( 25 - length $seq3 ) if length($seq3) < 25 ; $seq5 = ('N' x ( 25 - length $seq5 ) ). $seq5 if length($seq5) < 25 ; #create output objects my $acc_version = ''; $acc_version .= uc $acc if $acc; $acc_version .= ".$ver" if $ver; my $snp = new Bio::EnsEMBL::ExternalData::Variation; if ($acc_version) { $snp->seqname($acc_version); $snp->start($begin); $snp->end($end); $snp->strand($strand); $snp->original_strand($strand); } $snp->source_tag($source); $snp->status($confirmed); $snp->alleles($alleles); $snp->upStreamSeq($seq5); $snp->dnStreamSeq($seq3); $snp->score($mapweight); $snp->het($het); $snp->hetse($hetse); #DBLink my $link = new Bio::Annotation::DBLink; $link->database('dbSNP'); $link->primary_id($dbsnp_id); #add dbXref to Variation $snp->add_DBLink($link); #get alternative IDs my $sth2 = $self->prepare(" SELECT subsnp.handle, subsnp.altid FROM SubSNP as subsnp WHERE subsnp.internal_id = ?"); $sth2->execute($internal_id); while( (my $arr2 = $sth2->fetchrow_arrayref()) ) { my ($handle, $altid) = @{$arr2}; my $link = new Bio::Annotation::DBLink; $link->database($handle); $link->primary_id($altid); #add dbXref to Variation $snp->add_DBLink($link); } push @variations, $snp; } return \@variations; } =head2 fetch_genotype_by_SNP_id Arg [1] : int $refsnpid The refsnp identifier of the snp to retrieve genotypes Example : @snps = @{$snp_adaptor->fetch_genotype_by_SNP_id($refsnpid)}; Description: Retreives a variation object via its refsnp identifier One variation object is returned for genotype data associated with that snp Returntype : Bio::EnsEMBL::Variation Exceptions : none Caller : internal =cut sub fetch_genotype_by_SNP_id { my ($self, $refsnpid) = @_; my $sth = $self->prepare(' SELECT refsnp.id, Strain.ssid, Strain.name, Strain.allele, GTInd.sex, GTInd.source, GTInd.source_ind_id FROM RefSNP refsnp, Strain Strain, GTInd GTInd WHERE refsnp.internal_id = Strain.internal_id and Strain.ind_id = GTInd.ind_id AND refsnp.id = ?'); $sth->execute("$refsnpid") || $self->throw("$refsnpid not in database or don't have genotype data"); my $arr; my @snps = (); while ($arr = $sth->fetchrow_arrayref) { my ($refsnpid, $ssid, $strain_name, $strain_alleles, $sex, $gt_source, $gt_source_ind_id ) = @$arr; my $snp = new Bio::EnsEMBL::ExternalData::Variation; $snp->snpid($refsnpid); $snp->ssid($ssid); $snp->strain_name($strain_name); $snp->strain_alleles($strain_alleles); $snp->sex($sex); $snp->gt_source($gt_source); $snp->gt_source_ind_id($gt_source_ind_id); push @snps, $snp; } return \@snps; } =head2 fetch_pops_by_SNP_id Arg 1 : int $refsnpid Use refsnp identifier of the snp to retrieve the populations in which it has been genotyped. Example : @pops = @{$snp_adaptor->fetch_pop_by_SNP_id($refsnpid)}; Description: Retreives a population objects for a given refSNP id Returntype : Array ref of Bio::EnsEMBL::ExternalData::Population objects Exceptions : none Caller : internal =cut sub fetch_pops_by_SNP_id { my ($self, $refsnpid) = @_; my $sth = $self->prepare(' SELECT straight_join sp.id, sp.name, sp.class, sp.samplesize FROM RefSNP rs, SubSNP ss, SubPop sp WHERE rs.internal_id = ss.internal_id AND ss.id=sp.ssid AND sp.batchtype="ALE" AND rs.id = ?'); $sth->execute($refsnpid) || $self->throw("$refsnpid not in database or don't have genotype data"); my $arr; my @pops = (); while ($arr = $sth->fetchrow_arrayref) { my ($pop_id, $name, $region, $sample_size) = @$arr; my $pop = new Bio::EnsEMBL::ExternalData::Population; $pop->pop_id($pop_id); $pop->name($name); $pop->region($region); $pop->sample_size($sample_size); push @pops, $pop; } return \@pops; } =head2 fetch_freqs_by_pop_SNP_id Arg 1 : int $refsnpid Arg 2 : int population ID Use refsnp identifier and population ID to retrieve the allele frequencies Example : @freqs = @{$snp_adaptor->fetch_freqs_by_pop_SNP_id($pop_id,$refsnpid)}; Description: Retreives frequency population objects for a given refSNP id Returntype : Array ref of Bio::EnsEMBL::ExternalData::Frequency objects Exceptions : none Caller : internal =cut sub fetch_freqs_by_pop_SNP_id { my ($self, $pop_id, $refsnpid) = @_; my $sth = $self->prepare(' SELECT straight_join f.snpallele, f.otherallele, f.freq, f.count, f.ssid, f.batchid FROM RefSNP rs, SubSNP ss, Freq f WHERE rs.internal_id = ss.internal_id AND ss.id=f.ssid AND f.type="ALE" AND rs.id=? AND f.popid=? ORDER BY f.batchid;'); $sth->execute($refsnpid, $pop_id); $sth->rows || $self->throw("$refsnpid not in database or don't have frequency data"); my $arr; my @freqs = (); while ($arr = $sth->fetchrow_arrayref) { my ($snpallele, $otherallele, $frequency, $count, $ssid,$batch_id) = @$arr; my $freq = new Bio::EnsEMBL::ExternalData::Frequency; my $allele = $snpallele || $otherallele; $freq->allele($allele); $freq->frequency($frequency); $freq->count($count); $freq->ssid($ssid); $freq->batch_id($batch_id); push @freqs, $freq; } return \@freqs; } =head2 fetch_slice_strand_by_ssid Arg 1 : int $ssid Arg 2 : $slice Use $ssid and $slice to retrieve the slice strand of given $ssid Example : my $slice_strand = $snp_adaptor->fetch_slice_strand_by_ssid($ssid,$slice) Description: Retreives slice strand for a given SubSNP id Returntype : int $slice_strand Exceptions : none Caller : internal =cut sub fetch_slice_strand_by_ssid { my ($self, $ssid, $slice) = @_; my $slice_name = $slice->name; my @names = split /\:/, $slice_name; my $slice_strand = $names[-1]; my $sth = $self->prepare(' SELECT ch.strand as ref_strand, ss.strand_to_rs as ss_strand FROM ContigHit ch, SubSNP ss WHERE ch.internal_id = ss.internal_id and ss.id = ?;'); $sth->execute($ssid)|| $self->throw("The SubSNP table don't have strand_to_rs column?"); my ($ref_strand, $ss_strand) = $sth->fetchrow; my $ssid_slice_strand = $slice_strand * $ref_strand * $ss_strand; return $ssid_slice_strand; } =head2 fetch_by_clone_accession_vesion Title : fetch_by_clone_accession_version Usage : fetch_by_clone_accession_version($embl_accession_number, $sequence_version,$start,$end); Function: The semantics of this method is as follows: $sequence_version - embl/genbank sequence version $embl_accession_number - the embl/genbank accession number The $start/$end can be ignored, but methods can take advantage of it. This is so that ensembl can ask for features only on a region of DNA, and if desired, the external database can respond with features only in this region, rather than the entire sequence. The hope is that the second method could potentially have a very complex set of mappings of other embl_accession numbers to one embl_accession number and provide the complex mapping. Example : Returns : list of Bio::SeqFeature::Variation objects Args : $embl_accession_number, $sequence_version, $start of range, optional $end of range, optional =cut sub fetch_by_clone_accession_version { my($self) = shift; my ($acc, $ver, $start, $stop) = @_; #lists of variations to be returned my @variations; #sanity checks if ( ! defined $acc) { $self->throw("Two arguments are requided: embl_accession number and version_number!"); } if ( ! defined $ver) { $self->throw("Two arguments are required: embl_accession number and version_number!"); } if (defined $start) { $start = 1 if $start eq ""; if ( $start !~ /^\d+$/ and $start > 0) { $self->throw("$start is not a valid start"); } } if (defined $stop) { $start = 1 if not defined $start; if ( $stop !~ /^\d+$/ and $stop > 0 ) { $self->throw("$stop is not a valid stop"); } } if (defined $start and defined $stop) { if ($stop < $start) { $self->throw("$stop is smaller than $start not a valid start"); } } my $acc_version = uc "$acc.$ver"; # db query to return all variation information ; confidence attribute is gone!! my $query = qq{ SELECT p1.start, p1.end, p1.type, p1.strand, p2.id, p2.snpclass, p2.snptype, p2.observed, p2.seq5, p2.seq3, p2.het, p2.hetse, p2.validated, p2.mapweight, p3.datasource, p2.internal_id FROM Hit as p1, RefSNP as p2, DataSource as p3 WHERE p1.acc = "$acc" and p1.version = "$ver" AND p3.id = p2.datasource AND p1.internal_id = p2.internal_id }; if($start) { $query .= " AND p1.start >= $start"; } if($stop) { $query .= " AND p1.end <= $stop"; } my $sth = $self->prepare($query); my $res = $sth->execute(); while( (my $arr = $sth->fetchrow_arrayref()) ) { my ($begin, $end, $hittype, $strand, $snpuid, $class, $type, $alleles, $seq5, $seq3, $het, $hetse, $confirmed, $mapweight, $source, $primid ) = @{$arr}; #snp info not valid next if ($type && $type ne 'notwithdrawn'); next if $mapweight > 2; #exclude SNPs outside the given $start-$end range if (defined $start) { next if $begin < $start; } if (defined $stop) { next if $end > $stop; } # use the right vocabulary for the SNP status if ($confirmed eq 'no-info') { $confirmed = "suspected"; } else { $confirmed =~ s/-/ /; $confirmed = "proven $confirmed"; } # the allele separator should be '|' $alleles =~ s/\//\|/g; #prune flank sequences to 25 nt $seq5 = substr($seq5, -25, 25); $seq3 = substr($seq3, 0, 25); #add Ns to length of 25; $seq3 .= 'N' x ( 25 - length $seq3 ) if length($seq3) < 25 ; $seq5 = ('N' x ( 25 - length $seq5 ) ). $seq5 if length($seq5) < 25 ; # # prepare the output objects # #Variation my $snp = new Bio::EnsEMBL::ExternalData::Variation (-start => $begin, -end => $end, -strand => $strand, -original_strand => $strand, -source_tag => $source, -score => $mapweight, -status => $confirmed, -alleles => $alleles, ); $snp->upStreamSeq($seq5); $snp->dnStreamSeq($seq3); $snp->het($het); $snp->hetse($hetse); # set for compatibility to Virtual Contigs $snp->seqname($acc_version); #DBLink my $link = new Bio::Annotation::DBLink; $link->database('dbSNP'); $link->primary_id($snpuid); $link->optional_id($acc_version); #add dbXref to Variation $snp->add_DBLink($link); #get alternative IDs my $query2 = qq{ SELECT p1.handle, p1.altid FROM SubSNP as p1 WHERE p1.internal_id = "$primid" }; my $sth2 = $self->prepare($query2); my $res2 = $sth2->execute(); while( (my $arr2 = $sth2->fetchrow_arrayref()) ) { my ($handle, $altid) = @{$arr2}; my $link = new Bio::Annotation::DBLink; $link->database($handle); $link->primary_id($altid); #add dbXref to Variation $snp->add_DBLink($link); } #add SNP to the list push(@variations, $snp); } return \@variations; } sub fetch_all_by_Clone { my ( $self, $clone, $start, $end ) = @_; $self->fetch_by_clone_accession_version( $clone->embl_id(), $clone->embl_version(), $start, $end); } sub coordinate_systems { return ("CLONE"); } =head2 fetch_between_internal_ids Title : fetch_between_internal_ids Usage : Function: Example : Returns : a list of all snp info between start_internal_id and end_internal_id Args : start_internal_id and end_internal_id =cut sub fetch_between_internal_ids { my ($self,$start_internal_id,$end_internal_id) = @_; my ($query, @var_objs, %var_objs); if (!$end_internal_id) { $end_internal_id = $start_internal_id; } $query = qq{ SELECT r.id, r.snpclass, r.mapweight, r.observed, r.seq5, r.seq3, h.acc, h.version, h.start, h.end, h.strand FROM RefSNP as r left join Hit as h on r.internal_id = h.internal_id WHERE snptype = "notwithdrawn" and r.internal_id between $start_internal_id and $end_internal_id }; my $sth=$self->prepare($query); my $res=$sth->execute(); while (my $info = $sth->fetchrow_hashref()) { if ($info) { my $var_obj = $self->_objFromHashref($info); #$var_objs{$var_obj->snpid}=$var_obj; push (@var_objs, $var_obj); } } return \@var_objs; } =head2 fetch_all_by_Slice Title : fetch_all_by_Slice Usage : snpa->fetch_all_by_Slice($slice) Function: return variation objects in the given slice Example : snpa->fetch_all_by_Slice($slice) Returns : a list of all variation objects in the given slice Args : $slice =cut sub fetch_all_by_Slice{ my ($self,$slice) = @_; my ($query, @var_objs, %var_objs); my $chr_start = $slice->start; my $chr_end = $slice->end; my $chr_name = $slice->seq_region_name; $query = qq{ SELECT r.id, r.snpclass, r.mapweight, r.observed, r.seq5, r.seq3, ch.physmap as start, ch.physmapstr as end , ch.physmapstrand as strand FROM RefSNP as r, ContigHit as ch WHERE r.snptype = "notwithdrawn" and r.internal_id = ch.internal_id and ch.physmap between $chr_start and $chr_end and ch.chr = "$chr_name" }; my $sth=$self->prepare($query); my $res=$sth->execute(); while (my $info = $sth->fetchrow_hashref()) { if ($info) { my $physmapstr = $info->{end}; my ($start, $end) = split /\^|\.\./, $physmapstr if ($info->{start} ne $info->{end}); $info->{start} = $start-$chr_start+1 if ($start); $info->{end} = $end-$chr_start+1 if ($end); $info->{start} = $info->{start}-$chr_start+1 if (!$start); $info->{end} = $info->{end}-$chr_start+1 if (!$end); my $var_obj = $self->_objFromHashref($info); #$var_objs{$var_obj->snpid}=$var_obj; push (@var_objs, $var_obj); } } return \@var_objs; } =head2 fetch_genotyped_by_Slice Title : fetch_genotyped_by_Slice Usage : snpa->fetch_genotyped_by_Slice($slice) Function: return variation objects in the given slice that have been genotyped Example : snpa->fetch_genotyped_by_Slice($slice) Returns : a list of genotyped variation objects in the given slice Args : $slice =cut sub fetch_genotyped_by_Slice{ my ($self, $slice) = @_; warn ("ERROR: No slice object passed to $0") if !$slice; my ($query, @var_objs, %var_objs); my $chr_start = $slice->start; my $chr_end = $slice->end; my $chr_name = $slice->seq_region_name; #mysql> SELECT distinct(r.id), r.snpclass, r.mapweight, r.observed, r.seq5, r.seq3,ch.physmap as start, ch.physmapstr as end , ch.physmapstrand as strand FROM RefSNP as r, ContigHit as ch, Strain as s WHERE r.snptype = "notwithdrawn" and r.internal_id = ch.internal_id and r.internal_id= s.internal_id and ch.physmap between 11474465 and 11584465 and ch.chr = '11' and s.allele like "%/%"; $query = qq{ SELECT distinct(r.id), r.snpclass, r.mapweight, r.observed, r.seq5, r.seq3, ch.physmap as start, ch.physmapstr as end, ch.physmapstrand as strand, r.internal_id as internal_id FROM RefSNP as r, ContigHit as ch, Strain as s WHERE r.snptype = "notwithdrawn" and r.internal_id = s.internal_id and r.internal_id = ch.internal_id and s.allele != "" and ch.physmap between $chr_start and $chr_end and ch.chr = "$chr_name" }; my $sth=$self->prepare($query); my $res=$sth->execute(); while (my $info = $sth->fetchrow_hashref()) { if ($info) { my $physmapstr = $info->{end}; my ($start, $end) = split /\^|\.\./, $physmapstr if ($info->{start} ne $info->{end}); $info->{chr_start} = $start || $info->{start}; $info->{start} = $start-$chr_start+1 if ($start); $info->{end} = $end-$chr_start+1 if ($end); $info->{start} = $info->{start}-$chr_start+1 if (!$start); $info->{end} = $info->{end}-$chr_start+1 if (!$end); my $var_obj = $self->_objFromHashref($info); #$var_objs{$var_obj->snpid}=$var_obj; push (@var_objs, $var_obj); } } return \@var_objs; } =head2 fetch_by_refsnpid Title : fetch_by_refsnpid Usage : Function: Example : Returns : a list of snp info by given refsnpid Args : refsnpid, mouse_flag =cut sub fetch_by_refsnpid { my ($self,$refsnpid,$mouse) = @_; my (@infos,$query,%var_objs); if ($mouse) { $query = qq{ SELECT t1.id, t1.snpclass, t1.snptype, t1.observed, t1.seq5, t1.seq3 FROM RefSNP as t1 WHERE t1.id = "$refsnpid" }; } else { $query = qq{ SELECT t1.id, t1.snpclass, t1.snptype, t1.observed, t1.seq5, t1.seq3, t2.acc, t2.version, t2.start, t2.end, t2.strand FROM RefSNP as t1 left join Hit as t2 on t1.internal_id = t2.internal_id WHERE t1.mapweight <=2 and t1.id = "$refsnpid" }; } my $sth=$self->prepare($query); my $res=$sth->execute(); while (my $info = $sth->fetchrow_hashref()) { if ($info) { my $var_obj = $self->_objFromHashref($info); return $var_obj if $var_obj; } } } sub _objFromHashref { my ($self,$info) = @_; my $acc_version = ''; my $acc = $info->{acc}; my $ver = $info->{version}; $acc_version .= uc $acc if $acc; $acc_version .= ".$ver" if $ver; my $snp = new Bio::EnsEMBL::ExternalData::Variation; $snp->acc($info->{acc}); $snp->version($info->{version}); $snp->seqname($acc_version); $snp->start($info->{start}); $snp->end($info->{end}); $snp->strand($info->{strand}); $snp->source_tag('dbSNP'); #$snp->status($info->{confirmed}); $snp->alleles($info->{observed}); $snp->upStreamSeq($info->{seq5}); $snp->dnStreamSeq($info->{seq3}); $snp->score($info->{mapweight}); #$snp->het($info->{het}); #$snp->hetse($info->{hetse}); $snp->snpid($info->{id}); $snp->snpclass($info->{snpclass}); $snp->unique_id($info->{chr_start},$info->{internal_id}); #DBLink my $link = new Bio::Annotation::DBLink; $link->database('dbSNP'); $link->primary_id($info->{id}); #add dbXref to Variation $snp->add_DBLink($link); return $snp; } 1;