Raw content of Bio::EnsEMBL::Variation::Variation # Ensembl module for Bio::EnsEMBL::Variation::Variation # # Copyright (c) 2004 Ensembl # =head1 NAME Bio::EnsEMBL::Variation::Variation - Ensembl representation of a nucleotide variation. =head1 SYNOPSIS $v = Bio::EnsEMBL::Variation::Variation->new(-name => 'rs123', -source => 'dbSNP'); # add additional synonyms for the same SNP $v->add_synonym('dbSNP', 'ss3242'); $v->add_synonym('TSC', '53253'); # add some validation states for this SNP $v->add_validation_status('freq'); $v->add_validation_status('cluster'); # add alleles associated with this SNP $a1 = Bio::EnsEMBL::Allele->new(...); $a2 = Bio::EnsEMBL::Allele->new(...); $v->add_Allele($a1); $v->add_Allele($a2); # set the flanking sequences $v->five_prime_flanking_seq($seq); $v->three_prime_flanking_seq($seq); ... # print out the default name and source of the variation and the version print $v->source(), ':',$v->name(), ".",$v->version,"\n"; # print out every synonym associated with this variation @synonyms = @{$v->get_all_synonyms()}; print "@synonyms\n"; # print out synonyms and their database associations my $sources = $v->get_all_synonym_sources(); foreach my $src (@$sources) { @synonyms = $v->get_all_synonyms($src); print "$src: @synonyms\n"; } # print out validation states my @vstates = @{$v->get_all_validation_states()}; print "@validation_states\n"; # print out flanking sequences print "5' flanking: ", $v->five_prime_flanking_seq(), "\n"; print "3' flanking: ", $v->three_prime_flanking_seq(), "\n"; =head1 DESCRIPTION This is a class representing a nucleotide variation from the ensembl-variation database. A variation may be a SNP a multi-base substitution or an insertion/deletion. The objects Alleles associated with a Variation object describe the nucleotide change that Variation represents. A Variation object has an associated identifier and 0 or more additional synonyms. The position of a Variation object on the Genome is represented by the B<Bio::EnsEMBL::Variation::VariationFeature> class. =head1 CONTACT Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk =head1 METHODS =cut use strict; use warnings; package Bio::EnsEMBL::Variation::Variation; use Bio::EnsEMBL::Storable; use Bio::EnsEMBL::Utils::Argument qw(rearrange); use Bio::EnsEMBL::Variation::Utils::Sequence qw(ambiguity_code variation_class); use Bio::EnsEMBL::Utils::Exception qw(throw deprecate warning); use vars qw(@ISA); @ISA = qw(Bio::EnsEMBL::Storable); # List of validation states. Order must match that of set in database our @VSTATES = ('cluster','freq','submitter','doublehit','hapmap', 'failed', 'non-polymorphic', 'observed'); # Conversion of validation state to bit value our %VSTATE2BIT = ('cluster' => 1, # 00000001 'freq' => 2, # 00000010 'submitter' => 4, # 00000100 'doublehit' => 8, # 00001000 'hapmap' => 16, # 00010000 'failed' => 32, 'non-polymorphic' => 64, # 00100000 'observed' => 128 # 01000000 ); =head2 new Arg [-dbID] : int - unique internal identifier for snp Arg [-ADAPTOR] : Bio::EnsEMBL::Variation::DBSQL::VariationAdaptor Adaptor which provides database connectivity for this Variation object Arg [-NAME] : string - the name of this SNP Arg [-SOURCE] : string - the source of this SNP Arg [-SYNONYMS] : reference to hash with list reference values - keys are source names and values are lists of identifiers from that db. e.g.: {'dbSNP' => ['ss1231', '1231'], 'TSC' => ['1452']} Arg [-ANCESTRAL_ALLELES] : string - the ancestral allele of this SNP Arg [-ALLELES] : reference to list of Bio::EnsEMBL::Variation::Allele objects Arg [-VALIDATION_STATES] : reference to list of strings Arg [-MOLTYPE] : string - the moltype of this SNP Arg [-FIVE_PRIME_FLANKING_SEQ] : string - the five prime flanking nucleotide sequence Arg [-THREE_PRIME_FLANKING_SEQ] : string - the three prime flanking nucleotide sequence Arg [-FAILED_DESCRIPTION] : string - description why the variation failed Ensembl pipeline Example : $v = Bio::EnsEMBL::Variation::Variation->new (-name => 'rs123', -source => 'dbSNP'); Description: Constructor. Instantiates a new Variation object. Returntype : Bio::EnsEMBL::Variation::Variation Exceptions : none Caller : general Status : At Risk =cut sub new { my $caller = shift; my $class = ref($caller) || $caller; my ($dbID, $adaptor, $name, $src, $syns, $ancestral_allele, $alleles, $valid_states, $moltype, $five_seq, $three_seq, $failed_description) = rearrange([qw(dbID ADAPTOR NAME SOURCE SYNONYMS ANCESTRAL_ALLELE ALLELES VALIDATION_STATES MOLTYPE FIVE_PRIME_FLANKING_SEQ THREE_PRIME_FLANKING_SEQ FAILED_DESCRIPTION)],@_); # convert the validation state strings into a bit field # this preserves the same order and representation as in the database # and filters out invalid states my $vcode = 0; $valid_states ||= []; foreach my $vstate (@$valid_states) { $vcode |= $VSTATE2BIT{lc($vstate)} || 0; } return bless {'dbID' => $dbID, 'adaptor' => $adaptor, 'name' => $name, 'source' => $src, 'synonyms' => $syns || {}, 'ancestral_allele' => $ancestral_allele, 'alleles' => $alleles || [], 'validation_code' => $vcode, 'moltype' => $moltype, 'five_prime_flanking_seq' => $five_seq, 'three_prime_flanking_seq' => $three_seq, 'failed_description' => $failed_description}, $class; } =head2 name Arg [1] : string $newval (optional) The new value to set the name attribute to Example : $name = $obj->name() Description: Getter/Setter for the name attribute Returntype : string Exceptions : none Caller : general Status : Stable =cut sub name{ my $self = shift; return $self->{'name'} = shift if(@_); return $self->{'name'}; } =head2 get_all_Genes Args : None Example : $genes = $v->get_all_genes(); Description : Retrieves all the genes where this Variation has a consequence. ReturnType : reference to list of Bio::EnsEMBL::Gene Exceptions : None Caller : general Status : At Risk =cut sub get_all_Genes{ my $self = shift; my $genes; if (defined $self->{'adaptor'}){ my $UPSTREAM = 5000; my $DOWNSTREAM = 5000; my $vf_adaptor = $self->adaptor()->db()->get_VariationFeatureAdaptor(); my $vf_list = $vf_adaptor->fetch_all_by_Variation($self); #foreach vf, get the slice is on, us ethe USTREAM and DOWNSTREAM limits to get all the genes, and see if SNP is within the gene my $new_slice; my $gene_list; my $gene_hash; foreach my $vf (@{$vf_list}){ #expand the slice UPSTREAM and DOWNSTREAM $new_slice = $vf->feature_Slice()->expand($UPSTREAM,$DOWNSTREAM); #get the genes in the new slice $gene_list = $new_slice->get_all_Genes(); foreach my $gene (@{$gene_list}){ if (($vf->start >= $gene->seq_region_start - $UPSTREAM) && ($vf->start <= $gene->seq_region_end + $DOWNSTREAM) && ($vf->end <= $gene->seq_region_end + $DOWNSTREAM)){ #the vf is affecting the gene, add to the hash if not present already if (!exists $gene_hash->{$gene->dbID}){ $gene_hash->{$gene->dbID} = $gene; } } } } #and return all the genes push @{$genes}, values %{$gene_hash}; } return $genes; } =head2 get_all_synonyms Arg [1] : (optional) string $source - the source of the synonyms to return. Example : @dbsnp_syns = @{$v->get_all_synonyms('dbSNP')}; @all_syns = @{$v->get_all_synonyms()}; Description: Retrieves synonyms for this Variation. If a source argument is provided all synonyms from that source are returned, otherwise all synonyms are returned. Returntype : reference to list of strings Exceptions : none Caller : general Status : At Risk =cut sub get_all_synonyms { my $self = shift; my $source = shift; if($source) { return $self->{'synonyms'}->{$source} || [] } my @synonyms = map {@$_} values %{$self->{'synonyms'}}; return \@synonyms; } =head2 get_all_synonym_sources Arg [1] : none Example : my @sources = @{$v->get_all_synonym_sources()}; Description: Retrieves a list of all the sources for synonyms of this Variation. Returntype : reference to a list of strings Exceptions : none Caller : general Status : At Risk =cut sub get_all_synonym_sources { my $self = shift; my @sources = keys %{$self->{'synonyms'}}; return \@sources; } =head2 add_synonym Arg [1] : string $source Arg [2] : string $syn Example : $v->add_synonym('dbSNP', 'ss55331'); Description: Adds a synonym to this variation. Returntype : none Exceptions : throw if $source argument is not provided throw if $syn argument is not provided Caller : general Status : At Risk =cut sub add_synonym { my $self = shift; my $source = shift; my $syn = shift; throw("source argument is required") if(!$source); throw("syn argument is required") if(!$syn); $self->{'synonyms'}->{$source} ||= []; push @{$self->{'synonyms'}->{$source}}, $syn; return; } =head2 get_all_validation_states Arg [1] : none Example : my @vstates = @{$v->get_all_validation_states()}; Description: Retrieves all validation states for this variation. Current possible validation statuses are 'cluster','freq','submitter', 'doublehit', 'hapmap' Returntype : reference to list of strings Exceptions : none Caller : general Status : At Risk =cut sub get_all_validation_states { my $self = shift; my $code = $self->{'validation_code'}; # convert the bit field into an ordered array my @states; for(my $i = 0; $i < @VSTATES; $i++) { push @states, $VSTATES[$i] if((1 << $i) & $code); } return \@states; } =head2 add_validation_state Arg [1] : string $state Example : $v->add_validation_state('cluster'); Description: Adds a validation state to this variation. Returntype : none Exceptions : warning if validation state is not a recognised type Caller : general Status : At Risk =cut sub add_validation_state { my $self = shift; my $state = shift; # convert string to bit value and add it to the existing bitfield my $bitval = $VSTATE2BIT{lc($state)}; if(!$bitval) { warning("$state is not a recognised validation status. Recognised " . "validation states are: @VSTATES"); return; } $self->{'validation_code'} |= $bitval; return; } =head2 source Arg [1] : string $source (optional) The new value to set the source attribute to Example : $source = $v->source() Description: Getter/Setter for the source attribute Returntype : string Exceptions : none Caller : general Status : Stable =cut sub source{ my $self = shift; return $self->{'source'} = shift if(@_); return $self->{'source'}; } =head2 get_all_Alleles Arg [1] : none Example : @alleles = @{v->get_all_Alleles()}; Description: Retrieves all Alleles associated with this variation Returntype : reference to list of Bio::EnsEMBL::Variation::Allele objects Exceptions : none Caller : general Status : Stable =cut sub get_all_Alleles { my $self = shift; return $self->{'alleles'}; } =head2 add_Allele Arg [1] : Bio::EnsEMBL::Variation::Allele $allele Example : $v->add_Allele(Bio::EnsEMBL::Variation::Alelele->new(...)); Description: Associates an Allele with this variation Returntype : none Exceptions : throw on incorrect argument Caller : general Status : At Risk =cut sub add_Allele { my $self = shift; my $allele = shift; if(!ref($allele) || !$allele->isa('Bio::EnsEMBL::Variation::Allele')) { throw("Bio::EnsEMBL::Variation::Allele argument expected"); } push @{$self->{'alleles'}}, $allele; } =head2 ancestral_allele Arg [1] : string $ancestral_allele (optional) Example : $ancestral_allele = v->ancestral_allele(); Description: Getter/Setter ancestral allele associated with this variation Returntype : string Exceptions : none Caller : general Status : At Risk =cut sub ancestral_allele { my $self = shift; return $self->{'ancestral_allele'} = shift if(@_); return $self->{'ancestral_allele'}; } =head2 moltype Arg [1] : string $moltype (optional) The new value to set the moltype attribute to Example : $moltype = v->moltype(); Description: Getter/Setter moltype associated with this variation Returntype : string Exceptions : none Caller : general Status : At Risk =cut sub moltype { my $self = shift; return $self->{'moltype'} = shift if(@_); return $self->{'moltype'}; } =head2 five_prime_flanking_seq Arg [1] : string $newval (optional) The new value to set the five_prime_flanking_seq attribute to Example : $five_prime_flanking_seq = $obj->five_prime_flanking_seq() Description: Getter/Setter for the five_prime_flanking_seq attribute Returntype : string Exceptions : none Caller : general Status : Stable =cut sub five_prime_flanking_seq{ my $self = shift; #setter of the flanking sequence return $self->{'five_prime_flanking_seq'} = shift if(@_); #lazy-load the flanking sequence from the database if (!defined $self->{'five_prime_flanking_seq'} && $self->{'adaptor'}){ my $variation_adaptor = $self->adaptor()->db()->get_VariationAdaptor(); ($self->{'three_prime_flanking_seq'},$self->{'five_prime_flanking_seq'}) = @{$variation_adaptor->get_flanking_sequence($self->{'dbID'})}; } return $self->{'five_prime_flanking_seq'}; } =head2 three_prime_flanking_seq Arg [1] : string $newval (optional) The new value to set the three_prime_flanking_seq attribute to Example : $three_prime_flanking_seq = $obj->three_prime_flanking_seq() Description: Getter/Setter for the three_prime_flanking_seq attribute Returntype : string Exceptions : none Caller : general Status : Stable =cut sub three_prime_flanking_seq{ my $self = shift; #setter of the flanking sequence return $self->{'three_prime_flanking_seq'} = shift if(@_); #lazy-load the flanking sequence from the database if (!defined $self->{'three_prime_flanking_seq'} && $self->{'adaptor'}){ my $variation_adaptor = $self->adaptor()->db()->get_VariationAdaptor(); ($self->{'three_prime_flanking_seq'},$self->{'five_prime_flanking_seq'}) = @{$variation_adaptor->get_flanking_sequence($self->{'dbID'})}; } return $self->{'three_prime_flanking_seq'}; } =head2 get_all_IndividualGenotypes Args : none Example : $ind_genotypes = $var->get_all_IndividualGenotypes() Description: Getter for IndividualGenotypes for this Variation, returns empty list if there are none Returntype : listref of IndividualGenotypes Exceptions : none Caller : general Status : At Risk =cut sub get_all_IndividualGenotypes { my $self = shift; if (defined ($self->{'adaptor'})){ my $igtya = $self->{'adaptor'}->db()->get_IndividualGenotypeAdaptor(); return $igtya->fetch_all_by_Variation($self); } return []; } =head2 get_all_PopulationGenotypes Args : none Example : $pop_genotypes = $var->get_all_PopulationGenotypes() Description: Getter for PopulationGenotypes for this Variation, returns empty list if there are none. Returntype : listref of PopulationGenotypes Exceptions : none Caller : general Status : At Risk =cut sub get_all_PopulationGenotypes { my $self = shift; #simulate a lazy-load on demand situation, used by the Glovar team if (!defined($self->{'populationGenotypes'}) && defined ($self->{'adaptor'})){ my $pgtya = $self->{'adaptor'}->db()->get_PopulationGenotypeAdaptor(); return $pgtya->fetch_all_by_Variation($self); } return $self->{'populationGenotypes'}; } =head2 add_PopulationGenotype Arg [1] : Bio::EnsEMBL::Variation::PopulationGenotype Example : $v->add_PopulationGenotype($pop_genotype) Description : Adds another PopulationGenotype to the Variation object Exceptions : thrown on bad argument Caller : general Status : At Risk =cut sub add_PopulationGenotype{ my $self = shift; if (@_){ if(!ref($_[0]) || !$_[0]->isa('Bio::EnsEMBL::Variation::PopulationGenotype')) { throw("Bio::EnsEMBL::Variation::PopulationGenotype argument expected"); } #a variation can have multiple PopulationGenotypes push @{$self->{'populationGenotypes'}},shift; } } =head2 ambig_code Args : None Example : my $ambiguity_code = $v->ambig_code() Description : Returns the ambigutiy code for the alleles in the Variation ReturnType : String $ambiguity_code Exceptions : none Caller : General Status : At Risk =cut sub ambig_code{ my $self = shift; my $alleles = $self->get_all_Alleles(); #get all Allele objects my %alleles; #to get all the different alleles in the Variation map {$alleles{$_->allele}++} @{$alleles}; my $allele_string = join "|",keys %alleles; return &ambiguity_code($allele_string); } =head2 var_class Args : None Example : my $variation_class = $vf->var_class() Description : returns the class for the variation, according to dbSNP classification ReturnType : String $variation_class Exceptions : none Caller : General Status : At Risk =cut sub var_class{ my $self = shift; my $alleles = $self->get_all_Alleles(); #get all Allele objects my %alleles; #to get all the different alleles in the Variation map {$alleles{$_->allele}++} @{$alleles}; my $allele_string = join "|",keys %alleles; return &variation_class($allele_string); } =head2 failed_description Arg [1] : string $failed_description (optional) The new value to set the failed_description attribute to Example : $failed_description = $v->failed_description() Description: Getter/Setter for the failed_description attribute Returntype : string Exceptions : none Caller : general Status : Stable =cut sub failed_description{ my $self = shift; return $self->{'failed_description'} = shift if(@_); return $self->{'failed_description'}; } =head2 derived_allele_frequency Arg[1] : Bio::EnsEMBL::Variation::Population $population Example : $daf = $variation->derived_allele_frequency($population); Description: Gets the derived allele frequency for the population. The DAF is the frequency of the reference allele that is different from the allele in Chimp. If none of the alleles is the same as the ancestral, will return reference allele frequency Returntype : float Exceptions : none Caller : general Status : At Risk =cut sub derived_allele_frequency{ my $self = shift; my $population = shift; my $daf; if(!ref($population) || !$population->isa('Bio::EnsEMBL::Variation::Population')) { throw('Bio::EnsEMBL::Variation::Population argument expected.'); } my $ancestral_allele = $self->ancestral_allele(); if (defined $ancestral_allele){ #get reference allele my $vf_adaptor = $self->adaptor->db->get_VariationFeatureAdaptor(); my $vf = shift @{$vf_adaptor->fetch_all_by_Variation($self)}; my $ref_freq; #get allele in population my $alleles = $self->get_all_Alleles(); foreach my $allele (@{$alleles}){ if (($allele->allele eq $vf->ref_allele_string) and ($allele->population->name eq $population->name)){ $ref_freq = $allele->frequency; } } if ($ancestral_allele eq $vf->ref_allele_string){ $daf = 1 - $ref_freq } elsif ($ancestral_allele ne $vf->ref_allele_string){ $daf = $ref_freq; } } return $daf; } =head2 derived_allele Arg[1] : Bio::EnsEMBL::Variation::Population $population Example : $da = $variation->derived_allele($population); Description: Gets the derived allele for the population. Returntype : float Exceptions : none Caller : general Status : At Risk =cut sub derived_allele { my $self = shift(); my $population = shift(); my $population_dbID = $population->dbID(); my $ancestral_allele_str = $self->ancestral_allele(); if (not defined($ancestral_allele_str)) { return; } my $alleles = $self->get_all_Alleles(); my $derived_allele_str; foreach my $allele (@{$alleles}) { my $allele_population = $allele->population(); if (defined($allele_population) and $allele_population->dbID() == $population_dbID) { my $allele_str = $allele->allele(); if ($ancestral_allele_str ne $allele_str) { if (defined($derived_allele_str)) { return; } else { $derived_allele_str = $allele_str; } } } } return $derived_allele_str; } 1;