Raw content of Bio::EnsEMBL::Variation::LDFeatureContainer
# Ensembl module for Bio::EnsEMBL::Variation::LDFeatureContainer
#
# Copyright (c) 2004 Ensembl
#
=head1 NAME
Bio::EnsEMBL::Variation::LDFeatureContainer - A container with all the ld values for quick access
=head1 SYNOPSIS
# LDFeature Container representing all the LD values for a certain contig
$ldContainer = Bio::EnsEMBL::Variation::LDFeatureContainer->new
(-name => NT_085213,
-ldContainer => $ldhash,
-variation_features => $vfhash);
...
#get the d_prime values for a certain pair of variation_features
d_prime = $ldContainer->get_d_prime($variation_feature_1,$variation_feature_2);
#get the list of variation in the container
$variations = $ldContainer->get_variations();
...
=head1 DESCRIPTION
This is a class representing the LD information for a certain region
from the ensembl-variation database.
See B.
=head1 CONTACT
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=head1 METHODS
=cut
use strict;
use warnings;
package Bio::EnsEMBL::Variation::LDFeatureContainer;
use Bio::EnsEMBL::Storable;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Storable);
=head2 new
Arg [-NAME] :
string - name of the feature object that the LD container refers to. (chr1,NT_08542,...)
Arg [-LDCONTAINER] :
reference - hash containing all the LD information present, with the key
(variation_feature_1-variation_feature_2) to access the information
Arg [-VARIATIONFEATURES] :
reference - hash containing all the Bio::EnsEMBL::Variation::VariationFeature objects that are present in the Container
Example :
$ldContainer = Bio::EnsEMBL::Variation::LDFeatureContainer->new
(-name => 'chr1'
-ldContainer => {'variation_feature_1-variation_feature_2' => { 'sample_id_1' =>
{ 'd_prime' => 0.5,
'r2' => 0.421,
'sample_count' => 120
},
'sample_id_2' =>
{ 'd_prime' => 0.3,
'r2' => 0.321,
'sample_count' => 35
}
}
}
-variationFeatures => hash of Bio::EnsEMBL::Variation::VariationFeature
);
Description: Constructor. Instantiates a new LDFeatureContainer object.
Returntype : Bio::EnsEMBL::Variation::LDFeatureContainer
Exceptions : none
Caller : general
Status : At Risk
=cut
sub new {
my $caller = shift;
my $class = ref($caller) || $caller;
my ($ldContainer,$name,$variationFeatures) = rearrange([qw(LDCONTAINER NAME VARIATIONFEATURES)], @_);
if (defined($ldContainer) && ref($ldContainer ne 'HASH')){
throw("Reference to a hash object expected as a LDContainer");
}
$ldContainer ||= {};
return bless {'name' => $name,
'ldContainer' => $ldContainer,
'variationFeatures' => $variationFeatures}, $class;
}
=head2 name
Arg [1] : string $newval (optional)
The new value to set the name attribute to
Example : $name = $obj->name()
Description: Getter/Setter for the name attribute.
Returntype : string
Exceptions : none
Caller : general
Status : At Risk
=cut
sub name{
my $self = shift;
return $self->{'name'} = shift if(@_);
return $self->{'name'};
}
=head2 get_variations
Example : $variations = $obj->get_variations()
Description : Gets all the variation objects contained in the LDFeatureContainer
ReturnType : list of Bio::EnsEMBL::Variation::Variation
Exceptions : none
Caller : general
Status : At Risk
=cut
sub get_variations{
my $self = shift;
my @variations;
foreach my $variation_feature (keys %{$self->{'variationFeatures'}}){
push @variations,$self->{'variationFeatures'}->{$variation_feature}->variation();
}
return \@variations;
}
=head2 get_r_square
Arg [1] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature
Arg [2] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature
Arg [3] : (optional) int - sample_id of population you want to get the r_square value
Example : $r_square = $obj->get_r_square($vf1,$vf2,$sample_id);
Description : Get the r_square value for a pair of variation features in the given population. If no population is provided,
return the r_square for the default population with more sample counts (in case more than 1)
ReturnType : float
Exceptions : throw on incorrect arguments
Caller : general
Status : At Risk
=cut
sub get_r_square{
my $self = shift;
my $variation_feature_1 = shift;
my $variation_feature_2 = shift;
my $sample_id = shift;
$sample_id ||= 0; #in case no population provided, to avoid warning in the hash
my $r_square;
my $key;
#check if the default poppulation has been calculated, otherwise, find it
if (! defined $self->{'_default_population'}){
$self->{'_default_population'} = $self->_get_major_population;
}
#first of all, check that both arguments have been properly provided
if (!ref($variation_feature_1) || !$variation_feature_1->isa('Bio::EnsEMBL::Variation::VariationFeature') || !ref($variation_feature_2) || !$variation_feature_2->isa('Bio::EnsEMBL::Variation::VariationFeature')){
throw("Bio::EnsEMBL::Variation::VariationFeature arguments expected");
}
else{
#check if the ldContainer does not contain pairwise information for the variation features provided
if (!exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()} && !exists $self->{'ldContainer'}->{$variation_feature_2->dbID() . '-' . $variation_feature_1->dbID()}){
warning("variation features have no pairwise ld information");
}
else{
#find out the key in the ld Hash: vf1 - vf2 or vf2 - vf1
if (exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()}){
$key = $variation_feature_1->dbID() . '-' . $variation_feature_2->dbID();
}
else{
$key = $variation_feature_2->dbID() . '-' . $variation_feature_1->dbID();
}
#and finally, if population provided or the only population
if (exists $self->{'ldContainer'}->{$key}->{$sample_id}){
$r_square = $self->{'ldContainer'}->{$key}->{$sample_id}->{'r2'}
}
else{
if (exists $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}){
#there was no population provided, return the r_square for the default population
$r_square = $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}->{'r2'};
}
else{
warning("variation features have no pairwise ld information for default population ", $self->{'_default_population'});
}
}
}
}
return $r_square;
}
=head2 get_d_prime
Arg [1] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature
Arg [2] : Bio::EnsEMBL::Variation::VariationFeature $variationFeature
Arg [3] : (optional) int - sample_id of population you want to get the d_prime value
Example : $d_prime = $obj->get_d_prime($vf1,$vf2,$sample_id);
Description : Get the d_prime value for a pair of variation features for a known or unknown population. In case of an unknown population, the default
poulation is used
ReturnType : float
Exceptions : throw on incorrect arguments
Caller : general
Status : At Risk
=cut
sub get_d_prime{
my $self = shift;
my $variation_feature_1 = shift;
my $variation_feature_2 = shift;
my $sample_id = shift;
$sample_id ||= 0; #in case no population provided, to avoid warning in the hash
my $d_prime;
my $key;
if (! defined $self->{'_default_population'}){
$self->{'_default_population'} = $self->_get_major_population;
}
#first of all, check that both arguments have been properly provided
if (!ref($variation_feature_1) || !$variation_feature_1->isa('Bio::EnsEMBL::Variation::VariationFeature') || !ref($variation_feature_2) || !$variation_feature_2->isa('Bio::EnsEMBL::Variation::VariationFeature')){
throw("Bio::EnsEMBL::Variation::VariationFeature arguments expected");
}
else{
#check if the ldContainer does not contain pairwise information for the variation features provided
if (!exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()} && !exists $self->{'ldContainer'}->{$variation_feature_2->dbID() . '-' . $variation_feature_1->dbID()}){
warning("variation features have no pairwise ld information");
}
else{
#find out the key in the ld Hash: vf1 - vf2 or vf2 - vf1
if (exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()}){
$key = $variation_feature_1->dbID() . '-' . $variation_feature_2->dbID();
}
else{
$key = $variation_feature_2->dbID() . '-' . $variation_feature_1->dbID();
}
#and finally, if population provided or the only population
if (exists $self->{'ldContainer'}->{$key}->{$sample_id}){
$d_prime = $self->{'ldContainer'}->{$key}->{$sample_id}->{'d_prime'};
}
else{
if (exists $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}){
#there was no population provided, return the r_square for the default population
$d_prime = $self->{'ldContainer'}->{$key}->{$self->{'_default_population'}}->{'d_prime'};
}
else{
warning("variation features have no pairwise ld information for default population ", $self->{'_default_population'});
}
}
}
}
return $d_prime;
}
=head2 get_all_ld_values
Example : $ld_values = $obj->get_all_ld_values();
Description : Get all the information contained in the LDFeatureContainer object
ReturnType : reference to list of hashes [{variation1 => Bio::EnsEMBL::Variation::VariationFeature, variation2=>Bio::EnsEMBL::Variation::VariationFeature, d_prime=>d_prime, r2=>r2, sample_count=>sample_count, sample_id=>population_sample_id}]
Exceptions : no exceptions
Caller : general
Status : At Risk
=cut
sub get_all_ld_values{
my $self = shift;
my @ld_values; #contains ALL the ld values in the container
#the keys in the ldContainer hash
my $variation_feature_id_1;
my $variation_feature_id_2;
if (! defined $self->{'_default_population'}){
$self->{'_default_population'} = $self->_get_major_population;
}
foreach my $key_ld (keys %{$self->{'ldContainer'}}){
my %ld_value; #contains a single ld value in the container {variation_feature variation_feature d_prime r2 snp_distance_count}
($variation_feature_id_1, $variation_feature_id_2) = split /-/,$key_ld; #get the variation_features ids
if (exists $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}){
#add the information to the ld_value hash
$ld_value{'variation1'} = $self->{'variationFeatures'}->{$variation_feature_id_1};
$ld_value{'variation2'} = $self->{'variationFeatures'}->{$variation_feature_id_2};
$ld_value{'d_prime'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'d_prime'};
$ld_value{'r2'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'r2'};
$ld_value{'sample_count'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'sample_count'};
$ld_value{'sample_id'} = $self->{'_default_population'};
push @ld_values, \%ld_value;
}
}
return \@ld_values;
}
=head2 get_all_r_square_values
Example : $r_square_values = $obj->get_all_r_square_values();
Description : Get all r_square values contained in the LDFeatureContainer object
ReturnType : reference to list of [{variation1=>Bio::EnsEMBL::Variation::VariationFeature, variation2=>Bio::EnsEMBL::Variation::VariationFeature, r2=>r2, sample_id=>population_sample_id}]
Exceptions : no exceptions
Caller : general
Status : At Risk
=cut
sub get_all_r_square_values{
my $self = shift;
my @r_squares; #contains ALL the r2 values in the container
#the keys in the ldContainer hash
my $variation_feature_id_1;
my $variation_feature_id_2;
if (! defined $self->{'_default_population'}){
$self->{'_default_population'} = $self->_get_major_population;
}
foreach my $key_ld (keys %{$self->{'ldContainer'}}){
my %r_square; #contains a single r2 value in the container {variation_feature r2 population_id}
($variation_feature_id_1, $variation_feature_id_2) = split /-/,$key_ld; #get the variation_features ids
if (exists $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}){
$r_square{'variation1'} = $self->{'variationFeatures'}->{$variation_feature_id_1};
$r_square{'variation2'} = $self->{'variationFeatures'}->{$variation_feature_id_2};
$r_square{'r2'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'r2'};
$r_square{'sample_id'} = $self->{'_default_population'};
#and add all the ld information to the final list
push @r_squares, \%r_square;
}
}
return \@r_squares;
}
=head2 get_all_d_prime_values
Example : $d_prime_values = $obj->get_all_d_prime_values();
Description : Get all d_prime values contained in the LDFeatureContainer object
ReturnType : reference to list of [{variation1=>Bio::EnsEMBL::Variation::VariationFeature, variation2=>Bio::EnsEMBL::Variation::VariationFeature, d_prime=>d_prime, sample_id=>population_sample_id}]
Exceptions : no exceptions
Caller : general
Status : At Risk
=cut
sub get_all_d_prime_values{
my $self = shift;
my @d_primes; #contains ALL the d_prime values in the container
#the keys in the ldContainer hash
my $variation_feature_id_1;
my $variation_feature_id_2;
if (! defined $self->{'_default_population'}){
$self->{'_default_population'} = $self->_get_major_population;
}
foreach my $key_ld (keys %{$self->{'ldContainer'}}){
my %d_prime; #contains a single d_prime value in the container {variation_feature d_prime population_id}
($variation_feature_id_1, $variation_feature_id_2) = split /-/,$key_ld; #get the variation_features ids
#add the information to the ld_value array
if (exists $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}){
$d_prime{'variation1'} = $self->{'variationFeatures'}->{$variation_feature_id_1};
$d_prime{'variation2'} = $self->{'variationFeatures'}->{$variation_feature_id_2};
$d_prime{'d_prime'} = $self->{'ldContainer'}->{$key_ld}->{$self->{'_default_population'}}->{'d_prime'};
$d_prime{'sample_id'} = $self->{'_default_population'};
#and add all the ld information to the final list if exists the value
push @d_primes, \%d_prime;
}
}
return \@d_primes;
}
=head2 get_all_populations
Arg [1] : (optional) Bio::EnsEMBL::Variation::VariationFeature $variationFeature
Arg [2] : (optional) Bio::EnsEMBL::Variation::VariationFeature $variationFeature
Example : $populations = $obj->get_all_populations($vf1,$vf2);
Description : If no arguments provided, returns ALL the populations present in the container. When 2 variation features provided, returns the
population/populations where these variation features occurs
ReturnType : ref to an array of int
Exceptions : throw on incorrect arguments
Caller : general
Status : At Risk
=cut
sub get_all_populations{
my $self = shift;
my $variation_feature_1 = shift;
my $variation_feature_2 = shift;
my %populations;
my @populations;
my $key;
#if no variation provided, return ALL the populations in the container
if (! defined($variation_feature_1) && ! defined($variation_feature_2)){
foreach my $key (keys %{$self->{'ldContainer'}}){
map {$populations{$_}++} keys %{$self->{'ldContainer'}->{$key}};
}
@populations = keys %populations;
}
else{
#first, check if both arguments have been properly provided
if (!ref($variation_feature_1) || !$variation_feature_1->isa('Bio::EnsEMBL::Variation::VariationFeature') || !ref($variation_feature_2) || !$variation_feature_2->isa('Bio::EnsEMBL::Variation::VariationFeature')){
throw("Bio::EnsEMBL::Variation::VariationFeature arguments expected");
}
#if the variation_features are correct, return the list of populations
else{
#find out the key in the ld Hash: vf1 - vf2 or vf2 - vf1
if (exists $self->{'ldContainer'}->{$variation_feature_1->dbID() . '-' . $variation_feature_2->dbID()}){
$key = $variation_feature_1->dbID() . '-' . $variation_feature_2->dbID();
}
else{
$key = $variation_feature_2->dbID() . '-' . $variation_feature_1->dbID();
}
@populations = keys %{$self->{'ldContainer'}->{$key}};
}
}
return \@populations;
}
#returns from the container the population_id with the maximum number of pairwise_ld
sub _get_populations {
my $self = shift;
my %populations;
foreach my $key (keys %{$self->{'ldContainer'}}){
map {$populations{$_}++} keys %{$self->{'ldContainer'}->{$key}};
}
my @sorted_populations = sort{$populations{$b} <=> $populations{$a}} keys %populations;
return @sorted_populations;
}
sub _get_major_population {
my( $pop ) = $_[0]->_get_populations;
return $pop;
}
1;