Raw content of Bio::EnsEMBL::Variation::DBSQL::ReadCoverageAdaptor
#
# Ensembl module for Bio::EnsEMBL::Variation::DBSQL::ReadCoverageAdaptor
#
# Copyright (c) 2005 Ensembl
#
# You may distribute this module under the same terms as perl itself
#
#
=head1 NAME
Bio::EnsEMBL::Variation::DBSQL::ReadCoverageAdaptor
=head1 SYNOPSIS
$vdb = Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new(...);
$db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...);
# tell the variation database where core database information can be
# be found
$vdb->dnadb($db);
$rca = $vdb->get_ReadCoverageAdaptor();
$sa = $db->get_SliceAdaptor();
$pa = $vdb->get_PopulationAdaptor();
# get read coverage in a region for a certain population in in a certain level
$slice = $sa->fetch_by_region('chromosome', 'X', 1e6, 2e6);
$population = $pa->fetch_by_name("UNKNOWN");
$level = 1;
foreach $rc (@{$vfa->fetch_all_by_Slice_Sample_depth($slice,$population,$level)}) {
print $rc->start(), '-', $rc->end(), "\n";
}
=head1 DESCRIPTION
This adaptor provides database connectivity for ReadCoverage objects.
Coverage information for reads can be obtained from the database using this
adaptor. See the base class BaseFeatureAdaptor for more information.
=head1 AUTHOR - Daniel Rios
=head1 CONTACT
Post questions to the Ensembl development list ensembl-dev@ebi.ac.uk
=head1 METHODS
=cut
use strict;
use warnings;
package Bio::EnsEMBL::Variation::DBSQL::ReadCoverageAdaptor;
use Bio::EnsEMBL::Variation::ReadCoverage;
use Bio::EnsEMBL::Mapper::RangeRegistry;
use Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor;
use Bio::EnsEMBL::Utils::Exception qw(throw warning);
our @ISA = ('Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor');
=head2 fetch_all_by_Slice_Sample_depth
Arg[0] : Bio::EnsEMBL::Slice $slice
Arg[1] : (optional) Bio::EnsEMBL::Variation::Sample $sample
Arg[2] : (optional) int $level
Example : my $features = $rca->fetch_all_by_Slice_Sample_depth($slice,$sample,$level); or
my $features = $rca->fetch_all_by_Slice_Sample_depth($slice, $sample); or
my $features = $rca->fetch_all_by_Slice_Sample_depth($slice, $level); or
my $features = $rca->fetch_all_by_Slice_Sample_depth($slice);
Description : Gets all the read coverage features for a given sample(strain or individual) in a certain level
in the provided slice
ReturnType : listref of Bio::EnsEMBL::Variation::ReadCoverage
Exceptions : thrown on bad arguments
Caller : general
Status : At Risk
=cut
sub fetch_all_by_Slice_Sample_depth{
my $self = shift;
my $slice = shift;
my @args = @_; #can contain individual and/or level
my $rcs;
if(!ref($slice) || !$slice->isa('Bio::EnsEMBL::Slice')) {
throw('Bio::EnsEMBL::Slice arg expected');
}
if (defined $args[0]){ #contains, at least, 1 parameter, either a Individual or the level
my $constraint;
my $levels = $self->get_coverage_levels();
if (defined $args[1]){ #contains both parameters, first the Individual and second the level
if(!ref($args[0]) || !$args[0]->isa('Bio::EnsEMBL::Variation::Sample')) {
throw('Bio::EnsEMBL::Variation::Sample arg expected');
}
if(!defined($args[0]->dbID())) {
throw("Sample arg must have defined dbID");
}
if ((grep {$args[1] == $_} @{$levels}) > 0){
$constraint = "rc.sample_id = " . $args[0]->dbID . " AND rc.level = " . $args[1];
# $constraint = "rc.sample_id = ? AND rc.level = ?";
# $self->bind_param_generic_fetch($args[0]->dbID,SQL_INTEGER);
# $self->bind_param_generic_fetch($args[1],SQL_INTEGER);
}
else{
warning("Level must be a number of: " . join(",", @{$levels}));
return [];
}
}
else{ #there is just 1 argument, can either be the Individual or the level
if (!ref($args[0])){
#it should just contain the level
if ((grep {$args[0] == $_} @{$levels}) > 0){
$constraint = "rc.level = " . $args[0];
#$constraint = "rc.level = ? ";
#$self->bind_param_generic_fetch($args[0],SQL_INTEGER);
}
else{
warning("Level must be a number of: " . join(",", @{$levels}));
return [];
}
}
else{
#it should contain the Individual
if (!$args[0]->isa('Bio::EnsEMBL::Variation::Sample')){
throw('Bio::EnsEMBL::Variation::Sample arg expected');
}
$constraint = "rc.sample_id = " . $args[0]->dbID;
#$constraint = "rc.sample_id = ?";
#$self->bind_param_generic_fetch($args[0]->dbID,SQL_INTEGER);
}
}
$rcs = $self->fetch_all_by_Slice_constraint($slice,$constraint);
return $rcs;
}
#call the method fetch_all_by_Slice
$rcs = $self->fetch_all_by_Slice($slice);
return $rcs;
}
#returns a list of regions that are covered by all the sample given
sub fetch_all_regions_covered{
my $self = shift;
my $slice = shift;
my $samples = shift; #listref of sample names to get the coverage from
my $ind_adaptor = $self->db->get_IndividualAdaptor;
my $range_registry = [];
my $max_level = scalar(@{$samples});
_initialize_range_registry($range_registry,$max_level);
foreach my $sample_name (@{$samples}){
my $sample = shift@{$ind_adaptor->fetch_all_by_name($sample_name)};
my $coverage = $self->fetch_all_by_Slice_Sample_depth($slice,$sample,1); #get coverage information
foreach my $cv_feature (@{$coverage}){
my $range = [$cv_feature->seq_region_start,$cv_feature->seq_region_end]; #store toplevel coordinates
_register_range_level($range_registry,$range,1,$max_level);
}
}
return $range_registry->[$max_level]->get_ranges(1);
}
=head2 get_coverage_levels
Args : none
Example : my @coverage_levels = @{$rca->fetch_coverage_levels()};
Description : Gets the read coverage depths calculated in the database
ReturnType : listref of integer
Exceptions : none
Caller : general
Status : At Risk
=cut
sub get_coverage_levels{
my $self = shift;
my @levels;
my $level_coverage;
my $sth = $self->prepare(qq{SELECT meta_value from meta where meta_key = 'read_coverage.coverage_level'
});
$sth->execute();
$sth->bind_columns(\$level_coverage);
while ($sth->fetch()){
push @levels, $level_coverage;
}
$sth->finish();
return \@levels;
}
sub _initialize_range_registry{
my $range_registry = shift;
my $max_level = shift;
foreach my $level (1..$max_level){
$range_registry->[$level] = Bio::EnsEMBL::Mapper::RangeRegistry->new();
}
return;
}
sub _register_range_level{
my $range_registry = shift;
my $range = shift;
my $level = shift;
my $max_level = shift;
return if ($level > $max_level);
my $rr = $range_registry->[$level];
my $pair = $rr->check_and_register(1,$range->[0],$range->[1]);
my $pair_inverted = _invert_pair($range,$pair);
return if (!defined $pair_inverted);
foreach my $inverted_range (@{$pair_inverted}){
_register_range_level($range_registry,$inverted_range,$level+1, $max_level);
}
}
#for a given range and the one covered, returns the inverted
sub _invert_pair{
my $range = shift; #initial range of the array
my $pairs = shift; #listref with the pairs that have been added to the range
my @inverted_pairs;
my $inverted;
my $rr = Bio::EnsEMBL::Mapper::RangeRegistry->new();
foreach my $pair (@{$pairs}){
$rr->check_and_register(1,$pair->[0],$pair->[1]);
}
return $rr->check_and_register(1,$range->[0],$range->[1]); #register again the range
}
sub _tables{ return (['read_coverage','rc']
)}
sub _columns{
return qw (rc.seq_region_id rc.seq_region_start rc.seq_region_end rc.level rc.sample_id);
}
sub _objs_from_sth{
my ($self, $sth, $mapper, $dest_slice) = @_;
my $sa = $self->db()->dnadb()->get_SliceAdaptor();
#
# This code is ugly because an attempt has been made to remove as many
# function calls as possible for speed purposes. Thus many caches and
# a fair bit of gymnastics is used.
#
my ($seq_region_id, $seq_region_start, $seq_region_end, $level, $sample_id);
my $seq_region_strand = 1; #assume all reads are in the + strand
$sth->bind_columns(\$seq_region_id, \$seq_region_start, \$seq_region_end, \$level, \$sample_id);
my @features;
my %seen_pops; #hash containing the populations seen
my %slice_hash;
my %sr_name_hash;
my %sr_cs_hash;
my $asm_cs;
my $cmp_cs;
my $asm_cs_vers;
my $asm_cs_name;
my $cmp_cs_vers;
my $cmp_cs_name;
if($mapper) {
$asm_cs = $mapper->assembled_CoordSystem();
$cmp_cs = $mapper->component_CoordSystem();
$asm_cs_name = $asm_cs->name();
$asm_cs_vers = $asm_cs->version();
$cmp_cs_name = $cmp_cs->name();
$cmp_cs_vers = $cmp_cs->version();
}
my $dest_slice_start;
my $dest_slice_end;
my $dest_slice_strand;
my $dest_slice_length;
if($dest_slice) {
$dest_slice_start = $dest_slice->start();
$dest_slice_end = $dest_slice->end();
$dest_slice_strand = $dest_slice->strand();
$dest_slice_length = $dest_slice->length();
}
my $read_coverage;
FEATURE: while ($sth->fetch()){
#get the population_adaptor object
my $pa = $self->db()->get_PopulationAdaptor();
my $ia = $self->db()->get_IndividualAdaptor();
#get the slice object
my $slice = $slice_hash{"ID:".$seq_region_id};
if(!$slice) {
$slice = $sa->fetch_by_seq_region_id($seq_region_id);
$slice_hash{"ID:".$seq_region_id} = $slice;
$sr_name_hash{$seq_region_id} = $slice->seq_region_name();
$sr_cs_hash{$seq_region_id} = $slice->coord_system();
}
#
# remap the feature coordinates to another coord system
# if a mapper was provided
#
if($mapper) {
my $sr_name = $sr_name_hash{$seq_region_id};
my $sr_cs = $sr_cs_hash{$seq_region_id};
($sr_name,$seq_region_start,$seq_region_end,$seq_region_strand) =
$mapper->fastmap($sr_name, $seq_region_start, $seq_region_end,
$seq_region_strand, $sr_cs);
#skip features that map to gaps or coord system boundaries
next FEATURE if(!defined($sr_name));
#get a slice in the coord system we just mapped to
if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
$slice = $slice_hash{"NAME:$sr_name:$cmp_cs_name:$cmp_cs_vers"} ||=
$sa->fetch_by_region($cmp_cs_name, $sr_name,undef, undef, undef,
$cmp_cs_vers);
} else {
$slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
$sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
$asm_cs_vers);
}
}
#
# If a destination slice was provided convert the coords
# If the dest_slice starts at 1 and is foward strand, nothing needs doing
#
if($dest_slice) {
if($dest_slice_start != 1 || $dest_slice_strand != 1) {
if($dest_slice_strand == 1) {
$seq_region_start = $seq_region_start - $dest_slice_start + 1;
$seq_region_end = $seq_region_end - $dest_slice_start + 1;
} else {
my $tmp_seq_region_start = $seq_region_start;
$seq_region_start = $dest_slice_end - $seq_region_end + 1;
$seq_region_end = $dest_slice_end - $tmp_seq_region_start + 1;
$seq_region_strand *= -1;
}
#throw away features off the end of the requested slice
if($seq_region_end < 1 || $seq_region_start > $dest_slice_length) {
next FEATURE;
}
}
$slice = $dest_slice;
}
my $sample;
if($sample_id){
$sample = $seen_pops{$sample_id} ||= $pa->fetch_by_dbID($sample_id);
$sample = $seen_pops{$sample_id} ||= $ia->fetch_by_dbID($sample_id);
}
$read_coverage = Bio::EnsEMBL::Variation::ReadCoverage->new(-start => $seq_region_start,
-end => $seq_region_end,
-slice => $slice,
-adaptor => $self,
-level => $level,
-sample => $sample,
-strand => 1
);
push @features, $read_coverage;
}
$sth->finish();
return \@features;
}
1;