Bio::EnsEMBL::Variation::DBSQL ReadCoverageCollectionAdaptor
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Variation::DBSQL::ReadCoverageCollectionAdaptor
Package variables
Privates (from "my" definitions)
%Printable = ( "\\"=>'\\', "\r"=>'r', "\n"=>'n', "\t"=>'t', "\""=>'"' )
Included modules
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
Bio::EnsEMBL::Mapper::RangeRegistry
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Bio::EnsEMBL::Variation::ReadCoverageCollection
Inherit
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
Synopsis
  $vdb = Bio::EnsEMBL::Variation::DBSQL::DBAdaptor->new(...);
$db = Bio::EnsEMBL::DBSQL::DBAdaptor->new(...);
# tell the variation database where core database information can be # be found $vdb->dnadb($db); $rca = $vdb->get_ReadCoverageCollectionAdaptor(); $sa = $db->get_SliceAdaptor(); $pa = $vdb->get_PopulationAdaptor(); # get read coverage in a region for a certain population in in a certain level $slice = $sa->fetch_by_region('chromosome', 'X', 1e6, 2e6); $population = $pa->fetch_by_name("UNKNOWN"); $level = 1; foreach $rc (@{$vfa->fetch_all_by_Slice_Sample($slice,$population,$level)}) { print $rc->start(), '-', $rc->end(), "\n"; }
Description
This adaptor provides database connectivity for ReadCoverageCollection objects.
Coverage information for reads can be obtained from the database using this
adaptor. See the base class BaseFeatureAdaptor for more information.
Methods
_fetch_all_by_Slice_WindowSize_SampleId
No description
Code
_find_min_max_score
No description
Code
_unpack_strings
No description
Code
escape
No description
Code
fetch_all_by_Slice_SampleId
No description
Code
Methods description
None available.
Methods code
_fetch_all_by_Slice_WindowSize_SampleIddescriptionprevnextTop
sub _fetch_all_by_Slice_WindowSize_SampleId {
  my ($self,$slice,$window_size,$sample_id) = @_;

  my $rcc = [];
  my $rcs = [];
  my $reads_string_avg=[];
  my $reads_string_min=[];
  my $reads_string_max=[];

  my ($window_start,$window_end);

  my $last_sql = (defined $sample_id) ? "AND sample_id = ?" : '';
  my $sql = qq{SELECT window_size,window_start,window_end,read_coverage_string_avg,read_coverage_string_min,read_coverage_string_max,sample_id
               FROM read_coverage_collection
               WHERE seq_region_id = ?
               AND window_size = ?
               AND window_end > ?
               AND window_start < ?
               $last_sql
              };

  my $sth = $self->prepare($sql);
  $sth->execute($slice->get_seq_region_id,$window_size,$slice->start,$slice->end,$sample_id) if defined $sample_id;
  $sth->execute($slice->get_seq_region_id,$window_size,$slice->start,$slice->end) if !defined $sample_id;
  #these bind_param are not working here
#$sth->bind_param(1,$slice->get_seq_region_id,SQL_INTEGER);
#$sth->bind_param(2,$window_size,SQL_INTEGER);
#$sth->bind_param(3,$slice->start,SQL_INTEGER);
#$sth->bind_param(4,$slice->end,SQL_INTEGER);
while (my @arrays = $sth->fetchrow_array()) { $window_size = $arrays[0]; $window_start = $arrays[1]; $window_end = $arrays[2]; $reads_string_avg = _unpack_strings($arrays[3]); $reads_string_min = _unpack_strings($arrays[4]); $reads_string_max = _unpack_strings($arrays[5]); $sample_id = $arrays[6] if $arrays[6]; #need to find the start_offset for slice_start and end_offset for slice_end
#in the read coverage collection row
#print "window_size is $window_size,window_start is $window_start,window_end is $window_end,avg is @$reads_string_avg,min is @$reads_string_min,max is @$reads_string_max\n";
for (my $j = 0; $j < @$reads_string_avg; $j++) { my $rc = {}; my $seq_region_start = $window_start + $window_size * $j ; my $seq_region_end = $seq_region_start + $window_size -1; next if ($seq_region_start > $slice->end or $seq_region_end < $slice->start); #covert seq_region_start and seq_region_end tp a start and end relative to the slice
my ($start,$end); if ($slice->strand == -1) { $start = $slice->end - $seq_region_end + 1; $end = $slice->end - $seq_region_start + 1; } else { $start = $seq_region_start - $slice->start + 1; $end = $seq_region_end - $slice->start +1; } $rc->{'start'} = $start; $rc->{'end'} = $end; $rc->{'seq_region_start'} = $seq_region_start; $rc->{'seq_region_end'} = $seq_region_end; $rc->{'strand'} = $slice->strand; $rc->{'read_coverage_avg'} = $reads_string_avg->[$j]; $rc->{'read_coverage_min'} = $reads_string_min->[$j]; $rc->{'read_coverage_max'} = $reads_string_max->[$j]; push @{$rcs}, $rc; } }#end while loop
foreach my $rc (@$rcs) { push @$rcc,$self-> _create_feature_fast('Bio::EnsEMBL::Variation::ReadCoverageCollection', {'adaptor' => $self, 'slice' => $slice, 'start' => $rc->{'start'}, 'end' => $rc->{'end'}, 'strand' => $rc->{'strand'}, 'window_size' => $window_size, 'window_start' => $window_start, 'window_end' => $window_end, 'seq_region_start' => $rc->{'seq_region_start'}, 'seq_region_end' => $rc->{'seq_region_end'}, 'seq_region_strand' => 1, 'sample_id' => (defined $sample_id) ? $sample_id : '', 'read_coverage_avg' => $rc->{'read_coverage_avg'}, 'read_coverage_min' => $rc->{'read_coverage_min'}, 'read_coverage_max' => $rc->{'read_coverage_max'}, }); } #sort into numerical order based on position
my @sorted_features = sort {$a->{start} <=> $b->{start}} @$rcc; return\@sorted_features;
}
_find_min_max_scoredescriptionprevnextTop
sub _find_min_max_score {
    my ($scores) = @_;
    my $min; 
    my $max;

    foreach my $score (@$scores) {
	#find min and max of diff scores
if (defined $score->read_coverage_min) { #if min hasn't been defined yet, then define min and max
unless (defined $min and defined $max) { $min = $score->read_coverage_min; $max = $score->read_coverage_max; } if ($min > $score->read_coverage_min) { $min = $score->read_coverage_min; } if ($max < $score->read_coverage_max) { $max = $score->read_coverage_max; } } } return ($min, $max); } 1;
}
_unpack_stringsdescriptionprevnextTop
sub _unpack_strings {
  my $string = shift;

  my $pack_type = "n" x (length($string)/2);
my @arrays = unpack($pack_type,$string); return\@ arrays;
}
escapedescriptionprevnextTop
sub escape {
($) {  local $_ = ( defined $_[0] ? $_[0] : '' );
  s/([\r\n\t\\\"])/\\$Printable{$1}/sg;
  return $_;
}
fetch_all_by_Slice_SampleIddescriptionprevnextTop
sub fetch_all_by_Slice_SampleId {
    my $self = shift;
    my ($slice,$sample_id,$display_size,$display_type,$window_size) = @_;

     my $rcc = [];

    if(!ref($slice) || !$slice->isa('Bio::EnsEMBL::Slice')) {
      throw('Bio::EnsEMBL::Slice arg expected');
    }
#    if (defined $sample) {
# if (!ref($sample) || !$sample->isa('Bio::EnsEMBL::Variation::Sample')) {
# throw('Bio::EnsEMBL::Variation::Sample arg expected');
# }
# if (! defined($sample->dbID())) {
# throw("Sample arg must have defined dbID");
# }
# }
#default display_size is 700
if (!defined $display_size) { $display_size = 700; } #default display_mode is AVERAGE
if (!defined $display_type) { $display_type = "AVERAGE"; } #set up bucket object for storing bucket_size number of scores
my $bucket_size = ($slice->length)/$display_size;
#default window size is the largest bucket that gives at least
#display_size values ie get speed but reasonable resolution
my @window_sizes = (50,500,5000); #my $num_windows = 100;
#check if valid window_size
my $found = 0; if (defined $window_size) { foreach my $win_size (@window_sizes) { if ($win_size == $window_size) { $found = 1; last; } } if (!$found) { warning("Invalid window_size $window_size"); return $rcc; } } if (!defined $window_size) { #set window_size to be the largest for when for loop fails
$window_size = $window_sizes[scalar(@window_sizes)-1]; for (my $i = 1; $i < scalar(@window_sizes); $i++) { if ($bucket_size < $window_sizes[$i]) { $window_size = $window_sizes[$i-1]; last; } } } #print "window_size is $window_size\n";
$rcc = $self->_fetch_all_by_Slice_WindowSize_SampleId($slice,$window_size,$sample_id) if defined $sample_id; $rcc = $self->_fetch_all_by_Slice_WindowSize_SampleId($slice,$window_size) if !$sample_id; if (scalar(@$rcc ==0)) { return $rcc; } #Find the min and max scores for y axis scaling. Save in first
#read_coverage_collection object
my ($min_y_axis, $max_y_axis) = _find_min_max_score($rcc); #add min and max scores to the first read_coverage_collection object
if ((scalar @$rcc) > 0) { $rcc->[0]->y_axis_min($min_y_axis); $rcc->[0]->y_axis_max($max_y_axis); } return ($rcc);
}
General documentation
AUTHOR - Yuan ChenTop
CONTACTTop
Post questions to the Ensembl development list ensembl-dev@ebi.ac.uk
fetch_all_by_Slice_SampleTop
    Arg[0]      : Bio::EnsEMBL::Slice $slice
Arg[1] : (optional) $sample_id
Arg[2] : (optional) int $display_size (default 700)
Arg[3] : (optional) int $display_type (one of "AVERAGE" or "MAX","MIN") (default "AVERAGE")
Arg[4] : (optional) int $window_size
Example : my $reads_coverages = $rcca->fetch_all_by_Slice_SampleId($slice,$sample_id,$display_size,$display_type,$window_size);
Window_size defines which set of pre-averaged scores to use.
Valid values are 50, 500 or 5000. There is no need to define
the window_size because the program will select the most
appropriate window_size to use based on the slice_length and the
display_size.
Description : Gets all the read coverage collections for a given sample(strain or individual) in a given slice
ReturnType : listref of Bio::EnsEMBL::Variation::ReadCoverageCollection
Exceptions : thrown on bad arguments
Caller : general
Status : At Risk