DensityFeatureAdaptor documentation.

sub _objs_from_sth {

  my ($self, $sth, $mapper, $dest_slice) = @_;

  #
  # This code is ugly because an attempt has been made to remove as many
  # function calls as possible for speed purposes.  Thus many caches,
  # and a fair bit of gymnastics have been used.
  #

  my $sa = $self->db()->get_SliceAdaptor();
  my $dta = $self->db()->get_DensityTypeAdaptor();

  my @features;
  my %dtype_hash;
  my %slice_hash;
  my %sr_name_hash;
  my %sr_cs_hash;

  my($density_feature_id, $seq_region_id, $seq_region_start, $seq_region_end,
     $density_value, $density_type_id );

  $sth->bind_columns(\$density_feature_id,\$ seq_region_id,\$ seq_region_start,\$
                     seq_region_end,\$ density_value,\$ density_type_id);

  my $asm_cs;
  my $cmp_cs;
  my $asm_cs_vers;
  my $asm_cs_name;
  my $cmp_cs_vers;
  my $cmp_cs_name;
  if($mapper) {
    $asm_cs = $mapper->assembled_CoordSystem();
    $cmp_cs = $mapper->component_CoordSystem();
    $asm_cs_name = $asm_cs->name();
    $asm_cs_vers = $asm_cs->version();
    $cmp_cs_name = $cmp_cs->name();
    $cmp_cs_vers = $cmp_cs->version();
  }

  my $dest_slice_start;
  my $dest_slice_end;
  my $dest_slice_strand;
  my $dest_slice_length;
  my $dest_slice_sr_name;
  my $dest_slice_sr_id;

  if($dest_slice) {
    $dest_slice_start  = $dest_slice->start();
    $dest_slice_end    = $dest_slice->end();
    $dest_slice_strand = $dest_slice->strand();
    $dest_slice_length = $dest_slice->length();
    $dest_slice_sr_name = $dest_slice->seq_region_name();
    $dest_slice_sr_id  = $dest_slice->get_seq_region_id();
  }

  FEATURE: while($sth->fetch()) {
    #get the density type object
    my $density_type = $dtype_hash{$density_type_id} ||=
      $dta->fetch_by_dbID($density_type_id);

    #get the slice object
    #need to get the internal_seq_region, if present
    $seq_region_id = $self->get_seq_region_id_internal($seq_region_id);
    my $slice = $slice_hash{"ID:".$seq_region_id};
    if(!$slice) {
      $slice = $sa->fetch_by_seq_region_id($seq_region_id);
      $slice_hash{"ID:".$seq_region_id} = $slice;
      $sr_name_hash{$seq_region_id} = $slice->seq_region_name();
      $sr_cs_hash{$seq_region_id} = $slice->coord_system();
    }

    my $sr_name = $sr_name_hash{$seq_region_id};
    my $sr_cs   = $sr_cs_hash{$seq_region_id};
 
   #
    # remap the feature coordinates to another coord system
    # if a mapper was provided
    #
    if($mapper) {

      my $len = $seq_region_end - $seq_region_start + 1;

      my @coords =
        $mapper->map($sr_name, $seq_region_start, $seq_region_end,1, $sr_cs);

      #filter out gaps
      @coords = grep {!$_->isa('Bio::EnsEMBL::Mapper::Gap')} @coords;

      #throw out density features mapped to gaps, or split
      next FEATURE if(@coords != 1);

      $seq_region_start  = $coords[0]->{'start'};
      $seq_region_end    = $coords[0]->{'end'};
      $seq_region_id     = $coords[0]->{'id'};

      if($density_type->value_type() eq 'sum') {
        #adjust density value so it reflects length of feature actually used
        my $newlen = $seq_region_end - $seq_region_start +1;
        $density_value *= $newlen/$len if($newlen != $len);
      }

      #get a slice in the coord system we just mapped to
#      if($asm_cs == $sr_cs || ($cmp_cs != $sr_cs && $asm_cs->equals($sr_cs))) {
        $slice = $slice_hash{"ID:".$seq_region_id} ||=
          $sa->fetch_by_seq_region_id($seq_region_id);
#      } else {
#        $slice = $slice_hash{"NAME:$sr_name:$asm_cs_name:$asm_cs_vers"} ||=
#          $sa->fetch_by_region($asm_cs_name, $sr_name, undef, undef, undef,
#                               $asm_cs_vers);
#      }
    }

    #
    # If a destination slice was provided convert the coords
    # If the dest_slice starts at 1 and is foward strand, nothing needs doing
    #
    if($dest_slice) {
      if($dest_slice_start != 1 || $dest_slice_strand != 1) {
        if($dest_slice_strand == 1) {
          $seq_region_start = $seq_region_start - $dest_slice_start + 1;
          $seq_region_end   = $seq_region_end   - $dest_slice_start + 1;
        } else {
          my $tmp_seq_region_start = $seq_region_start;
          $seq_region_start = $dest_slice_end - $seq_region_end + 1;
          $seq_region_end   = $dest_slice_end - $tmp_seq_region_start + 1;
        }
      }

      #throw away features entirely off the end of the requested slice
      if($seq_region_end < 1 || $seq_region_start > $dest_slice_length ||
	 ( $dest_slice_sr_id ne $seq_region_id )) {
	next FEATURE;
      }
      $slice = $dest_slice;
    }

    push( @features,
          $self->_create_feature( 'Bio::EnsEMBL::DensityFeature', {
                                    -dbID       => $density_feature_id,
                                    -adaptor    => $self,
                                    -start      => $seq_region_start,
                                    -end        => $seq_region_end,
                                    -seq_region => $slice,
                                    -density_value => $density_value,
                                    -density_type  => $density_type
                                  } ) );

  }

  return\@ features;

}

sub fetch_all_by_Slice {

  my ($self, $slice, $logic_name, $num_blocks, $interpolate, $max_ratio) = @_;

  if(defined($num_blocks) && $num_blocks < 1) {
    warning("Invalid number of density blocks [$num_blocks] requested.\n" .
	   "Returning empty list.");
    return [];
  }

  $num_blocks ||= 50;
  my $length = $slice->length();

  my $wanted_block_size = POSIX::ceil($length/$num_blocks);

  #
  # get out all of the density types and choose the one with the
  # block size closest to our desired block size
  #

  my $dta = $self->db()->get_DensityTypeAdaptor();

  my @dtypes = @{$dta->fetch_all_by_logic_name($logic_name)};
  if( ! @dtypes ){
    my @all_dtypes =  @{ $dta->fetch_all() };
    @all_dtypes or warning( "No DensityTypes in $dta" ) && return [];
    my $valid_list = join( ", ", map{$_->analysis->logic_name} @all_dtypes );
    warning( "No DensityTypes for logic name $logic_name. ".
	     "Select from $valid_list" );
    return [];
  }

  my $best_ratio   = undef;
  my $density_type = undef;
  my $best_ratio_large = undef;
  my $density_type_large = undef;
  my %dt_ratio_hash;

  foreach my $dt (@dtypes) {

    my $ratio;
    if( $dt->block_size() > 0 ) {
      $ratio = $wanted_block_size/$dt->block_size();
    } else {
      # This is only valid if the requested seq_region is the one the 
      # features are stored on. Please use sensibly. Or find better implementation.

      my $block_size = $slice->seq_region_length() / $dt->region_features();
      $ratio = $wanted_block_size / $block_size;
    }
    
    # we prefer to use a block size that's smaller than the required one
    # (better results on interpolation).
    # give larger bits a disadvantage and make them comparable
    if( $ratio < 1 ) {
      $ratio = 5/$ratio;
    }

    $dt_ratio_hash{ $ratio } = $dt;
  }

  $best_ratio = (sort {$a<=>$b} (keys %dt_ratio_hash))[0];
  
  #the ratio was not good enough, or this logic name was not in the
  #density type table, return an empty list
  if(!defined($best_ratio) ||
     (defined($max_ratio) && $best_ratio > $max_ratio)) {
    return [];
  }

  $density_type = $dt_ratio_hash{$best_ratio};

  my $constraint = "df.density_type_id = " . $density_type->dbID();

  my @features =
    @{$self->fetch_all_by_Slice_constraint($slice,$constraint)};

  return\@ features if(!$interpolate);

  #interpolate the features into new features of a different size
  my @out;
  #sort the features on start position
  @features = sort({$a->start() <=> $b->end()} @features);

  #resize the features that were returned
  my $start = 1;
  my $end   = $start+$wanted_block_size-1;

  my $value_type = $density_type->value_type();

  # create a new density type object for the interpolated features that
  # is not stored in the database
  $density_type = Bio::EnsEMBL::DensityType->new
    (-VALUE_TYPE => $value_type,
     -ANALYSIS   => $density_type->analysis(),
     -BLOCK_SIZE => $wanted_block_size);

  while($start < $length) {
#    $end = $length if($end > $length);

    my $density_value = 0.0;
    my ($f, $fstart, $fend, $portion);
    my @dvalues;

    #construct a new feature using all of the old density features that
    #we overlapped
    while(($f = shift(@features)) && $end > $f->{'start'}) {

      #what portion of this feature are we using to construct our new block?
      $fstart = ($f->{'start'} < $start) ? $start : $f->{'start'};
      $fend   = ($f->{'end'}   > $end  ) ? $end   : $f->{'end'};
      $fend   = $length if($fend > $length);

      if($value_type eq 'sum') {

        $portion = ($fend-$fstart+1)/$f->length();

        #take a percentage of density value, depending on how much of the
        #feature we overlapped
        $density_value += $portion * $f->{'density_value'};

      } elsif($value_type eq 'ratio') {

        #maintain a running total of the length used from each feature
        #and its value
        push(@dvalues,[$fend-$fstart+1,$f->{'density_value'}]);

      } else {
        throw("Unknown density value type [$value_type].");
      }

      #do not want to look at next feature if we only used part of this one:
      last if($fend < $f->{'end'});
    }

    #if we did not completely overlap the last feature, put it back on so
    #it can be partially used by the next block
    if(defined($f) && (!defined($fend) || $fend < $f->{'end'})) {
      unshift(@features, $f);
    }

    if($value_type eq 'ratio') {
      #take a weighted average of the all the density values of the features
      #used to construct this one
      my $total_len = $end - $start + 1;
      if($total_len > 0) {
        foreach my $pair (@dvalues) {
          my ($dlen, $dval) = @$pair;
          $density_value += $dval * ($dlen/$total_len);
        }
      }
    }

    # interpolated features are not stored in the db so do not set the dbID
    # or the adaptor
    push @out, Bio::EnsEMBL::DensityFeature->new
      (-seq_region    => $slice,
       -start         => $start,
       -end           => $end,
       -density_type  => $density_type,
       -density_value => $density_value);

    $start = $end + 1;
    $end  += $wanted_block_size;
  }

  return\@ out;

}

sub store {

  my ($self,@df) = @_;

  if( scalar(@df) == 0 ) {
    throw("Must call store with list of DensityFeatures");
  }
#mysql> desc density_feature;
#+--------------------+---------+------+-----+---------+----------------+
#| Field              | Type    | Null | Key | Default | Extra          |
#+--------------------+---------+------+-----+---------+----------------+
#| density_feature_id | int(11) |      | PRI | NULL    | auto_increment |
#| density_type_id    | int(11) |      | MUL | 0       |                |
#| seq_region_id      | int(11) |      |     | 0       |                |
#| seq_region_start   | int(11) |      |     | 0       |                |
#| seq_region_end     | int(11) |      |     | 0       |                |
#| density_value      | float   |      |     | 0       |                |
#+--------------------+---------+------+-----+---------+----------------+

  my $sth = $self->prepare
    ("INSERT INTO density_feature (seq_region_id, seq_region_start, " .
                                  "seq_region_end, density_type_id, " .
                                  "density_value) " .
     "VALUES (?,?,?,?,?)");

  my $db = $self->db();
  my $analysis_adaptor = $db->get_AnalysisAdaptor();

 FEATURE: foreach my $df ( @df ) {

    if( !ref $df || !$df->isa("Bio::EnsEMBL::DensityFeature") ) {
      throw("DensityFeature must be an Ensembl DensityFeature, " .
            "not a [".ref($df)."]");
    }
    
    # we dont store 0 value density features
    next if( $df->density_value == 0 );
    if($df->is_stored($db)) {
      warning("DensityFeature [".$df->dbID."] is already stored" .
              " in this database.");
      next FEATURE;
    }

    if(!defined($df->density_type)) {
      throw("A density type must be attached to the features to be stored.");
    }

    #store the density_type if it has not been stored yet

    if(!$df->density_type->is_stored($db)) {
      my $dta = $db->get_DensityTypeAdaptor();
      $dta->store($df->density_type());
    }

    my $original = $df;
    my $seq_region_id;
    ($df, $seq_region_id) = $self->_pre_store($df);

    $sth->bind_param(1,$seq_region_id,SQL_INTEGER);
    $sth->bind_param(2,$df->start,SQL_INTEGER);
    $sth->bind_param(3,$df->end,SQL_INTEGER);
    $sth->bind_param(4,$df->density_type->dbID,SQL_INTEGER);
    $sth->bind_param(5,$df->density_value,SQL_FLOAT);
    $sth->execute();

    $original->dbID($sth->{'mysql_insertid'});
    $original->adaptor($self);
  }

}

_columns	No description	Code
_objs_from_sth	No description	Code
_tables	No description	Code
fetch_Featureset_by_Slice	Description	Code
fetch_all_by_Slice	Description	Code
list_dbIDs	Description	Code
new	Description	Code
store	Description	Code