Bio::EnsEMBL::Collection - Abstract base class for feature collection
Included modules
Bio::EnsEMBL::Utils::Argument ( 'rearrange' )
Bio::EnsEMBL::Utils::Exception ( 'throw' )
  # Create an exon collection adaptor.
my $collection =
$registry->get_adaptor( 'Human', 'Core', 'ExonCollection' );
  # Create a slice.
  my $slice =
    $slice_adaptor->fetch_by_region( 'Chromosome', '2', 1, 300_000 );

  # Fetch the exons, represented as simple arrays of attributes, on
  # the slice.
  my @exons = @{ $collection->fetch_all_by_Slice($slice) };

  # Fetch the exons on the slice and count them into 50 bins (each
  # bin contains the number of exons overlapping that bin).
  my @bins = @{
    $collection->fetch_bins_by_Slice( -slice  => $slice,
                                      -nbins  => 50,
                                      -method => 'density' ) };

  # Fetch the exons on the slice and calculate the coverage of 50
  # bins (each bin contains the coverage of that bin by any exon as
  # a number between 0 and 1).
  my @bins = @{
    $collection->fetch_bins_by_Slice( -slice  => $slice,
                                      -nbins  => 50,
                                      -method => 'coverage' ) };
This is the abstract base class for feature collections.
In short, this class replaces the object creation routines in
Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor with routines that create
flat arrays. It also provides a way of binning features on a slice in
various different ways (see below).
A feature collection provides a compact representation of features of
a particular type on a slice. Each feature is represented by a short
array of attribute data. This data is divided into two halves:
    Basic feature representation.
    Extended feature representation.
The basic feature representation is common to all features of any type
and consists of a minimal set of values. Each feature is represented by
an array that contains at least the following data (in this order)
    Ensembl internal database ID.
    Ensembl internal sequence region ID.
    Feature start position.
    Feature end position.
    Feature strand.
The module defines a number of constants that may be used in place
of the index numbers 0 to 4: FEATURE_DBID, FEATURE_SEQREGIONID,
FEATURE_START, FEATURE_END, FEATURE_STRAND. For a given feature $f,
$f->[Bio::EnsEMBL::Collection::FEATURE_END] will thus be the end
position for the feature that the array @{$f} represents.
The position of the feature is in the same coordinate system as the
slice used to fetch the features. A sub-class of this abstract base class will specify further
data to be added to the features in order to account for the
particular feature type. A feature from a gene feature collection
(Bio::EnsEMBL::Collection::Gene) might, for example, contain the Ensembl
Stable ID of the gene.
The extended feature representation is defined by the method
_create_feature() or _create_feature_fast() which is implemented by the
sub-class. The extended representation is also documented in the POD of
each sub-class. A light-weight feature is a feature whose representation does not
contain the extended feature representation.
Light-weight features may be fetched by setting the lightweight
attribute using the lightweight() method before calling e.g.
fetch_all_by_Slice().
Methods code
# _bin_features
#
# Distributes a list of light-weight features over a number of equally
# sized bins laid out along a slice.
#
# Named arguments (unpacked with rearrange()):
#   SLICE     Object providing start() and end(); defines the binned
#             region.
#   NBINS     Number of bins; must be defined and greater than zero.
#   METHOD    Name of the binning method, looked up in
#             %VALID_BINNING_METHODS.  Defaults to 'count'.
#   FEATURES  Array reference of feature arrays; only the FEATURE_START
#             and FEATURE_END slots are read here.
#
# Returns an array reference with one entry per bin.  An empty array
# reference is returned when FEATURES is undefined or empty (this check
# happens before NBINS and METHOD are validated).
#
# Throws if NBINS is missing or not positive, or if METHOD is not a key
# of %VALID_BINNING_METHODS.
#
# Method codes, per the inline comments: 0 is 'count'/'density',
# 3 is 'fractional_count'/'weight', 4 is 'coverage'.
sub _bin_features {
  my ( $this, @args ) = @_;

  my ( $slice, $nbins, $method_name, $features ) =
    rearrange( [ 'SLICE', 'NBINS', 'METHOD', 'FEATURES' ], @args );

  if ( !defined($features) || !@{$features} ) { return [] }

  if ( !defined($nbins) ) {
    throw('Missing NBINS argument');
  } elsif ( $nbins <= 0 ) {
    throw('Negative or zero NBINS argument');
  }

  $method_name ||= 'count';
  if ( !exists( $VALID_BINNING_METHODS{$method_name} ) ) {
    throw(
      sprintf(
        "Invalid binning method '%s', valid methods are:\n\t%s\n",
        $method_name,
        join( "\n\t", sort( keys(%VALID_BINNING_METHODS) ) ) ) );
  }
  my $method = $VALID_BINNING_METHODS{$method_name};

  my $slice_start = $slice->start();

  # Bin length may be fractional; bin boundaries are derived from it
  # with int() where integer positions are needed.
  my $bin_length = ( $slice->end() - $slice_start + 1 )/$nbins;

  my @bins;
  if ( $method == 0 ||    # 'count' or 'density'
       $method == 3 ||    # 'fractional_count' or 'weight'
       $method == 4       # 'coverage'
    )
  {
    # For binning methods where each bin contain numerical values.
    @bins = map( 0, 1 .. $nbins );
  } else {
    # For binning methods where each bin does not contain numerical
    # values.
    @bins = map( undef, 1 .. $nbins );
  }

  # Per-bin coverage masks, only used by the 'coverage' method.  An
  # entry is undef (nothing covered), the scalar 1 (bin fully covered)
  # or an array reference with a 1 at every covered position.
  my @bin_masks;

  foreach my $feature ( @{$features} ) {
    my $start_bin =
      int( ( $feature->[FEATURE_START] - $slice_start )/$bin_length );
    my $end_bin =
      int( ( $feature->[FEATURE_END] - $slice_start )/$bin_length );

    if ( $start_bin < 0 ) {
      # A feature overhanging the start of the slice would otherwise
      # produce a negative index, which Perl resolves from the END of
      # @bins.  Attribute the overhang to the first bin instead,
      # mirroring the clamp applied to $end_bin below.
      $start_bin = 0;
    }

    if ( $end_bin >= $nbins ) {
      # This might happen for the very last entry.
      $end_bin = $nbins - 1;
    }

    if ( $method == 0 ) {
      # ----------------------------------------------------------------
      # For 'count' and 'density'.

      # Every bin touched by the feature is incremented by one.
      for ( my $bin_index = $start_bin;
            $bin_index <= $end_bin;
            ++$bin_index )
      {
        ++$bins[$bin_index];
      }

    }
    # Method 1 & 2 were deleted.
    elsif ( $method == 3 ) {
      # ----------------------------------------------------------------
      # For 'fractional_count' and 'weight'.

      if ( $start_bin == $end_bin ) {
        # The whole feature falls into a single bin.
        ++$bins[$start_bin];
      } else {
        my $feature_length =
          $feature->[FEATURE_END] - $feature->[FEATURE_START] + 1;

        # The first bin...
        $bins[$start_bin] +=
          ( ( $start_bin + 1 )*$bin_length -
            ( $feature->[FEATURE_START] - $slice_start ) )/
          $feature_length;

        # The intermediate bins (if there are any)...
        for ( my $bin_index = $start_bin + 1;
              $bin_index <= $end_bin - 1;
              ++$bin_index )
        {
          $bins[$bin_index] += $bin_length/$feature_length;
        }

        # The last bin...
        $bins[$end_bin] +=
          ( ( $feature->[FEATURE_END] - $slice_start ) -
            $end_bin*$bin_length + 1 )/
          $feature_length;

      } ## end else [ if ( $start_bin == $end_bin)

    } elsif ( $method == 4 ) {
      # ----------------------------------------------------------------
      # For 'coverage'.

      # Feature coordinates relative to the start of the slice.
      my $feature_start = $feature->[FEATURE_START] - $slice_start;
      my $feature_end   = $feature->[FEATURE_END] - $slice_start;

      if ( !defined( $bin_masks[$start_bin] )
           || $bin_masks[$start_bin] != 1 )
      {
        # Mask the $start_bin from the start of the feature to the end
        # of the bin, or to the end of the feature (whichever occurs
        # first).
        my $bin_start = int( $start_bin*$bin_length );
        my $bin_end   = int( ( $start_bin + 1 )*$bin_length - 1 );

        # Never start before the bin itself; this only differs from
        # $feature_start when the feature overhangs the slice start.
        my $first_pos =
          ( $feature_start < $bin_start ) ? $bin_start : $feature_start;

        for ( my $pos = $first_pos;
              $pos <= $bin_end && $pos <= $feature_end;
              ++$pos )
        {
          $bin_masks[$start_bin][ $pos - $bin_start ] = 1;
        }
      }

      for ( my $bin_index = $start_bin + 1;
            $bin_index <= $end_bin - 1;
            ++$bin_index )
      {
        # Mark the middle bins between $start_bin and $end_bin as fully
        # masked out.
        $bin_masks[$bin_index] = 1;
      }

      if ( $end_bin != $start_bin ) {

        if ( !defined( $bin_masks[$end_bin] )
             || $bin_masks[$end_bin] != 1 )
        {
          # Mask the $end_bin from the start of the bin to the end of
          # the feature, or to the end of the bin (whichever occurs
          # first).
          my $bin_start = int( $end_bin*$bin_length );
          my $bin_end   = int( ( $end_bin + 1 )*$bin_length - 1 );

          for ( my $pos = $bin_start;
                $pos <= $feature_end && $pos <= $bin_end;
                ++$pos )
          {
            $bin_masks[$end_bin][ $pos - $bin_start ] = 1;
          }
        }

      }

    } ## end elsif ( $method == 4 )

  } ## end foreach my $feature ( @{$features...

  if ( $method == 4 ) {
    # ------------------------------------------------------------------
    # For the 'coverage' method: Finish up by going through @bin_masks
    # and sum up the arrays.

    for ( my $bin_index = 0; $bin_index < $nbins; ++$bin_index ) {
      if ( defined( $bin_masks[$bin_index] ) ) {
        if ( !ref( $bin_masks[$bin_index] ) ) {
          # Scalar mask: the bin is completely covered.
          $bins[$bin_index] = 1;
        } else {
          # Array mask: covered positions divided by the bin length.
          $bins[$bin_index] =
            scalar( grep ( defined($_), @{ $bin_masks[$bin_index] } ) )/
            $bin_length;
        }
      }
    }
  }

  return \@bins;
} ## end sub _bin_features
# _create_feature
#
# Builds the basic (five element) array representation of a feature.
#
# Arguments:
#   $this          The collection object (not used here).
#   $feature_type  Type of the feature being created (not used here).
#   $args          Hash reference that is flattened and handed to
#                  rearrange() to pick out DBID, START, END, STRAND and
#                  SLICE.  NOTE(review): the exact key spelling accepted
#                  is decided by rearrange() -- confirm against callers.
#
# Returns an array reference:
#   [ dbID, seq region ID of the slice, start, end, strand ]
sub _create_feature {
  my ( $this, $feature_type, $args ) = @_;

  my ( $dbid, $start, $end, $strand, $slice ) =
    rearrange( [ 'DBID', 'START', 'END', 'STRAND', 'SLICE' ],
               %{$args} );

  # The seq region ID is looked up on the slice for every feature.
  my $feature =
    [ $dbid, $slice->get_seq_region_id(), $start, $end, $strand ];

  return $feature;
}
# _create_feature_fast
#
# Builds the basic (five element) array representation of a feature,
# reading the values straight out of the $args hash and caching the
# seq region ID of the most recently seen slice on the object.
#
# Arguments:
#   $this          The collection object; the keys 'recent_slice' and
#                  'recent_slice_seq_region_id' are used as a cache.
#   $feature_type  Type of the feature being created (not used here).
#   $args          Hash reference with the keys 'dbID', 'start', 'end',
#                  'strand' and 'slice'.
#
# Returns an array reference:
#   [ dbID, seq region ID of the slice, start, end, strand ]
sub _create_feature_fast {
  my ( $this, $feature_type, $args ) = @_;

  # Only ask the slice for its seq region ID when the slice differs
  # from the one used for the previous feature.  References are compared
  # by their string form.
  if ( !defined( $this->{'recent_slice'} )
       || $this->{'recent_slice'} ne $args->{'slice'} )
  {
    $this->{'recent_slice'} = $args->{'slice'};
    $this->{'recent_slice_seq_region_id'} =
      $args->{'slice'}->get_seq_region_id();
  }

  my $feature = [ $args->{'dbID'},
                  $this->{'recent_slice_seq_region_id'},
                  $args->{'start'},
                  $args->{'end'},
                  $args->{'strand'} ];

  return $feature;
}

# ======================================================================
# Private methods
# _remap
#
# Replacement for the remapping step: no remapping is performed.  The
# features are returned unchanged, after checking that the first feature
# lies on the same sequence region as the slice.
#
# Arguments:
#   $this      The collection object (not used here).
#   $features  Array reference of feature arrays.
#   $mapper    Not used here.
#   $slice     Object providing get_seq_region_id().
#
# Returns the $features array reference that was passed in.
#
# Throws if the list is non-empty and the seq region ID of its first
# feature differs from that of the slice.  Only the first feature is
# inspected.
sub _remap {
  my ( $this, $features, $mapper, $slice ) = @_;

  if ( scalar( @{$features} ) > 0 ) {
    if ( $slice->get_seq_region_id() !=
         $features->[0][FEATURE_SEQREGIONID] )
    {
      throw(   '_remap() in Bio::EnsEMBL::Collection '
             . 'was unexpectedly expected to be intelligent' );
    }
  }

  return $features;
}
# fetch_bins_by_Slice
#
# Fetches the features on a slice and bins them.
#
# Named arguments (unpacked with rearrange()):
#   SLICE       The slice to fetch features from and to bin over.
#   METHOD      Binning method name, passed on to _bin_features().
#   NBINS       Number of bins, passed on to _bin_features().
#   LOGIC_NAME  Passed as second argument to fetch_all_by_Slice().
#
# Returns whatever _bin_features() returns (an array reference of bins).
#
# The collection is switched to light-weight mode while the features are
# fetched and binned.  The previous setting is put back afterwards, also
# when fetching or binning throws; the error is then re-raised.
sub fetch_bins_by_Slice {
  my ( $this, @args ) = @_;

  my ( $slice, $method, $nbins, $logic_name ) =
    rearrange( [ 'SLICE', 'METHOD', 'NBINS', 'LOGIC_NAME' ], @args );

  # Temporarily set the collection to be lightweight.
  my $old_value = $this->lightweight();
  $this->lightweight(1);

  my $bins;
  my $succeeded = eval {
    $bins = $this->_bin_features(
                  -slice    => $slice,
                  -nbins    => $nbins,
                  -method   => $method,
                  -features =>
                    $this->fetch_all_by_Slice( $slice, $logic_name ) );
    1;
  };

  # Save the error before calling anything else that might reset $@.
  my $error = $@;

  # Reset the lightweightness to whatever it was before.
  $this->lightweight($old_value);

  if ( !$succeeded ) {
    die( $error || "Unknown error in fetch_bins_by_Slice()\n" );
  }

  return $bins;
}

# ======================================================================
# Specialization of methods in Bio::EnsEMBL::DBSQL::BaseFeatureAdaptor
# lightweight
#
# Combined getter/setter for the 'lightweight' attribute.
#
# Arguments:
#   $this   The collection object (a hash reference).
#   $value  Optional.  When defined, the attribute is set to the result
#           of the numeric test ( $value != 0 ).
#
# Returns the current value of the attribute, or 0 when it is unset or
# false.
sub lightweight {
  my ( $this, $value ) = @_;

  if ( defined($value) ) {
    $this->{'lightweight'} = ( $value != 0 );
  }

  # The stored false value is the empty string; present it as 0.
  return $this->{'lightweight'} || 0;
}
