#
# Ensembl web module for EnsEMBL::Web::Text::DensityFeatureParser
#
# Cared for by James Smith <js5@sanger.ac.uk>
#
# Copyright James Smith
#
# You may distribute this module under the same terms as perl itself

=head1 NAME

EnsEMBL::Web::Text::DensityFeatureParser - Density from text based data (URL,file or text)

=head1 SYNOPSIS

    my $dfp = new EnsEMBL::Web::Text::DensityFeatureParser();
       $dfp->no_of_bins(150);
       $dfp->bin_size($chr_length/150);
       $dfp->filter( '1' );
       $dfp->current_key( 'default' );

    $dfp->parse( $DATA );

    print "@{[$dfp->feature_types]}\n";

=head1 DESCRIPTION

Handles text based data in standard forms (PSL, BED, ...) and computes chromosomal densities

=head1 CONTACT

James Smith <js5@sanger.ac.uk>

=head1 APPENDIX

The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _

=cut

package EnsEMBL::Web::Text::DensityFeatureParser;

use EnsEMBL::Web::Text::FeatureParser;

@EnsEMBL::Web::Text::DensityFeatureParser::ISA =
  qw(EnsEMBL::Web::Text::FeatureParser);

use strict;
use warnings;
no warnings "uninitialized";

=head2 no_of_bins

 Title   : no_of_bins
 Usage   : $dfp->no_of_bins( 150 ) or $X = $dfp->no_of_bins();
 Function: Getter/setter for the number of bins to display
 Returns : integer (no of bins)
 Args    : integer - no of bins [optional - setting]

=cut

sub no_of_bins {
  # Combined getter/setter: any supplied argument (even an explicit undef)
  # replaces the stored number of bins; the current value is always returned.
  my ( $self, @new_value ) = @_;
  if( @new_value ) {
    $self->{'_no_of_bins'} = $new_value[0];
  }
  return $self->{'_no_of_bins'};
}

=head2 bin_size

 Title   : bin_size
 Usage   : $dfp->bin_size( 150 ) or $X = $dfp->bin_size();
 Function: Getter/setter for the size of each bin
 Returns : number (size of bin)
 Args    : number - size of bin [optional - setting]

=cut

sub bin_size {
  # Combined getter/setter for the width of a single bin. Passing an
  # argument (even undef) overwrites the stored size before it is returned.
  my ( $self, @new_size ) = @_;
  $self->{'_bin_size'} = $new_size[0] if scalar @new_size;
  return $self->{'_bin_size'};
}

=head2 store_feature

 Title   : store_feature
 Usage   : $dfp->store_feature( $type, $feature );
 Function: Adds a feature of given type ($type) to the data, updating the density information
           about the feature and updating the total count for that feature
 Returns : number of features of this type stored before the call (not normally used)
 Args    : string (type), feature object (providing seqname, rawstart and rawend)

=cut

sub store_feature {
  # Adds one feature to the density data for the given type.
  #
  #   $key     - feature type the feature is filed under
  #   $feature - object providing seqname, rawstart and rawend methods
  #
  # Every bin between the bin holding rawstart and the bin holding rawend
  # (inclusive) is incremented for the feature's sequence, and the per-type
  # feature count is incremented. Returns the number of features of this
  # type that had been stored before this call.
  #
  # Dies if bin_size has not been set to a positive number.
  my ( $self, $key, $feature ) = @_;
  my $bin_size   = $self->{'_bin_size'};
  my $no_of_bins = $self->{'_no_of_bins'};

  # Fail with a meaningful message rather than "Illegal division by zero".
  die "store_feature: bin_size must be set to a positive number"
    unless $bin_size > 0;

  my $chr       = $feature->seqname;
  my $first_bin = int( $feature->rawstart / $bin_size );
  my $last_bin  = int( $feature->rawend   / $bin_size );

  # One zero-initialised slot per bin, created the first time this
  # type/sequence combination is seen.
  my $bins = $self->{'_bins'}{$key}{$chr} ||= [ (0) x $no_of_bins ];

  if( $no_of_bins >= 1 ) {
    my $top_bin = $no_of_bins - 1;
    # Clamp both ends into the valid range. A negative index would silently
    # increment bins counted from the end of the array, and a start landing
    # exactly on bin $no_of_bins (e.g. the final base of the sequence) would
    # otherwise give an empty range and never be binned.
    foreach my $bin ( $first_bin, $last_bin ) {   # aliases the two variables
      $bin = 0        if $bin < 0;
      $bin = $top_bin if $bin > $top_bin;
    }
    # If start lies beyond end the range is empty and nothing is binned.
    $bins->[$_]++ foreach $first_bin..$last_bin;
  }

  return $self->{'_counts'}{$key}++;
}

=head2 max_values

 Title   : max_values
 Usage   : $dfp->max_values
 Function: Returns a hashref of the feature types (key) and max density (value)
 Returns : hash ref
 Args    : none

=cut

sub max_values {
  # Builds a hash ref mapping each feature type to the largest bin value
  # found for that type on any sequence. Every type with a stored count
  # starts at 0.
  my( $self ) = @_;

  my %peak_for;
  $peak_for{$_} = 0 for keys %{ $self->{'_counts'} };

  for my $feature_type ( $self->feature_types ) {
    # Only the density arrays matter here, not the sequence names.
    for my $densities ( values %{ $self->{'_bins'}{$feature_type} } ) {
      for my $density ( @{$densities} ) {
        next unless $density > $peak_for{$feature_type};
        $peak_for{$feature_type} = $density;
      }
    }
  }

  return \%peak_for;
}

=head2 feature_types

 Title   : feature_types
 Usage   : $dfp->feature_types
 Function: Return a list of the feature types
 Returns : List
 Args    : none

=cut

sub feature_types {
  # The feature types are the keys of the per-type count hash.
  my( $self ) = @_;
  return keys %{ $self->{'_counts'} };
}

=head2 features_of_type

 Title   : features_of_type
 Usage   : $dfp->features_of_type( 'type_1' );
 Function: Return a hash reference to chromosomes which have features of type "type_1"
           Each hash ref is keyed on chromosome - and each entry is an array of densities
 Returns : hash ref
 Args    : string (type)

=cut

sub features_of_type {
  # Looks up the per-sequence density arrays stored for the requested type;
  # yields undef when nothing has been stored for it.
  my( $self, $type ) = @_;
  return $self->{'_bins'}{$type};
}

=head2 counts

 Title   : counts
 Usage   : $dfp->counts
 Function: Returns a hashref of the feature types (key) and no. of features (value)
 Returns : hash ref
 Args    : none

=cut

sub counts {
  # Hands back the internal hash ref of feature type => number of features.
  my( $self ) = @_;
  return $self->{'_counts'};
}

1;