# Raw content of Bio::EnsEMBL::Funcgen::ResultSet
#
# Ensembl module for Bio::EnsEMBL::Funcgen::ResultSet
#
# You may distribute this module under the same terms as Perl itself
=head1 NAME
Bio::EnsEMBL::Funcgen::ResultSet - A module to represent ResultSet.
=head1 SYNOPSIS
use Bio::EnsEMBL::Funcgen::ResultSet;
my $result_set = Bio::EnsEMBL::Funcgen::ResultSet->new(
-dbid => $dbid,
-analysis => $analysis,
-table_name => 'experimental_chip',
-table_id => $ec_id,
-type => 'result',
);
=head1 DESCRIPTION
A ResultSet object provides access to a set of raw results from an Experiment. A set will be one or more
contiguous chips to be treated as one set, with the same analysis. Duplicate sets will form a separate
result set, as will the same raw data analysed or normalised in a different manner.
=head1 AUTHOR
This module was created by Nathan Johnson.
This module is part of the Ensembl project: http://www.ensembl.org/
=head1 CONTACT
Post comments or questions to the Ensembl development list: ensembl-dev@ebi.ac.uk
=head1 METHODS
=cut
#To do
#Change add_table_id to add_ExperimentalChip_Channel?
use strict;
use warnings;
package Bio::EnsEMBL::Funcgen::ResultSet;
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( throw );
use Bio::EnsEMBL::Funcgen::Set;
use vars qw(@ISA);
@ISA = qw(Bio::EnsEMBL::Funcgen::Set);
=head2 new

  Arg [-TABLE_NAME]         : string - mandatory, name of the table this set is
                              built on e.g. 'experimental_chip'
  Arg [-TABLE_ID]           : (optional) int - first table id to add to the set
  Arg [-RESULT_FEATURE_SET] : (optional) boolean - sets the result_feature_set
                              attribute
  Arg [...]                 : the complete argument list is also passed on to
                              the parent class constructor
  Example    : my $feature = Bio::EnsEMBL::Funcgen::ResultSet->new(
                                                                   -dbid               => $dbid,
                                                                   -analysis           => $analysis,
                                                                   -table_name         => 'experimental_chip',
                                                                   -table_id           => $ec_id,
                                                                   -result_feature_set => 1,
                                                                  );
  Description: Constructor for ResultSet objects.
  Returntype : Bio::EnsEMBL::Funcgen::ResultSet
  Exceptions : Throws if no -table_name is passed
  Caller     : General
  Status     : At risk

=cut

sub new {
  my $caller = shift;
  my $class  = ref($caller) || $caller;

  #The parent constructor sees the whole argument list
  my $self = $class->SUPER::new(@_);

  #This constructor used to look only for the misspelled -RESULLT_FEATURE_SET,
  #so the documented -result_feature_set argument was silently dropped.
  #The correct spelling is now read first; the legacy key is still honoured
  #so any caller relying on it keeps working.
  my ($table_name, $table_id, $rf_set, $legacy_rf_set)
    = rearrange(['TABLE_NAME', 'TABLE_ID', 'RESULT_FEATURE_SET', 'RESULLT_FEATURE_SET'], @_);
  $rf_set = $legacy_rf_set if ! defined $rf_set;

  #Cache of table_id => chip_channel_id
  $self->{'table_id_hash'} = {};

  #maybe don't need the analysis args as mandatory as we're testing in the adaptor store method
  if (! $table_name) {
    throw("Need to pass the following arg:\t-table_name");
  }

  #do we need some control of creating new objects with dbID and adding result_groups/feature_sets and then storing/updating them
  #potential for someone to create one from new using a duplicate dbID and then linking incorrect data to a pre-existing ResultGroup
  #we need to verify that each table_name/id in the set is from the same experiment

  $self->table_name($table_name);
  $self->add_table_id($table_id)       if $table_id;
  $self->result_feature_set($rf_set)   if $rf_set;

  return $self;
}
#methods
#set wide display label(predicted_feature) + more wordy label for wiggle tracks?
#defined by experiment type i.e. time course would require timepoint in display label
#deal with this dynamically or have display_label in table
#Need call on type, or fetch all would
#_get_ec_ids or contigsets?
#this should now be an intrinsic part of this class/adaptor
#cell line
#feature_type
#displayable...should have one for the whole set and one for each raw and predicted?
#have analysis as arg? Or do we get all analysis sets?
#we need to be able to set analyses for ResultSets dynamically from DB
#pick up all ResultSets
#displayable field in ResultSets also?
#If we have mixed types in the same experiment then we could get promoter features and histone wiggle tracks displayed together
#Not v.good for display purposes? We may want to separate the promoter and histone tracks, or we may want all the experiment data together but of mixed types.
#We need to be able to pull back the experiment type for each set, therefore this needs setting on an ec level, not an experiment level.
#This is also v.reliant on putting contig set info in place, otherwise we may get mixed chip types in same set.
#get_raw_analysis_name
#get_predicted_feature_analysis_name
#set ResultFeatures and PredictedFeatures in hash keyed by analysis_name?
=head2 result_feature_set

  Arg [1]    : optional - boolean 0 or 1.
  Example    : if($rset->result_feature_set){ ...use result_feature table ...};
  Description: Getter and setter for the result_feature_set attribute.
               When an argument is passed it replaces the stored value,
               whatever that argument is.
  Returntype : boolean
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub result_feature_set {
  my ($self, @args) = @_;

  #Only the presence of an argument matters, so 0 and undef can be set too
  if (@args) {
    $self->{'result_feature_set'} = $args[0];
  }

  return $self->{'result_feature_set'};
}
=head2 table_name

  Arg [1]    : (optional) string - table_name (experimental_chip or channel)
  Example    : $result_set->table_name('experimental_chip');
  Description: Getter and setter for the table_name for this ResultSet.
               Once a table name has been set it can only be set again to
               the same value.
  Returntype : string
  Exceptions : Throws if a different table name has already been set
  Caller     : General
  Status     : At Risk

=cut

sub table_name {
  my ($self, @args) = @_;

  if (@args) {
    my $new_name = $args[0];
    my $current  = $self->{'table_name'};

    #A set is built on exactly one table
    if ($current && ($current ne $new_name)) {
      throw("Cannot mix table name/types of a ResultSet");
    }

    $self->{'table_name'} = $new_name;
  }

  return $self->{'table_name'};
}
=head2 add_table_id

  Arg [1]    : int - table id
  Arg [2]    : (optional) int - chip_channel_id to associate with the table id
  Example    : $result_set->add_table_id($ec_id, $cc_id);
  Description: Caches a table_id together with its chip_channel_id in the
               ResultSet.
               The unique chip_channel_id is used to key into the result table,
               it also reduces redundancy and enables mapping of results to
               chips rather than just the ResultSet. This enables result
               retrieval based on chips in the same set which have a differing
               status.
               A table id cached without a chip_channel_id can be given one
               later, but a defined chip_channel_id cannot be replaced.
  Returntype : None
  Exceptions : Throws if no table_id defined
               Throws if the table_id already has a defined chip_channel_id
  Caller     : General
  Status     : At Risk

=cut

sub add_table_id {
  my ($self, $table_id, $cc_id) = @_;

  throw("Need to pass a table_id") if ! defined $table_id;

  #An undefined cached value means "known table id, chip_channel_id not yet set"
  if (defined $self->{'table_id_hash'}->{$table_id}) {
    throw("You are attempting to redefine a chip_channel_id which is already defined");
  }

  $self->{'table_id_hash'}->{$table_id} = $cc_id;

  return;
}
=head2 table_ids

  Example    : my @table_ids = @{$result_set->table_ids()};
  Description: Getter for the table ids cached in this ResultSet.
               A new array is built on every call, in no particular order.
  Returntype : arrayref
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub table_ids {
  my ($self) = @_;

  my @ids = keys %{ $self->{'table_id_hash'} };

  return \@ids;
}
=head2 chip_channel_ids

  Example    : my @rset_cc_ids = @{$result_set->chip_channel_ids()};
  Description: Getter for the chip channel ids for this ResultSet, i.e. the
               values held in the table id cache. A new array is built on
               every call, in no particular order.
  Returntype : arrayref
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub chip_channel_ids {
  my ($self) = @_;

  my @cc_ids = values %{ $self->{'table_id_hash'} };

  return \@cc_ids;
}
=head2 contains

  Arg [1]    : Channel or ExperimentalChip object
  Example    : if($result_set->contains($chip_or_channel)){...do some chip or channel operations here...};
  Description: Returns true if the given Channel or ExperimentalChip is part of this ResultSet.
               The table of the argument is taken from the first entry
               returned by its adaptor's _tables method.
  Returntype : boolean
  Exceptions : warns if ResultSet table name is not of argument type
  Caller     : General
  Status     : At Risk

=cut

sub contains {
  my ($self, $chip_channel) = @_;

  #Only the first table definition is of interest, and only its name
  my ($first_table) = $chip_channel->adaptor->_tables();
  my $table_name    = $first_table->[0];

  if ($table_name eq $self->table_name()) {
    return (exists $self->{'table_id_hash'}->{$chip_channel->dbID()}) ? 1 : 0;
  }

  warn("ResultSet(".$self->table_name().") cannot contain ${table_name}s");

  return 0;
}
=head2 get_chip_channel_id

  Arg [1]    : int - ExperimentalChip dbID
  Example    : $result_set->get_chip_channel_id($ec_id);
  Description: Retrieves a chip_channel_id from the cache given an ExperimentalChip dbID
  Returntype : int, or undef if the id is not part of this ResultSet
  Exceptions : none
  Caller     : General
  Status     : At Risk

=cut

sub get_chip_channel_id {
  my ($self, $table_id) = @_;

  if (exists $self->{'table_id_hash'}->{$table_id}) {
    return $self->{'table_id_hash'}->{$table_id};
  }

  #Explicit undef so that list context callers still receive exactly one value
  return undef;
}
=head2 get_ExperimentalChips

  Example    : my @ecs = @{$result_set->get_ExperimentalChips()};
  Description: Retrieves the ExperimentalChips of this ResultSet. For an
               experimental_chip ResultSet each table id is fetched as an
               ExperimentalChip. For any other table name the table ids are
               fetched as Channels and the distinct ExperimentalChips of those
               Channels are returned. The list is cached after the first call.
  Returntype : Listref of ExperimentalChip objects (empty if there are none)
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_ExperimentalChips {
  my $self = shift;

  if (! defined $self->{'experimental_chips'}) {
    my $db         = $self->adaptor->db;
    my $ec_adaptor = $db->get_ExperimentalChipAdaptor();

    #Collected locally so that an empty set still yields (and caches) a listref.
    #Previously an experimental_chip set without table ids returned undef.
    my @chips;

    if ($self->table_name() eq "experimental_chip") {
      #should this be hashed on chip_channel_id?
      @chips = map { $ec_adaptor->fetch_by_dbID($_) } @{$self->table_ids()};
    }
    else {
      #Channel ResultSet: several channels can share a chip, so key on chip id
      my %echips;
      my $chan_adaptor = $db->get_ChannelAdaptor();

      foreach my $chan_id (@{$self->table_ids()}) {
        my $chan  = $chan_adaptor->fetch_by_dbID($chan_id);
        my $ec_id = $chan->experimental_chip_id;
        $echips{$ec_id} ||= $ec_adaptor->fetch_by_dbID($ec_id);
      }

      @chips = values %echips;
    }

    $self->{'experimental_chips'} = \@chips;
  }

  return $self->{'experimental_chips'};
}
=head2 get_replicate_set_by_chip_channel_id

  Arg [1]    : int - chip_channel_id
  Example    : my $rep_set_name = $result_set->get_replicate_set_by_chip_channel_id($cc_id);
  Description: Retrieves the replicate set name defined by the corresponding ExperimentalChip.
               The chip_channel_id to replicate mapping is built on the first
               call and cached.
  Returntype : String - replicate set name, or undef if none is cached for the id
  Exceptions : None
  Caller     : General
  Status     : At Risk - implement for Channels?

=cut

sub get_replicate_set_by_chip_channel_id {
  my ($self, $cc_id) = @_;

  if (! defined $self->{'_replicate_cache'}) {
    warn "Generating replicate cache!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!";

    #Built locally and always stored, even when empty, so the cache is
    #generated (and the warning above emitted) only once per object
    my %cache;

    foreach my $ec (@{$self->get_ExperimentalChips() || []}) {
      my $ec_cc_id = $self->get_chip_channel_id($ec->dbID());

      #No chip_channel_id for this chip dbID in this set, nothing to key on
      next if ! defined $ec_cc_id;

      $cache{$ec_cc_id} = $ec->replicate();
    }

    $self->{'_replicate_cache'} = \%cache;
  }

  #warn here of absent replicate info?
  return (exists $self->{'_replicate_cache'}{$cc_id}) ? $self->{'_replicate_cache'}{$cc_id} : undef;
}
=head2 get_result_table

  Example    : my $result_table = $rset->get_result_table();
  Description: Getter for the result table name for this ResultSet.
               Currently always 'result'.
  Returntype : String
  Exceptions : None
  Caller     : General
  Status     : At Risk - extend to use bins

=cut

sub get_result_table {
  my $self = shift;

  #This method should be extended to use bins if we pass a range.
  #The code that used to follow the early return here was unreachable and
  #could only ever produce 'result' as well (the per experiment table name
  #'experiment_<id>_result' was already commented out), so it has been removed.
  return 'result';
}
=head2 display_label

  Example    : print $rset->display_label();
  Description: Getter for the display_label attribute for this ResultSet.
               This is more appropriate for the predicted_features of the set.
               Use the individual display_labels for each raw result set.
               If no label is stored yet, one is built and cached from the
               feature type name and the cell type display label (or name),
               with placeholders where either is not defined.
  Returntype : str
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub display_label {
  my $self = shift;

  return $self->{'display_label'} if $self->{'display_label'};

  #This should display some info about the chip set/duplicate set if there is more than one set of data for a feature_set!
  #Sets for which the feature or cell type is not known get a placeholder.
  #should we make cell_type and feature_type mandatory?

  my $feature_type = $self->feature_type();
  $self->{'display_label'} = (defined $feature_type)
    ? $feature_type->name()." - "
    : "FEATURE TYPE NOT KNOWN - ";

  my $cell_type = $self->cell_type();

  if (defined $cell_type) {
    #Fall back to the name when the cell type has no display label
    $self->{'display_label'} .= $cell_type->display_label() || $cell_type->name();
  }
  else {
    $self->{'display_label'} .= "CELL TYPE NOT KNOWN";
  }

  $self->{'display_label'} .= " Enriched Sites";

  return $self->{'display_label'};
}
=head2 get_displayable_ResultFeatures_by_Slice

  Arg[1]     : Bio::EnsEMBL::Slice
  Arg[2]     : Boolean - with probe flag, will nest Probe object in ResultFeature
  Example    : my @results = @{$ResultSet->get_displayable_ResultFeatures_by_Slice($slice)};
  Description: Gets all the displayable ResultFeatures for a given Slice.
               Delegates to get_ResultFeatures_by_Slice with the status
               'DISPLAYABLE'.
  Returntype : Arrayref of ResultFeatures
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_displayable_ResultFeatures_by_Slice {
  my $self = shift;
  my ($slice, $with_probe) = @_;

  return $self->get_ResultFeatures_by_Slice($slice, 'DISPLAYABLE', $with_probe);
}
=head2 get_ResultFeatures_by_Slice

  Arg[1]     : Bio::EnsEMBL::Slice
  Arg[2]     : string - Status name e.g. 'DISPLAYABLE'
  Arg[3]     : Boolean - with probe flag, will nest Probe object in ResultFeature
  Example    : my @rfs_with_probe = @{$ResultSet->get_ResultFeatures_by_Slice($slice, undef, 1)};
  Description: Gets all the ResultFeatures for a given Slice.
               Delegates to fetch_ResultFeatures_by_Slice_ResultSet on the
               adaptor of this ResultSet.
  Returntype : Arrayref of ResultFeatures
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub get_ResultFeatures_by_Slice {
  my ($self, $slice, $status, $with_probe) = @_;

  my $adaptor = $self->adaptor;

  #The set itself is passed so the adaptor knows which results to fetch
  return $adaptor->fetch_ResultFeatures_by_Slice_ResultSet($slice, $self, $status, $with_probe);
}
=head2 log_label

  Example    : print $rset->log_label();
  Description: Get a string of the unique key fields for logging purposes.
               The format is name:analysis logic_name:feature type:cell type,
               with placeholders for a feature or cell type which is not
               defined.
  Returntype : string
  Exceptions : None
  Caller     : General
  Status     : At Risk

=cut

sub log_label {
  my $self = shift;

  my $feature_type = $self->feature_type();
  my $cell_type    = $self->cell_type();

  my $ftype_label = (defined $feature_type) ? $feature_type->name : "Unknown FeatureType";

  #Was misspelled "Uknown CellType", now consistent with the feature type placeholder
  my $ctype_label = (defined $cell_type) ? $cell_type->name : "Unknown CellType";

  return $self->name.":".$self->analysis->logic_name.":".$ftype_label.":".$ctype_label;
}
1;