Raw content of Bio::EnsEMBL::Compara::ConservationScore
#
# Ensembl module for Bio::EnsEMBL::Compara::ConservationScore
#
# Cared for by Kathryn Beal
#
# Copyright Ewan Birney
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
Bio::EnsEMBL::Compara::ConservationScore - Stores conservation scores
=head1 SYNOPSIS
use Bio::EnsEMBL::Compara::ConservationScore;
my $conservation_score = new Bio::EnsEMBL::Compara::ConservationScore(
-genomic_align_block => $gab,
-window_size => $win_size,
-position => $pos,
-observed_score => $obs_scores,
-expected_score => $exp_scores,
-diff_score => $diff_scores);
SET VALUES
$conservation_score->genomic_align_block($gab);
$conservation_score->window_size(10);
$conservation_score->position(1);
$conservation_score->observed_score($observed_scores);
$conservation_score->expected_score($expected_scores);
$conservation_score->diff_score($diff_scores);
$conservation_score->y_axis_min(0);
$conservation_score->y_axis_max(100);
GET VALUES
$gab = $conservation_score->genomic_align_block;
$win_size = $conservation_score->window_size;
$pos = $conservation_score->position;
$obs_scores = $conservation_score->observed_score;
$exp_scores = $conservation_score->expected_score;
$diff_scores = $conservation_score->diff_score;
$y_axis_min = $conservation_score->y_axis_min;
$y_axis_max = $conservation_score->y_axis_max;
=head1 DESCRIPTION
Object for storing conservation scores. The scores are averaged over different
window sizes to speed up drawing over large regions. The scores are packed as
floats and stored in a string. The scores can be stored and retrieved in
either a packed or unpacked format. The unpacked format is as a space delimited
string eg ("0.123 0.456 0.789"). The packed format is a single precision float
(4 bytes). It is recommended to use the unpacked format.
=head1 AUTHOR - Kathryn Beal
This modules is part of the Ensembl project
Email kbeal@ebi.ac.uk
=head1 CONTACT
This modules is part of the EnsEMBL project ()
Questions can be posted to the ensembl-dev mailing list:
ensembl-dev@ebi.ac.uk
=head1 APPENDIX
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
=cut
# Let the code begin...
package Bio::EnsEMBL::Compara::ConservationScore;
use strict;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(warning deprecate throw);
#store as 4 byte float
my $pack_size = 4;
my $pack_type = "f";
=head2 new (CONSTRUCTOR)
Arg [-ADAPTOR]
: Bio::EnsEMBL::Compara::DBSQL::ConservationScore $adaptor
(the adaptor for connecting to the database)
Arg [-GENOMIC_ALIGN_BLOCK] (opt)
: Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlock $genomic_align_block
(the Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlock of the multiple
alignment)
Arg [-GENOMIC_ALIGN_BLOCK_ID] (opt)
: int $genomic_align_block_id
(the database internal ID of the $genomic_align_block)
Arg [-WINDOW_SIZE] (opt)
: int $window_size
(window size used to average the scores over)
Arg [-POSITION] (opt)
: int $position
(position of the first score in alignment coordinates)
Arg [-SEQ_REGION_POS] (opt)
: int $seq_region_pos
(position of the first score in species coordinates)
Arg [-EXPECTED_SCORE]
: string $expected_score
(packed or unpacked string of expected scores)
Arg [-DIFF_SCORE]
: string $diff_score
(packed or unpacked string of the difference between the observed and
expected scores)
Arg [-PACKED] (opt)
: boolean $packed
(whether the scores are packed (1) or unpacked (0))
Arg [Y_AXIS_MIN] (opt)
: float $y_axis_min
(minimum score value used for display)
Arg [Y_AXIS_MAX] (opt)
: float $y_axis_max
(maximum score value used for display)
Example :
my $conservation_score = new Bio::EnsEMBL::Compara::ConservationScore(
-genomic_align_block => $gab,
-window_size => $win_size,
-position => $pos,
-expected_score => $exp_scores,
-diff_score => $diff_scores);
Description: Creates a new ConservationScore object
Returntype : Bio::EnsEMBL::Compara::ConservationScore
Exceptions : none
Caller : general
Status : At risk
=cut
sub new {
my($class, @args) = @_;
my $self = {};
bless $self,$class;
my ($adaptor, $genomic_align_block, $genomic_align_block_id,
$window_size, $position, $seq_region_pos,
$expected_score, $diff_score, $packed, $y_axis_min, $y_axis_max) =
rearrange([qw(
ADAPTOR GENOMIC_ALIGN_BLOCK GENOMIC_ALIGN_BLOCK_ID
WINDOW_SIZE POSITION SEQ_REGION_POS
EXPECTED_SCORE DIFF_SCORE PACKED Y_AXIS_MIN
Y_AXIS_MAX)],
@args);
$self->adaptor($adaptor) if (defined($adaptor));
$self->genomic_align_block($genomic_align_block) if (defined($genomic_align_block));
$self->genomic_align_block_id($genomic_align_block_id) if (defined($genomic_align_block_id));
$self->window_size($window_size) if (defined($window_size));
$self->position($position) if (defined($position));
$self->seq_region_pos($seq_region_pos) if (defined($seq_region_pos));
$self->expected_score($expected_score) if (defined($expected_score));
$self->diff_score($diff_score) if (defined($diff_score));
$self->y_axis_min($y_axis_min) if (defined($y_axis_min));
$self->y_axis_max($y_axis_max) if (defined($y_axis_max));
if (defined($packed)) {
$self->packed($packed);
} else {
$self->packed(0);
}
return $self;
}
=head2 new_fast
Arg [1] : hash reference $hashref
Example : none
Description: This is an ultra fast constructor which requires knowledge of
the objects internals to be used.
Returntype :
Exceptions : none
Caller :
Status : At risk
=cut
sub new_fast {
my ($class, $hashref) = @_;
return bless $hashref, $class;
}
=head2 adaptor
Arg [1] : Bio::EnsEMBL::DBSQL::ConservationScoreAdaptor $adaptor
Example : $conservation_score->adaptor($adaptor);
Description: Getter/Setter for the adaptor this object used for database
interaction
Returntype : Bio::EnsEMBL::DBSQL::ConservationScoreAdaptor object
Exceptions : thrown if the argument is not a
Bio::EnsEMBL::DBSQL::ConservationScoreAdaptor object
Caller : general
Status : At risk
=cut
sub adaptor {
my ( $self, $adaptor ) = @_;
if (defined($adaptor)) {
throw("$adaptor is not a Bio::EnsEMBL::Compara::DBSQL::ConservationScoreAdaptor object")
unless ($adaptor->isa("Bio::EnsEMBL::Compara::DBSQL::ConservationScoreAdaptor"));
$self->{'adaptor'} = $adaptor;
}
return $self->{'adaptor'};
}
=head2 genomic_align_block
Arg [1] : Bio::EnsEMBL::Compara::GenomicAlignBlock $genomic_align_block
Example : my $genomic_align_block = $conservation_score->genomic_align_block();
Example : $conservation_score->genomic_align_block($genomic_align_block);
Description: Getter/Setter for the genomic_align_block attribute
Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object. If no
argument is given, the genomic_align_block is not defined but
if both the genomic_align_block_id and the adaptor are, it tries
to fetch the data using the genomic_align_block_id.
Exceptions : thrown if $genomic_align_block is not a
Bio::EnsEMBL::Compara::GenomicAlignBlock object or if
$genomic_align_block does not match a previously defined
genomic_align_block_id
Warning : warns if getting data from other sources fails.
Caller : general
Status : At risk
=cut
sub genomic_align_block {
my ($self, $genomic_align_block) = @_;
if (defined($genomic_align_block)) {
throw("$genomic_align_block is not a Bio::EnsEMBL::Compara::GenomicAlignBlock object")
unless ($genomic_align_block->isa("Bio::EnsEMBL::Compara::GenomicAlignBlock"));
if ($self->{'genomic_align_block_id'}) {
throw("dbID of genomic_align_block object does not match previously defined".
" genomic_align_block_id. If you want to override a".
" Bio::EnsEMBL::Compara::ConservationScore object, you can reset the ".
"genomic_align_block_id using \$conservation_score->genomic_align_block_id(0)")
if ($self->{'genomic_align_block'}->dbID != $self->{'genomic_align_block_id'});
}
$self->{'genomic_align_block'} = $genomic_align_block;
} elsif (!defined($self->{'genomic_align_block'})) {
# Try to get the genomic_align_block from other sources...
if (defined($self->genomic_align_block_id) and defined($self->{'adaptor'})) {
# ...from the genomic_align_block_id. Uses genomic_align_block_id function
# and not the attribute in the clause because the attribute can be retrieved from other
# sources if it has not been set before.
my $genomic_align_block_adaptor = $self->{'adaptor'}->db->get_GenomicAlignBlockAdaptor;
$self->{'genomic_align_block'} = $genomic_align_block_adaptor->fetch_by_dbID(
$self->{'genomic_align_block_id'});
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_block".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'genomic_align_block'};
}
=head2 genomic_align_block_id
Arg [1] : (opt) integer genomic_align_block_id
Example : my $genomic_align_block_id = $conservation_score->genomic_align_block_id();
Example : $conservation_score->genomic_align_block_id($genomic_align_block_id);
Description: Getter/Setter for the genomic_align_block_id attribute. If no
argument is given and the genomic_align_block_id is not defined,
it tries to get the data from other sources like the
corresponding Bio::EnsEMBL::Compara::GenomicAlignBlock object or
the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
Returntype : integer
Exceptions : thrown if $genomic_align_block_id does not match a previously
defined genomic_align_block
Warning : warns if getting data from other sources fails.
Caller : general
Status : At risk
=cut
sub genomic_align_block_id {
my ($self, $genomic_align_block_id) = @_;
if (defined($genomic_align_block_id)) {
$self->{'genomic_align_block_id'} = ($genomic_align_block_id or undef);
if (defined($self->{'genomic_align_block'}) and $self->{'genomic_align_block_id'}) {
warning("Defining both genomic_align_block_id and genomic_align_block");
throw("genomic_align_block_id does not match previously defined genomic_align_block object")
if ($self->{'genomic_align_block'} and
$self->{'genomic_align_block'}->dbID != $self->{'genomic_align_block_id'});
}
} elsif (!($self->{'genomic_align_block_id'})) {
# Try to get the ID from other sources...
if (defined($self->{'genomic_align_block'}) and defined($self->{'genomic_align_block'}->dbID)) {
# ...from the corresponding Bio::EnsEMBL::Compara::GenomicAlignBlock object
$self->{'genomic_align_block_id'} = $self->{'genomic_align_block'}->dbID;
} elsif (defined($self->{'adaptor'}) and defined($self->{'dbID'})) {
# ...from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$self->adaptor->retrieve_all_direct_attributes($self);
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_block_id".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'genomic_align_block_id'};
}
=head2 window_size
Arg [1] : (opt) integer window_size
Example : my $window_size = $conservation_score->window_size();
Example : $conservation_score->window_size(1);
Description: Getter/Setter for the window_size of this conservation score
Returntype : integer, Returns 1 if value not defined
Exceptions : none
Caller : general
Status : At risk
=cut
sub window_size {
my ($self, $window_size) = @_;
if(defined $window_size) {
$self->{'window_size'} = $window_size;
}
$self->{'window_size'}='1' unless(defined($self->{'window_size'}));
return $self->{'window_size'};
}
=head2 position
Arg [1] : (opt) integer
Example : $conservation_score->position(1);
Description: Getter/Setter for the alignment position of the first score
Returntype : integer. Return 1 if value not defined
Exceptions : none
Caller : general
Status : At risk
=cut
sub position {
my ($self, $position) = @_;
if(defined $position) {
$self->{'position'} = $position;
}
$self->{'position'}='1' unless(defined($self->{'position'}));
return $self->{'position'};
}
=head2 start
Example : $conservation_score->start();
Description: Wrapper round position call
Returntype : integer. Return 1 if value not defined
Exceptions : none
Caller : general
Status : At risk
=cut
sub start {
my $self = shift;
return $self->position;
}
=head2 end
Example : $conservation_score->end();
Description: wrapper around position
Returntype : integer. Return 1 if value not defined
Exceptions : none
Caller : general
Status : At risk
=cut
sub end {
my $self = shift;
return $self->position;
}
=head2 seq_region_pos
Arg [1] : (opt) integer
Example : $conservation_score->seq_region_pos(1);
Description: Getter/Setter for the species position of the first score
Returntype : integer.
Exceptions : none
Caller : general
Status : At risk
=cut
sub seq_region_pos {
my ($self, $seq_region_pos) = @_;
if(defined $seq_region_pos) {
$self->{'seq_region_pos'} = $seq_region_pos;
}
return $self->{'seq_region_pos'};
}
=head2 observed_score
Example : my $obs_score = $conservation_score->observed_score();
Description: Getter for the observed score string (no setter functionality)
Returntype : double
Exceptions : none
Caller : general
Status : At risk
=cut
sub observed_score {
my ($self) = @_;
return ($self->expected_score - $self->diff_score);
}
=head2 expected_score
Arg [1] : (opt) string of expected scores (can be either packed or space
delimited)
Example : $conservation_score->expected_score("3.85 2.54 1.56");
Example : my $exp_score = $conservation_score->expected_score();
Description: Getter/Setter for the expected score string
Returntype : string (either packed or space delimited)
Exceptions : none
Caller : general
Status : At risk
=cut
sub expected_score {
my ($self, $expected_score) = @_;
if (defined $expected_score) {
$self->{'expected_score'} = $expected_score;
}
return $self->{'expected_score'};
}
=head2 diff_score
Arg [1] : (opt) string of difference scores (expected - observed)
(can be either packed or space delimited)
Example : $conservation_score->diff_score("1.85 -2.54 1.56");
Example : my $diff_score = $conservation_score->diff_score();
Description: Getter/Setter for the difference score string
Returntype : string (either packed or space delimited)
Exceptions : none
Caller : general
Status : At risk
=cut
sub diff_score {
my ($self, $diff_score) = @_;
if (defined $diff_score) {
$self->{'diff_score'} = $diff_score;
}
return $self->{'diff_score'};
}
=head2 score
Arg [1] : (opt) string of difference scores (expected - observed)
(can be either packed or space delimited)
Example : $conservation_score->diff_score("1.85 -2.54 1.56");
Example : my $diff_score = $conservation_score->diff_score();
Description: alias for diff score
Returntype : string (either packed or space delimited)
Exceptions : none
Caller : general
Status : At risk
=cut
sub score {
my ($self, $diff_score) = @_;
return $self->diff_score($diff_score);
}
=head2 y_axis_min
Arg [1] : (opt) float
Example : $conservation_score->y_axis_min(-0.5);
Example : $y_axis_min = $conservation_score->y_axis_min;
Description: Getter/Setter for the minimum score
Returntype : float
Exceptions : none
Caller : general
Status : At risk
=cut
sub y_axis_min {
my ($self, $y_axis_min) = @_;
if (defined $y_axis_min) {
$self->{'y_axis_min'} = $y_axis_min;
}
return $self->{'y_axis_min'};
}
=head2 y_axis_max
Arg [1] : (opt) float
Example : $conservation_score->y_axis_max(2.45);
Example : $y_axis_max = $conservation_score->y_axis_min;
Description: Getter/Setter for the maximum score
Returntype : float
Exceptions : none
Caller : general
Status : At risk
=cut
sub y_axis_max {
my ($self, $y_axis_max) = @_;
if (defined $y_axis_max) {
$self->{'y_axis_max'} = $y_axis_max;
}
return $self->{'y_axis_max'};
}
=head2 packed
Arg [1] : (opt) boolean
Example : $conservation_score->packed(1);
Example : $packed = $conservation_score->packed;
Description: Getter/Setter for the whether the scores are packed or space
delimited
Returntype : boolean
Exceptions : none
Caller : general
Status : At risk
=cut
sub packed {
my ($self, $packed) = @_;
if($packed) {
$self->{'packed'} = $packed;
}
return $self->{'packed'};
}
=head2 reverse
Example : $conservation_score->reverse;
Description: reverse scores and position in the ConservationScore object
Returntype : none
Exceptions : none
Caller : general
Status : At risk
=cut
sub reverse {
my ($self, $genomic_align_block_length) = @_;
my $num_scores = 0;
return if (!defined($self->score));
if ($self->packed) {
$num_scores = length($self->score)/$pack_size;
} else {
my @scores = split ' ', $self->score;
$num_scores = scalar(@scores);
}
#swap position orientation and reverse position in alignment
my $end = $self->position + (($num_scores - 1) * $self->window_size);
#10.10.06 +1 so position starts at 1 not 0
#$self->position($self->genomic_align_block->length - $end);
if (!defined($genomic_align_block_length)) {
$genomic_align_block_length = $self->genomic_align_block->length;
}
$self->position($genomic_align_block_length - $end + 1);
#swap position orientation and reverse seq_region_pos in alignment
if (defined $self->seq_region_pos) {
$end = $self->seq_region_pos + (($num_scores - 1) * $self->window_size);
#10.10.06 +1 so position starts at 1 not 0
#$self->seq_region_pos($self->genomic_align_block->length - $end);
$self->seq_region_pos($self->genomic_align_block->length - $end + 1);
}
#reverse score strings
$self->expected_score(_reverse_score($self->expected_score, $num_scores, $self->packed));
$self->diff_score(_reverse_score($self->diff_score, $num_scores, $self->packed));
}
=head2 _reverse_score
Arg [1] : string $score_str (string of scores)
Arg [2] : int $num_scores (number of scores in the string)
Arg [3] : boolean $packed (whether the scores are packed or not)
Example : _reverse_score($self->expected_score, $num_scores, $self->packed)
Description: internal method used by reverse to reverse the score strings
Returntype : string
Exceptions : none
Caller : general
Status : At risk
=cut
sub _reverse_score {
my ($score_str, $num_scores, $packed) = @_;
my $rev_str;
if ($packed) {
for (my $i = $num_scores-1; $i >=0; $i--) {
my $value = substr $score_str, $i*$pack_size, $pack_size;
$rev_str .= $value;
}
} else {
my @scores = split ' ', $score_str;
my $rev_str;
for (my $i = $num_scores-1; $i >= 0; $i--) {
$rev_str .= $scores[$i];
}
}
return $rev_str;
}
=head2 _print
Example : $conservation_score->_print;
Description: print the contents of the ConservationScore object
Returntype : none
Exceptions : none
Caller : general
Status : At risk
=cut
sub _print {
my ($self, $FILEH) = @_;
# my $verbose = verbose;
# verbose(0);
my $exp_score = 0;
if ($self->expected_score) {
$exp_score = $self->expected_score;
}
$FILEH ||= \*STDOUT;
print $FILEH
"Bio::EnsEMBL::Compara::GenomicAlignBlock object ($self)
genomic_align_block = " . ($self->genomic_align_block) . "
genomic_align_block_id = " . ($self->genomic_align_block_id) . "
window_size = " . ($self->window_size) . "
position = " . ($self->position) . "
seq_region_pos = " . ($self->seq_region_pos) . "
diff_score = " . ($self->diff_score) . "
expected_score = $exp_score \n";
}
1;