Raw content of Bio::EnsEMBL::Compara::Production::HomologySet
#
# You may distribute this module under the same terms as perl itself
#
# POD documentation - main docs before the code
=pod
=head1 NAME
Bio::EnsEMBL::Compara::Production::HomologySet
=cut
=head1 SYNOPSIS
An abstract data class for holding an arbitrary collection of
Hhomology objects and providing set operations and cross-reference
operations to compare to another HomologySet object.
=cut
=head1 DESCRIPTION
A 'set' object of Homology objects. Uses Homology::homology_key to identify
unique homologies and Member::stable_id to identify unique genes.
Is used for comparing HomologySet objects with each other and building comparison
matrixes.
Not really a production object, but more an abstract data type for use by
post analysis scripts. Placed in Production since I could not think of a better location.
The design of this object essentially was within the homology_diff.pl script
but has now been formalized into a proper object design.
General use is like:
$homology_set1 = new Bio::EnsEMBL::Compara::Production::HomologySet;
$homology_set1->add(@{$homologyDBA->fetch_all_by_MethodLinkSpeciesSet($mlss1));
$homology_set2 = new Bio::EnsEMBL::Compara::Production::HomologySet;
$homology_set2->add(@{$homologyDBA->fetch_all_by_MethodLinkSpeciesSet($mlss2));
$crossref = $homology_set1->crossref_homologies_by_type($homology_set2);
$homology_set1->print_conversion_stats($homology_set2,$crossref);
=cut
=head1 CONTACT
Contact Jessica Severin on module implemetation/design detail: jessica@ebi.ac.uk
Contact Abel Ureta-Vidal on EnsEMBL/Compara: abel@ebi.ac.uk
Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
=cut
=head1 APPENDIX
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _
=cut
package Bio::EnsEMBL::Compara::Production::HomologySet;
use strict;
use Bio::EnsEMBL::Compara::Production::GeneSet;
use Bio::EnsEMBL::Compara::Homology;
use Time::HiRes qw(time gettimeofday tv_interval);
use Bio::EnsEMBL::Compara::Graph::CGObject;
our @ISA = qw(Bio::EnsEMBL::Compara::Graph::CGObject);
sub init {
my $self = shift;
$self->SUPER::init;
$self->clear;
return $self;
}
sub dealloc {
my $self = shift;
#$self->unlink_all_neighbors;
return $self->SUPER::dealloc;
}
sub clear {
my $self = shift;
$self->{'conversion_hash'} = {};
$self->{'gene_set'} = new Bio::EnsEMBL::Compara::Production::GeneSet;
$self->{'homology_hash'} = {};
$self->{'gene_to_homologies'} = {};
$self->{'types'} = {};
}
sub add {
my $self = shift;
my @homology_list = @_;
foreach my $homology (@homology_list) {
next if(defined($self->{'homology_hash'}->{$homology->homology_key}));
#printf("HomologySet add: %s\n", $homology->homology_key);
my ($gene1, $gene2) = @{$homology->gene_list};
$self->{'homology_hash'}->{$homology->homology_key} = $homology;
$self->{'gene_set'}->add($gene1);
$self->{'gene_set'}->add($gene2);
my $description = $homology->description;
if (scalar @{$homology->method_link_species_set->species_set} == 1) {
my ($gdb) = @{$homology->method_link_species_set->species_set};
$description .= "_".$gdb->dbID;
}
$self->{'types'}->{$description}++;
$self->{'gene_to_homologies'}->{$gene1->stable_id} = []
unless(defined($self->{'gene_to_homologies'}->{$gene1->stable_id}));
$self->{'gene_to_homologies'}->{$gene2->stable_id} = []
unless(defined($self->{'gene_to_homologies'}->{$gene2->stable_id}));
push @{$self->{'gene_to_homologies'}->{$gene1->stable_id}}, $homology;
push @{$self->{'gene_to_homologies'}->{$gene2->stable_id}}, $homology;
}
return $self;
}
sub merge {
my $self = shift;
my $other_set = shift;
$self->add(@{$other_set->list});
return $self;
}
### homology types ie description ###
sub types {
my $self = shift;
my @types = keys(%{$self->{'types'}});
return \@types;
}
sub count_for_type {
my $self = shift;
my $type = shift;
my $count = $self->{'types'}->{$type};
$count=0 unless(defined($count));
return $count;
}
### homology ###
sub size {
my $self = shift;
return scalar(keys(%{$self->{'homology_hash'}}));
}
sub list {
my $self = shift;
my @homologies = values(%{$self->{'homology_hash'}});
return \@homologies;
}
sub has_homology {
my $self = shift;
my $homology = shift;
return 1 if(defined($self->{'homology_hash'}->{$homology->homology_key}));
return 0;
}
sub find_homology_like {
my $self = shift;
my $homology = shift;
return $self->{'homology_hash'}->{$homology->homology_key};
}
sub subset_containing_genes {
my $self = shift;
my $gene_set = shift;
my $newset = new Bio::EnsEMBL::Compara::Production::HomologySet;
foreach my $homology (@{$self->list}) {
foreach my $gene (@{$homology->gene_list}) {
if($gene_set->includes($gene)) {
$newset->add($homology);
}
}
}
return $newset;
}
sub homologies_for_gene {
my $self = shift;
my $gene = shift;
my $homologies = $self->{'gene_to_homologies'}->{$gene->stable_id};
return $homologies if($homologies);
return [];
}
sub best_homology_for_gene {
my $self = shift;
my $gene = shift;
my $ordered_types = shift; #hashref type=>rank
my $best_homology = undef;
my $best_rank = undef;
#$gene->print_member;
foreach my $homology (@{$self->homologies_for_gene($gene)}) {
#$homology->print_homology;
my $rank = $ordered_types->{$homology->description};
if(!defined($best_rank) or ($rank and ($rank<$best_rank))) {
$best_homology = $homology;
$best_rank = $rank;
}
}
#if($best_homology) { printf("BEST: "); $best_homology->print_homology; }
return $best_homology;
}
### gene ###
sub gene_set {
my $self = shift;
return $self->{'gene_set'};
}
### debug printing ###
sub print_stats {
my $self = shift;
printf("%d unique genes\n", $self->gene_set->size);
printf("%d unique homologies\n", $self->size);
foreach my $type (@{$self->types}) {
printf("%10d : %s\n", $self->count_for_type($type), $type);
}
}
1;