Raw content of Bio::EnsEMBL::Compara::Production::GeneSet # # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =pod =head1 NAME Bio::EnsEMBL::Compara::Production::GeneSet =cut =head1 SYNOPSIS An abstract data class for holding an arbitrary collection of (ENSEMBLGENE)Member objects and providing set operations and cross-reference operations to compare to another GeneSet object. Also used by HomologySet. =cut =head1 DESCRIPTION A 'set' object of Gene objects. Uses Member::stable_id to identify unique genes. Is used for comparing GeneSet objects with each other and building comparison matrixes. Not really a production object, but more an abstract data type for use by post analysis scripts. Placed in Production since I could not think of a better location. The design of this object essentially was within the homology_diff.pl script but has now been formalized into a proper object design. =cut =head1 CONTACT Contact Jessica Severin on module implemetation/design detail: jessica@ebi.ac.uk Contact Abel Ureta-Vidal on EnsEMBL/Compara: abel@ebi.ac.uk Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk =cut =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut package Bio::EnsEMBL::Compara::Production::GeneSet; use strict; use Bio::EnsEMBL::Compara::Member; use Bio::EnsEMBL::Compara::Graph::CGObject; our @ISA = qw(Bio::EnsEMBL::Compara::Graph::CGObject); sub init { my $self = shift; $self->SUPER::init; $self->clear; return $self; } sub dealloc { my $self = shift; return $self->SUPER::dealloc; } sub clear { my $self = shift; $self->{'gene_hash'} = {}; } sub add { my $self = shift; my @gene_list = @_; foreach my $gene (@gene_list) { next if(defined($self->{'gene_hash'}->{$gene->stable_id})); $self->{'gene_hash'}->{$gene->stable_id} = $gene; } return $self; } sub merge { my $self = shift; my $other_set = shift; $self->add(@{$other_set->list}); return $self; } ### gene ### sub size { my $self = shift; return scalar(@{$self->list}); } sub list { my $self = shift; my @genes = values(%{$self->{'gene_hash'}}); return \@genes; } sub includes { my $self = shift; my $gene = shift; return 1 if(defined($self->{'gene_hash'}->{$gene->stable_id})); return 0; } sub find_gene_like { my $self = shift; my $gene = shift; return $self->{'gene_hash'}->{$gene->stable_id}; } ### debug printing ### sub print_stats { my $self = shift; printf("%d unique genes\n", $self->size); } sub hashref_by_genome { my $self = shift; my %types; foreach my $gene (@{$self->list}) { unless(defined($types{$gene->genome_db_id})) { $types{$gene->genome_db_id} = new Bio::EnsEMBL::Compara::Production::GeneSet; } $types{$gene->genome_db_id}->add($gene); } return \%types; } ############################################ # # set theory operations # ############################################ sub relative_complement { my $self = shift; my $other_set = shift; #genes in other_set that are not in my set my $new_set = new Bio::EnsEMBL::Compara::Production::GeneSet; foreach my $gene (@{$other_set->list}) { unless($self->includes($gene)) { $new_set->add($gene); } } return $new_set; } sub intersection { my $self = shift; my $other_set = shift; my $new_set = new Bio::EnsEMBL::Compara::Production::GeneSet; foreach my $gene (@{$self->list}) { if($other_set->includes($gene)) { $new_set->add($gene); } } return $new_set; } 1;