Bio::EnsEMBL::Compara::Production HomologySet
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Compara::Production::HomologySet
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Compara::Graph::CGObject
Bio::EnsEMBL::Compara::Homology
Bio::EnsEMBL::Compara::Production::GeneSet
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Bio::EnsEMBL::Compara::Graph::CGObject
Synopsis
An abstract data class for holding an arbitrary collection of
Hhomology objects and providing set operations and cross-reference
operations to compare to another HomologySet object.
Description
A 'set' object of Homology objects. Uses Homology::homology_key to identify
unique homologies and Member::stable_id to identify unique genes.
Is used for comparing HomologySet objects with each other and building comparison
matrixes.
Not really a production object, but more an abstract data type for use by
post analysis scripts. Placed in Production since I could not think of a better location.
The design of this object essentially was within the homology_diff.pl script
but has now been formalized into a proper object design.
General use is like:
$homology_set1 = new Bio::EnsEMBL::Compara::Production::HomologySet;
$homology_set1->add(@{$homologyDBA->fetch_all_by_MethodLinkSpeciesSet($mlss1));

$homology_set2 = new Bio::EnsEMBL::Compara::Production::HomologySet;
$homology_set2->add(@{$homologyDBA->fetch_all_by_MethodLinkSpeciesSet($mlss2));
  $crossref = $homology_set1->crossref_homologies_by_type($homology_set2);
$homology_set1->print_conversion_stats($homology_set2,$crossref);
Methods
add
No description
Code
best_homology_for_gene
No description
Code
clear
No description
Code
count_for_type
No description
Code
dealloc
No description
Code
find_homology_like
No description
Code
gene_set
No description
Code
has_homology
No description
Code
homologies_for_gene
No description
Code
init
No description
Code
list
No description
Code
merge
No description
Code
print_stats
No description
Code
size
No description
Code
subset_containing_genes
No description
Code
types
No description
Code
Methods description
None available.
Methods code
adddescriptionprevnextTop
sub add {
  my $self = shift;
  my @homology_list = @_; 
  
  foreach my $homology (@homology_list) {

    next if(defined($self->{'homology_hash'}->{$homology->homology_key}));
    #printf("HomologySet add: %s\n", $homology->homology_key);
my ($gene1, $gene2) = @{$homology->gene_list}; $self->{'homology_hash'}->{$homology->homology_key} = $homology; $self->{'gene_set'}->add($gene1); $self->{'gene_set'}->add($gene2); my $description = $homology->description; if (scalar @{$homology->method_link_species_set->species_set} == 1) { my ($gdb) = @{$homology->method_link_species_set->species_set}; $description .= "_".$gdb->dbID; } $self->{'types'}->{$description}++; $self->{'gene_to_homologies'}->{$gene1->stable_id} = [] unless(defined($self->{'gene_to_homologies'}->{$gene1->stable_id})); $self->{'gene_to_homologies'}->{$gene2->stable_id} = [] unless(defined($self->{'gene_to_homologies'}->{$gene2->stable_id})); push @{$self->{'gene_to_homologies'}->{$gene1->stable_id}}, $homology; push @{$self->{'gene_to_homologies'}->{$gene2->stable_id}}, $homology; } return $self;
}
best_homology_for_genedescriptionprevnextTop
sub best_homology_for_gene {
  my $self = shift;
  my $gene = shift;
  my $ordered_types = shift; #hashref type=>rank
my $best_homology = undef; my $best_rank = undef; #$gene->print_member;
foreach my $homology (@{$self->homologies_for_gene($gene)}) { #$homology->print_homology;
my $rank = $ordered_types->{$homology->description}; if(!defined($best_rank) or ($rank and ($rank<$best_rank))) { $best_homology = $homology; $best_rank = $rank; } } #if($best_homology) { printf("BEST: "); $best_homology->print_homology; }
return $best_homology; } ### gene ###
}
cleardescriptionprevnextTop
sub clear {
  my $self = shift;
    
  $self->{'conversion_hash'} = {};
  $self->{'gene_set'} = new Bio::EnsEMBL::Compara::Production::GeneSet;
  $self->{'homology_hash'} = {};
  $self->{'gene_to_homologies'} = {};
  $self->{'types'} = {};
}
count_for_typedescriptionprevnextTop
sub count_for_type {
  my $self = shift;
  my $type = shift;
  my $count = $self->{'types'}->{$type};
  $count=0 unless(defined($count));
  return $count;
}

### homology ###
}
deallocdescriptionprevnextTop
sub dealloc {
  my $self = shift;
  #$self->unlink_all_neighbors;
return $self->SUPER::dealloc;
}
find_homology_likedescriptionprevnextTop
sub find_homology_like {
  my $self = shift;
  my $homology = shift;
  return $self->{'homology_hash'}->{$homology->homology_key};
}
gene_setdescriptionprevnextTop
sub gene_set {
  my $self = shift;
  return $self->{'gene_set'};
}

### debug printing ###
}
has_homologydescriptionprevnextTop
sub has_homology {
  my $self = shift;
  my $homology = shift;
  return 1 if(defined($self->{'homology_hash'}->{$homology->homology_key}));
  return 0;
}
homologies_for_genedescriptionprevnextTop
sub homologies_for_gene {
  my $self = shift;
  my $gene = shift;
  my $homologies = $self->{'gene_to_homologies'}->{$gene->stable_id};
  return $homologies if($homologies);
  return [];
}
initdescriptionprevnextTop
sub init {
  my $self = shift;
   $self->SUPER::init;
   $self->clear;
  return $self;
}
listdescriptionprevnextTop
sub list {
  my $self = shift;
  my @homologies = values(%{$self->{'homology_hash'}});
  return\@ homologies;
}
mergedescriptionprevnextTop
sub merge {
  my $self = shift;
  my $other_set = shift;
  
  $self->add(@{$other_set->list});
  return $self;
}


### homology types ie description ###
}
print_statsdescriptionprevnextTop
sub print_stats {
  my $self = shift;
  
  printf("%d unique genes\n", $self->gene_set->size);
  printf("%d unique homologies\n", $self->size);
  foreach my $type (@{$self->types}) {
    printf("%10d : %s\n", $self->count_for_type($type), $type);
  }
}



1;
}
sizedescriptionprevnextTop
sub size {
  my $self = shift;
  return scalar(keys(%{$self->{'homology_hash'}}));
}
subset_containing_genesdescriptionprevnextTop
sub subset_containing_genes {
  my $self = shift;
  my $gene_set = shift;
  my $newset = new Bio::EnsEMBL::Compara::Production::HomologySet;
  foreach my $homology (@{$self->list}) {
    foreach my $gene (@{$homology->gene_list}) {
      if($gene_set->includes($gene)) {
        $newset->add($homology);
      }
    }
  }
  return $newset;
}
typesdescriptionprevnextTop
sub types {
  my $self = shift;
  my @types = keys(%{$self->{'types'}});
  return\@ types;
}
General documentation
CONTACTTop
  Contact Jessica Severin on module implemetation/design detail: jessica@ebi.ac.uk
Contact Abel Ureta-Vidal on EnsEMBL/Compara: abel@ebi.ac.uk
Contact Ewan Birney on EnsEMBL in general: birney@sanger.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _