Bio::EnsEMBL::Compara::Production
HomologySet
Toolbar
Summary
Bio::EnsEMBL::Compara::Production::HomologySet
Package variables
No package variables defined.
Included modules
Time::HiRes qw ( time gettimeofday tv_interval )
Inherit
Synopsis
An abstract data class for holding an arbitrary collection of
Hhomology objects and providing set operations and cross-reference
operations to compare to another HomologySet object.
Description
A 'set' object of Homology objects. Uses Homology::homology_key to identify
unique homologies and Member::stable_id to identify unique genes.
Is used for comparing HomologySet objects with each other and building comparison
matrixes.
Not really a production object, but more an abstract data type for use by
post analysis scripts. Placed in Production since I could not think of a better location.
The design of this object essentially was within the homology_diff.pl script
but has now been formalized into a proper object design.
General use is like:
$homology_set1 = new Bio::EnsEMBL::Compara::Production::HomologySet;
$homology_set1->add(@{$homologyDBA->fetch_all_by_MethodLinkSpeciesSet($mlss1));
$homology_set2 = new Bio::EnsEMBL::Compara::Production::HomologySet;
$homology_set2->add(@{$homologyDBA->fetch_all_by_MethodLinkSpeciesSet($mlss2));
$crossref = $homology_set1->crossref_homologies_by_type($homology_set2);
$homology_set1->print_conversion_stats($homology_set2,$crossref);
Methods
add | No description | Code |
best_homology_for_gene | No description | Code |
clear | No description | Code |
count_for_type | No description | Code |
dealloc | No description | Code |
find_homology_like | No description | Code |
gene_set | No description | Code |
has_homology | No description | Code |
homologies_for_gene | No description | Code |
init | No description | Code |
list | No description | Code |
merge | No description | Code |
print_stats | No description | Code |
size | No description | Code |
subset_containing_genes | No description | Code |
types | No description | Code |
Methods description
None available.
Methods code
sub add
{ my $self = shift;
my @homology_list = @_;
foreach my $homology (@homology_list) {
next if(defined($self->{'homology_hash'}->{$homology->homology_key}));
my ($gene1, $gene2) = @{$homology->gene_list};
$self->{'homology_hash'}->{$homology->homology_key} = $homology;
$self->{'gene_set'}->add($gene1);
$self->{'gene_set'}->add($gene2);
my $description = $homology->description;
if (scalar @{$homology->method_link_species_set->species_set} == 1) {
my ($gdb) = @{$homology->method_link_species_set->species_set};
$description .= "_".$gdb->dbID;
}
$self->{'types'}->{$description}++;
$self->{'gene_to_homologies'}->{$gene1->stable_id} = []
unless(defined($self->{'gene_to_homologies'}->{$gene1->stable_id}));
$self->{'gene_to_homologies'}->{$gene2->stable_id} = []
unless(defined($self->{'gene_to_homologies'}->{$gene2->stable_id}));
push @{$self->{'gene_to_homologies'}->{$gene1->stable_id}}, $homology;
push @{$self->{'gene_to_homologies'}->{$gene2->stable_id}}, $homology;
}
return $self; } |
sub best_homology_for_gene
{ my $self = shift;
my $gene = shift;
my $ordered_types = shift;
my $best_homology = undef;
my $best_rank = undef;
foreach my $homology (@{$self->homologies_for_gene($gene)}) {
my $rank = $ordered_types->{$homology->description};
if(!defined($best_rank) or ($rank and ($rank<$best_rank))) {
$best_homology = $homology;
$best_rank = $rank;
}
}
return $best_homology;
}
} |
sub clear
{ my $self = shift;
$self->{'conversion_hash'} = {};
$self->{'gene_set'} = new Bio::EnsEMBL::Compara::Production::GeneSet;
$self->{'homology_hash'} = {};
$self->{'gene_to_homologies'} = {};
$self->{'types'} = {}; } |
sub count_for_type
{ my $self = shift;
my $type = shift;
my $count = $self->{'types'}->{$type};
$count=0 unless(defined($count));
return $count;
}
} |
sub dealloc
{ my $self = shift;
return $self->SUPER::dealloc; } |
sub find_homology_like
{ my $self = shift;
my $homology = shift;
return $self->{'homology_hash'}->{$homology->homology_key}; } |
sub gene_set
{ my $self = shift;
return $self->{'gene_set'};
}
} |
sub has_homology
{ my $self = shift;
my $homology = shift;
return 1 if(defined($self->{'homology_hash'}->{$homology->homology_key}));
return 0; } |
sub homologies_for_gene
{ my $self = shift;
my $gene = shift;
my $homologies = $self->{'gene_to_homologies'}->{$gene->stable_id};
return $homologies if($homologies);
return []; } |
sub init
{ my $self = shift;
$self->SUPER::init;
$self->clear;
return $self; } |
sub list
{ my $self = shift;
my @homologies = values(%{$self->{'homology_hash'}});
return\@ homologies; } |
sub merge
{ my $self = shift;
my $other_set = shift;
$self->add(@{$other_set->list});
return $self;
}
} |
sub print_stats
{ my $self = shift;
printf("%d unique genes\n", $self->gene_set->size);
printf("%d unique homologies\n", $self->size);
foreach my $type (@{$self->types}) {
printf("%10d : %s\n", $self->count_for_type($type), $type);
}
}
1; } |
sub size
{ my $self = shift;
return scalar(keys(%{$self->{'homology_hash'}})); } |
sub subset_containing_genes
{ my $self = shift;
my $gene_set = shift;
my $newset = new Bio::EnsEMBL::Compara::Production::HomologySet;
foreach my $homology (@{$self->list}) {
foreach my $gene (@{$homology->gene_list}) {
if($gene_set->includes($gene)) {
$newset->add($homology);
}
}
}
return $newset; } |
sub types
{ my $self = shift;
my @types = keys(%{$self->{'types'}});
return\@ types; } |
General documentation
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a _