Bio::EnsEMBL::Analysis::Tools::Algorithms
GeneCluster
Toolbar
Summary
GeneCluster
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
Description
This object holds one or more genes which has been clustered according to
comparison criteria external to this class (for instance, in the
methods compare and _compare_Genes methods of the class GeneComparison).
Each GeneCluster object holds the IDs of the genes clustered and the beginning and end coordinates
of each one (taken from the start and end coordinates of the first and last exon in the correspondig
get_all_Exons array)
Methods
Methods description
function to get the end position of a gene - it reads the gene object and it returns the end position of the last exon |
function to get the start position of a gene - it reads the gene object and it returns the start position of the first exon |
internal function that returns the length of the translateable exons |
it returns the number of genes in the GeneCluster object |
it returns the array of genes in the GeneCluster object |
Name : $self->get_TranscriptCluster() Arg[0] : Bio::EnsEMBL::Analysis::Tools::Algorithms::TranscriptCluster Function : gets a GeneCluster and converts it by building a TranscriptCluster, than clusters the exons of all Transcripts and returns an array-ref to Bio::EnsEMBL::ExonCluster-objects Returnval : Aref of Bio::EnsEMBL::Analysis::Tools::Algorithms::TranscriptCluster objects |
Name : $self->get_coding_exon_clustering_from_gene_cluster() Arg[0] : Bio::EnsEMBL::Analysis::Tools::Algorithms::GeneCluster; Function : gets a GeneCluster and converts it by building a TranscriptCluster, using only coding exons for the clustering. It then clusters the coding exons of all Transcripts and returns an array-ref to Bio::EnsEMBL::ExonCluster-objects Returnval : Aref of Bio::EnsEMBL::Analysis::Tools::Algorithms::ExonCluster objects |
Name : $self->get_exon_clustering_from_gene_cluster() Arg[0] : Bio::EnsEMBL::Analysis::Tools::Algorithms::GeneCluster; Function : gets a GeneCluster and converts it by building a TranscriptCluster, than clusters the exons of all Transcripts and returns an array-ref to Bio::EnsEMBL::ExonCluster-objects Returnval : Aref of Bio::EnsEMBL::Analysis::Tools::Algorithms::ExonCluster objects |
new() initializes the attributes: $self->{'_benchmark_types'} $self->{'_prediction_types'} $self->{'_benchmark_genes'} $self->{'_prediction_genes'} |
function to include one or more genes in the cluster. Useful when creating a cluster. It takes as argument an array of genes, it returns nothing. |
it returns a string containing the information about the genes contained in the GeneCluster object |
Methods code
sub _get_end
{ my ($self,$gene) = @_;
my @exons = @{ $gene->get_all_Exons };
my $end;
if ($exons[0]->strand == 1) {
@exons = sort {$a->start <=> $b->start} @exons;
$end = $exons[$#exons]->end;
} else {
@exons = sort {$b->start <=> $a->start} @exons; $end = $exons[$#exons]->start; } return $end; } |
sub _get_start
{ my ($self,$gene) = @_;
my @exons = @{ $gene->get_all_Exons };
my $st;
if ($exons[0]->strand == 1) {
@exons = sort {$a->start <=> $b->start} @exons;
$st = $exons[0]->start;
} else {
@exons = sort {$b->start <=> $a->start} @exons; $st = $exons[0]->end; } return $st;
}
} |
sub _translateable_exon_length
{ my ($trans)= @_;
my @exons = $trans->translateable_exons;
my $length = 0;
foreach my $ex (@exons) {
$length += $ex->length;
}
return $length;
}
} |
sub end
{ my ($self) = @_;
if (!defined($self->{_cached_end})) {
my $end;
foreach my $gene ($self->get_Genes) {
my $this_end = $gene->end;
unless ( $end ){
$end = $this_end;
}
if ( $this_end > $end ){
$end = $this_end;
}
}
$self->{_cached_end} = $end;
}
return $self->{_cached_end}; } |
sub gene_Types
{ my ($self, $set_name, $types) = @_;
$self->{'_types_sets'}{$set_name} = $types;
return $self->{'_types_sets'}{$set_name};
}
} |
sub get_Gene_Count
{ my $self = shift @_;
my @genes = $self->get_Genes;
return scalar(@genes);
}
} |
sub get_Genes
{ my $self = shift @_;
my @genes;
if (!defined( $self->{'_gene_sets'} ) ) {
$self->warning("The gene array you try to retrieve is empty");
@genes = ();
}
foreach my $set_name (keys %{$self->{'_gene_sets'}}) {
push( @genes, @{ $self->{'_gene_sets'}{$set_name} } );
}
return @genes; } |
sub get_Genes_by_Set()
{ my ($self,$set) = @_;
unless ($set){
throw( "must provide a set");
}
my @selected_genes;
if ($self->{v}){
for (keys %{ $self->{_gene_sets} } ) {
print " i know the following sets : $_\n" ;
}
}
if (!defined($self->{'_gene_sets'}{$set})) {
}else{
push @selected_genes, @{$self->{'_gene_sets'}{$set}};
}
return @selected_genes;
}
} |
sub get_Genes_by_Type()
{ my ($self,$types) = @_;
unless ($types){
throw( "must provide a type");
}
my @genes = $self->get_Genes; my @selected_genes;
foreach my $type ( @{ $types } ){
push ( @selected_genes, grep { $_->biotype eq $type } @genes );
}
return @selected_genes;
}
} |
sub get_Genes_of_Type()
{ my ($self,$type) = @_;
unless ($type){
throw( "must provide a type");
}
my @genes = $self->get_Genes; my @selected_genes;
push ( @selected_genes, grep { $_->biotype eq $type } @genes );
return @selected_genes; } |
sub get_Genes_ref
{ my $self = shift @_;
my @genes;
if (!defined( $self->{'_gene_sets'} ) ) {
$self->warning("The gene array you try to retrieve is empty");
@genes = ();
}
foreach my $set_name (keys %{$self->{'_gene_sets'}}) {
push( @genes, @{ $self->{'_gene_sets'}{$set_name} } );
}
return\@ genes; } |
sub get_TranscriptCluster
{ my ($self, $ignore_strand) = @_;
my $tc = Bio::EnsEMBL::Analysis::Tools::Algorithms::TranscriptCluster->new() ;
print "building new TranscriptCluster\n" if $self->{v};
foreach my $gene ( $self->get_Genes ) {
if( ref($gene)=~m/Gene/){
foreach my $trans (@{$gene->get_all_Transcripts}) {
if ($gene->strand ne $trans->strand ) {
throw("Weird - gene is on other strand than transcript\n") ;
}
for (@{ $trans->get_all_Exons} ) {
if ($_->strand ne $trans->strand ) {
print $trans->biotype . " " . $trans->seq_region_start . " "
. $trans->seq_region_end . " " . $trans->seq_region_strand ."\n" ;
print $_->biotype . " " . $_->seq_region_start . " " . $_->seq_region_end . " " .$_->seq_region_strand . "\n";
throw("Weird - exon is on other strand than transcript\n") ;
}
}
$trans->biotype($gene->biotype) ;
print "Adding transcript " . $trans->stable_id . "\n" if $self->{v};
$tc->put_Transcripts($ignore_strand, $trans);
$tc->register_biotype($gene->biotype) ;
}
} else {
warning("Not having a Bio::EnsEMBL::Gene-object : clustering $gene\n") ;
$gene->sort ;
$tc->put_Transcripts($ignore_strand, $gene) ;
}
}
return $tc; } |
sub get_all_Exons
{ my $self = shift @_;
my @exons;
if (!defined( $self->{'_gene_sets'} ) ) {
$self->warning("The gene array you try to retrieve exons for is empty");
@exons = ();
}
foreach my $set_name (keys %{$self->{'_gene_sets'}}) {
foreach my $gene (@{ $self->{'_gene_sets'}{$set_name} }) {
push @exons, @{$gene->get_all_Exons};
}
}
return\@ exons;
}
} |
sub get_coding_TranscriptCluster
{ my ($self, $ignore_strand) = @_;
my $tc = Bio::EnsEMBL::Analysis::Tools::Algorithms::TranscriptCluster->new() ;
print "building new TranscriptCluster using only coding Exons\n" if $self->{v};
foreach my $gene ( $self->get_Genes ) {
if( ref($gene)=~m/Gene/){
foreach my $trans (@{$gene->get_all_Transcripts}) {
if ($gene->strand ne $trans->strand ) {
throw("Weird - gene is on other strand than transcript\n") ;
}
for (@{ $trans->get_all_translateable_Exons} ) {
if ($_->strand ne $trans->strand ) {
print $trans->biotype . " " . $trans->seq_region_start . " "
. $trans->seq_region_end . " " . $trans->seq_region_strand ."\n" ;
print $_->biotype . " " . $_->seq_region_start . " " . $_->seq_region_end . " " .$_->seq_region_strand . "\n";
throw("Weird - exon is on other strand than transcript\n") ;
}
}
$trans->biotype($gene->biotype) ;
$trans->sort;
print "Adding transcript " . $trans->stable_id . "\n" if $self->{v};
$tc->{'_types_sets'} = $self->{'_types_sets'};
$tc->put_Transcripts($ignore_strand, $trans);
$tc->register_biotype($gene->biotype) ;
}
} else {
warning("Not having a Bio::EnsEMBL::Gene-object : clustering $gene\n") ;
$gene->sort ;
$tc->put_Transcripts($ignore_strand, $gene) ;
}
}
return $tc;
}
1; } |
sub get_coding_exon_clustering_from_gene_cluster
{ my ($self, $ignore_strand) = @_ ;
my @clg = sort {$a->start <=> $b->start} $self->get_Genes ;
my $tc = $self->get_coding_TranscriptCluster ($ignore_strand);
my @exon_clusters = $tc->get_coding_ExonCluster($ignore_strand) ;
if (defined $tc->strand && $tc->strand eq '1') {
@exon_clusters = sort { $a->start <=> $b->start } @exon_clusters ;
} elsif (defined $tc->strand && $tc->strand eq '-1') {
@exon_clusters = sort { $b->start <=> $a->start } @exon_clusters ;
} else {
warning("TranscriptCluster's strand is not defined. Are you ignoring strand?");
@exon_clusters = sort { $a->start <=> $b->start } @exon_clusters ;
}
return\@ exon_clusters ; } |
sub get_exon_clustering_from_gene_cluster
{ my ($self, $ignore_strand) = @_ ;
my @clg = sort {$a->start <=> $b->start} $self->get_Genes ;
my $tc = $self->get_TranscriptCluster ($ignore_strand);
my @exon_clusters = $tc->get_ExonCluster_using_all_Exons($ignore_strand) ;
if (defined $tc->strand && $tc->strand eq '1') {
@exon_clusters = sort { $a->start <=> $b->start } @exon_clusters ;
} elsif (defined $tc->strand && $tc->strand eq '-1') {
@exon_clusters = sort { $b->start <=> $a->start } @exon_clusters ;
} else {
warning("TranscriptCluster's strand is not defined. Are you ignoring strand?");
@exon_clusters = sort { $a->start <=> $b->start } @exon_clusters ;
}
return\@ exon_clusters ; } |
sub get_sets_included
{ my $self = shift;
my @included_sets;
foreach my $set_name ( keys %{$self->{'_types_sets'}}) {
if (defined( $self->{'_gene_sets'}{$set_name})) {
push @included_sets,$set_name;
}
}
return\@ included_sets;
}
} |
sub new
{ my ($class,$ignore_strand, $whatever)=@_;
if (ref($class)){
$class = ref($class);
}
my $self = {};
bless($self,$class);
$self->{_ignore_strand} = $ignore_strand;
if ($whatever){
throw( "Can't pass an object to new() method. Use put_Genes() to include Bio::EnsEMBL::Gene in cluster");
}
$self->{_cached_start} = undef;
$self->{_cached_end} = undef;
$self->{_cached_strand} = undef;
$self->{v} = 0 ; return $self;
}
} |
sub put_Genes
{ my ($self, $ignore_strand, @new_genes)= @_;
if ( !defined( $self->{'_types_sets'} ) ){
throw( "Cluster lacks references to gene-types, unable to put the gene");
}
GENE:
foreach my $gene (@new_genes){
throw("undef for gene. Cannot put_Genes") if (!$gene);
my $gene_biotype = $gene->biotype;
foreach my $set_name ( keys %{$self->{'_types_sets'}}) {
my $set = $self->{'_types_sets'}{$set_name};
foreach my $type ( @{$set} ){
if ($gene_biotype eq $type) {
push ( @{ $self->{'_gene_sets'}{$set_name} }, $gene );
if (defined($self->{_cached_start})) {
my $gene_start = $gene->start;
if ($gene_start < $self->{_cached_start}) {
$self->{_cached_start} = $gene_start;
}
}
if (defined($self->{_cached_end})) {
my $gene_end = $gene->end;
if ($gene_end > $self->{_cached_end}) {
$self->{_cached_end} = $gene_end;
}
}
if (!$self->{_ignore_strand}) {
if (defined($self->{_cached_strand})) {
my $gene_strand = $gene->start;
if ($gene_strand != $self->{_cached_strand}) {
throw("You have a cluster with genes on opposite strands");
}
}
}
next GENE;
}
}
}
throw("Failed putting gene of type " . $gene->biotype . "\n");
} } |
sub start
{ my ($self) = @_;
if (!defined($self->{_cached_start})) {
my $start;
foreach my $gene ($self->get_Genes) {
my $this_start = $gene->start;
unless ( $start ){
$start = $this_start;
}
if ( $this_start < $start ){
$start = $this_start;
}
}
$self->{_cached_start} = $start;
}
return $self->{_cached_start};
}
} |
sub strand
{ my $self = shift;
if ($self->{_ignore_strand}) {
print "Strand called, but ignoring\n";
return 0;
}
if (!defined($self->{_cached_strand})) {
my @genes = $self->get_Genes;
unless (@genes){
$self->warning("cannot retrieve the strand in a cluster with no genes");
}
my $strand;
foreach my $gene (@genes){
if (ref($gene)=~m/Gene/) {
foreach my $transcript (@{$gene->get_all_Transcripts}){
unless (defined($strand)) {
$strand = $transcript->start_Exon->strand;
next;
}
if ( $transcript->start_Exon->strand != $strand ){
throw("You have a cluster with genes on opposite strands");
}
}
}else {
unless (defined($strand)) {
$strand = $gene->start_Exon->strand;
}
if ( $gene->start_Exon->strand != $strand ){
throw("You have a cluster with genes on opposite strands");
}
}
}
$self->{_cached_strand} = $strand;
}
return $self->{_cached_strand};
}
} |
sub to_String
{ my $self = shift @_;
my $data='';
foreach my $gene ( $self->get_Genes ){
my @exons = @{ $gene->get_all_Exons };
$data .= sprintf "Id: %-16s" , $gene->stable_id;
$data .= sprintf "Contig: %-20s" , $exons[0]->contig->id;
$data .= sprintf "Exons: %-3d" , scalar(@exons);
$data .= sprintf "Start: %-9d" , $self->_get_start($gene);
$data .= sprintf "End: %-9d" , $self->_get_end ($gene);
$data .= sprintf "Strand: %-2d\n" , $exons[0]->strand;
}
return $data;
}
} |
General documentation
We can get the genes in each cluster of one type.
We pass one string identifying the genetype.
The difference with get_Genes_by_Type is that that as an arrayref as argument.
We can get the genes in each cluster of a given type.
We pass an arrayref containing the types we want to retrieve.