package EnsEMBL::Web::Object::Gene;
use strict;
use warnings;
no warnings "uninitialized";
use EnsEMBL::Web::Proxy::Object;
use EnsEMBL::Web::Proxy::Factory;
use EnsEMBL::Web::Cache;
use Bio::EnsEMBL::Compara::GenomeDB;
use Time::HiRes qw(time);
use base qw(EnsEMBL::Web::Object);
our $MEMD = new EnsEMBL::Web::Cache;
sub _filename {
my $self = shift;
my $name = sprintf '%s-gene-%d-%s-%s',
$self->species,
$self->species_defs->ENSEMBL_VERSION,
$self->get_db,
$self->Obj->stable_id;
$name =~ s/[^-\w\.]/_/g;
return $name;
}
sub availability {
my $self = shift;
my $hash = $self->_availability;
if( $self->Obj->isa('Bio::EnsEMBL::ArchiveStableId') ) {
$hash->{'history'} = 1;
} elsif( $self->Obj->isa('Bio::EnsEMBL::Gene') ) {
my $rows = $self->table_info( $self->get_db, 'stable_id_event' )->{'rows'};
$hash->{'history'} = $rows ? 1 : 0;
$hash->{'gene'} = 1;
$hash->{'core'} = $self->get_db eq 'core' ? 1 : 0;
my $compara_db = $self->database('compara');
my $res = 0;
if ($compara_db) {
my $compara_dbh = $compara_db->get_MemberAdaptor->dbc->db_handle;
($res) = $compara_dbh->selectrow_array(
'select stable_id from family_member fm, member as m where fm.member_id=m.member_id and stable_id=? limit 1',
{}, $self->Obj->stable_id
);
}
$hash->{'family'} = $res ? 1 : 0;
} elsif( $self->Obj->isa('Bio::EnsEMBL::Compara::Family' ) ) {
$hash->{'family'} = 1;
}
return $hash;
}
sub analysis {
my $self = shift;
return $self->Obj->analysis;
}
sub counts {
my $self = shift;
my $obj = $self->Obj;
return {} unless $obj->isa('Bio::EnsEMBL::Gene');
my $key = '::COUNTS::GENE::'.
$ENV{ENSEMBL_SPECIES} .'::'.
$self->core_objects->{parameters}{db} .'::'.
$self->core_objects->{parameters}{g} .'::';
my $counts;
$counts = $MEMD->get($key) if $MEMD;
unless ($counts) {
$counts = {};
$counts->{'transcripts'} = @{$obj->get_all_Transcripts};
$counts->{'exons'} = @{$obj->get_all_Exons};
$counts->{'similarity_matches'} = $self->count_xrefs;
my $compara_db = $self->database('compara');
if($compara_db) {
my $compara_dbh = $compara_db->get_MemberAdaptor->dbc->db_handle;
if($compara_dbh) {
my %res = map { @$_ } @{$compara_dbh->selectall_arrayref('
select ml.type, count(*) as N
from member as m, homology_member as hm, homology as h,
method_link as ml, method_link_species_set as mlss
where m.stable_id = ? and hm.member_id = m.member_id and
h.homology_id = hm.homology_id and
mlss.method_link_species_set_id = h.method_link_species_set_id and
ml.method_link_id = mlss.method_link_id and
( ml.type = "ENSEMBL_ORTHOLOGUES" or
ml.type = "ENSEMBL_PARALOGUES" and
h.description = "within_species_paralog" )
group by type', {}, $obj->stable_id
)};
#warn keys %res;
$counts->{'orthologs'} = $res{'ENSEMBL_ORTHOLOGUES'};
$counts->{'paralogs'} = $res{'ENSEMBL_PARALOGUES'};
my ($res) = $compara_dbh->selectrow_array(
'select count(*) from family_member fm, member as m where fm.member_id=m.member_id and stable_id=?',
{}, $obj->stable_id
);
$counts->{'families'} = $res;
}
($counts->{'alignments'}) = $self->count_alignments if $self->get_db eq 'core';
}
$MEMD->set($key, $counts, undef, 'COUNTS') if $MEMD;
}
return $counts;
}
sub count_xrefs {
my $self = shift;
my $type = $self->get_db;
my $dbc = $self->database($type)->dbc;
#xrefs on the gene
my $xrefs_c = 0;
my $sql = qq(
SELECT x.display_label, edb.db_name, edb.status
FROM gene g, object_xref ox, xref x, external_db edb
WHERE g.gene_id = ox.ensembl_id
AND ox.xref_id = x.xref_id
AND x.external_db_id = edb.external_db_id
AND ox.ensembl_object_type = 'Gene'
AND g.gene_id = ?);
my $sth = $dbc->prepare($sql);
$sth->execute($self->Obj->dbID);
while (my ($label,$db_name,$status) = $sth->fetchrow_array) {
#these filters are taken directly from Component::_sort_similarity_links
#code duplication needs removing, and some of these may well not be needed any more
next if ($status eq 'ORTH'); # remove all orthologs
next if (lc($db_name) eq 'medline'); # ditch medline entries - redundant as we also have pubmed
next if ($db_name =~ /^flybase/i && $type =~ /^CG/ ); # Ditch celera genes from FlyBase
next if ($db_name eq 'Vega_gene'); # remove internal links to self and transcripts
next if ($db_name eq 'Vega_transcript');
next if ($db_name eq 'Vega_translation');
next if ($db_name eq 'GO');
next if ($db_name eq 'OTTP') && $label =~ /^\d+$/; #ignore xrefs to vega translation_ids
$xrefs_c++;
}
return $xrefs_c;
}
sub count_gene_supporting_evidence {
#count all supporting_features and transcript_supporting_features for the gene
#- not used in the tree but keep the code just in case we change our minds again!
my $self = shift;
my $obj = $self->Obj;
my $o_type = $self->get_db;
my $evi_count = 0;
my %c;
foreach my $trans (@{$obj->get_all_Transcripts()}) {
foreach my $evi (@{$trans->get_all_supporting_features}) {
my $hit_name = $evi->hseqname;
$c{$hit_name}++;
}
foreach my $exon (@{$trans->get_all_Exons()}) {
foreach my $evi (@{$exon->get_all_supporting_features}) {
my $hit_name = $evi->hseqname;
$c{$hit_name}++;
}
}
}
return scalar(keys(%c));
}
sub get_external_dbs {
#retrieve a summary of the external_db table from species defs
my $self = shift;
my $db = $self->get_db;
my $db_type = 'DATABASE_'.uc($db);
my $sd = $self->species_defs;
return $sd->databases->{$db_type}{'external_dbs'};
}
sub get_gene_supporting_evidence {
#get supporting evidence for the gene: transcript_supporting_features support the
#whole transcript or the translation, supporting_features provide depth the the evidence
my $self = shift;
my $obj = $self->Obj;
my $species = $self->species;
my $dbentry_adap = Bio::EnsEMBL::Registry->get_adaptor($species, "core", "DBEntry");
my $o_type = $self->get_db;
my $e;
foreach my $trans (@{$obj->get_all_Transcripts()}) {
my $tsi = $trans->stable_id;
my %t_hits;
my %vega_evi;
EVI:
foreach my $evi (@{$trans->get_all_supporting_features}) {
my $name = $evi->hseqname;
my $db_name = $dbentry_adap->get_db_name_from_external_db_id($evi->external_db_id);
#save details of evidence for vega genes for later since we need to combine them
#before we can tellif they match the CDS / UTR
if ($o_type eq 'vega') {
push @{$vega_evi{$name}{'data'}}, $evi;
$vega_evi{$name}->{'db_name'} = $db_name;
$vega_evi{$name}->{'evi_type'} = ref($evi);
next EVI;
}
#for e! genes...
#use coordinates to check if the transcript evidence supports the CDS, UTR, or just the transcript
#for protein features give some leeway in matching to transcript - +- 3 bases
if ($evi->isa('Bio::EnsEMBL::DnaPepAlignFeature')) {
if ( (abs($trans->coding_region_start-$evi->seq_region_start) < 4)
|| (abs($trans->coding_region_end-$evi->seq_region_end) < 4)) {
$e->{$tsi}{'evidence'}{'CDS'}{$name} = $db_name;
$t_hits{$name}++;
}
else {
$e->{$tsi}{'evidence'}{'UNKNOWN'}{$name} = $db_name;
$t_hits{$name}++;
}
}
elsif ( $trans->coding_region_start == $evi->seq_region_start
|| $trans->coding_region_end == $evi->seq_region_end ) {
$e->{$tsi}{'evidence'}{'CDS'}{$name} = $db_name;
$t_hits{$name}++;
}
elsif ( $trans->seq_region_start == $evi->seq_region_start
|| $trans->seq_region_end == $evi->seq_region_end ) {
$e->{$tsi}{'evidence'}{'UTR'}{$name} = $db_name;
$t_hits{$name}++;
}
else {
$e->{$tsi}{'evidence'}{'UNKNOWN'}{$name} = $db_name;
$t_hits{$name}++;
}
}
$e->{$tsi}{'logic_name'} = $trans->analysis->logic_name;
#make a note of the hit_names of the supporting_features (but don't bother for vega db genes)
if ($o_type ne 'vega') {
foreach my $exon (@{$trans->get_all_Exons()}) {
foreach my $evi (@{$exon->get_all_supporting_features}) {
my $hit_name = $evi->hseqname;
if (! exists($t_hits{$hit_name})) {
$e->{$tsi}{'extra_evidence'}{$hit_name}++;
}
}
}
}
#look at vega evidence to see if it can be assigned to 'CDS' 'UTR' etc
while ( my ($hit_name,$rec) = each %vega_evi ) {
my ($min_start,$max_end) = (1e8,1);
my $db_name = $rec->{'db_name'};
my $evi_type = $rec->{'evi_type'};
foreach my $hit (@{$rec->{'data'}}) {
$min_start = $hit->seq_region_start <= $min_start ? $hit->seq_region_start : $min_start;
$max_end = $hit->seq_region_end >= $max_end ? $hit->seq_region_end : $max_end;
}
if ($evi_type eq 'Bio::EnsEMBL::DnaPepAlignFeature') {
#protein evidence supports CDS
$e->{$tsi}{'evidence'}{'CDS'}{$hit_name} = $db_name;
}
else {
if ($min_start < $trans->coding_region_start && $max_end > $trans->coding_region_end) {
#full length DNA evidence supports CDS
$e->{$tsi}{'evidence'}{'CDS'}{$hit_name} = $db_name;
}
if ( $max_end < $trans->coding_region_start
|| $min_start > $trans->coding_region_end
|| $trans->seq_region_start == $min_start
|| $trans->seq_region_end == $max_end ) {
#full length DNA evidence or that exclusively in the UTR supports the UTR
$e->{$tsi}{'evidence'}{'UTR'}{$hit_name} = $db_name;
}
elsif (! $e->{$tsi}{'evidence'}{'CDS'}{$hit_name}) {
$e->{$tsi}{'evidence'}{'UNKNOWN'}{$hit_name} = $db_name;
}
}
}
}
return $e;
}
#generate URLs for evidence links
sub add_evidence_links {
my $self = shift;
my $ids = shift;
my $links = [];
foreach my $hit_name (sort keys %$ids) {
my $db_name = $ids->{$hit_name};
my $display = $self->get_ExtURL_link( $hit_name, $db_name, $hit_name );
push @{$links}, [$display,$hit_name];
}
return $links;
}
sub get_slice_object {
my $self = shift;
my $slice = $self->Obj->feature_Slice->expand( $self->param('flank5_display'), $self->param('flank3_display') );
return 1 unless $slice;
my $T = new EnsEMBL::Web::Proxy::Object( 'Slice', $slice, $self->__data );
# $T->highlight_display( $self->Obj->get_all_Exons );
return $T;
}
sub get_Slice {
my( $self, $context, $ori ) = @_;
my $db = $self->get_db ;
my $dba = $self->DBConnection->get_DBAdaptor($db);
my $slice = $self->Obj->feature_Slice;
if( $context =~ /(\d+)%/ ) {
$context = $slice->length * $1 / 100;
}
if( $ori && $slice->strand != $ori ) {
$slice = $slice->invert();
}
return $slice->expand( $context, $context );
}
sub gene_name {
my $self = shift;
my( $disp_id ) = $self->display_xref;
return $disp_id || $self->stable_id;
}
sub short_caption {
my $self = shift;
return $self->type_name.': '.$self->gene_name;
}
sub caption {
my $self = shift;
my( $disp_id ) = $self->display_xref;
my $caption = $self->type_name.': ';
if( $disp_id ) {
$caption .= "$disp_id (".$self->stable_id.")";
} else {
$caption .= $self->stable_id;
}
return $caption;
}
sub type_name { my $self = shift; return $self->species_defs->translate('Gene'); }
sub gene { my $self = shift; return $self->Obj; }
sub stable_id { my $self = shift; return $self->Obj->stable_id; }
sub feature_type { my $self = shift; return $self->Obj->type; }
sub source { my $self = shift; return $self->Obj->source; }
sub version { my $self = shift; return $self->Obj->version; }
sub logic_name { my $self = shift; return $self->Obj->analysis->logic_name; }
sub coord_system { my $self = shift; return $self->Obj->slice->coord_system->name; }
sub seq_region_type { my $self = shift; return $self->coord_system; }
sub seq_region_name { my $self = shift; return $self->Obj->slice->seq_region_name; }
sub seq_region_start { my $self = shift; return $self->Obj->start; }
sub seq_region_end { my $self = shift; return $self->Obj->end; }
sub seq_region_strand { my $self = shift; return $self->Obj->strand; }
sub feature_length { my $self = shift; return $self->Obj->feature_Slice->length; }
sub get_external_id {
my( $self, $type ) = @_;
my $links = $self->get_database_matches($self->gene);
my $ext_id;
foreach my $link (@$links) {
$ext_id = $link->primary_id if ($link->database eq $type);
}
return $ext_id;
}
sub get_database_matches {
my $self = shift;
my @DBLINKS;
eval { @DBLINKS = @{$self->Obj->get_all_DBLinks};};
return \@DBLINKS || [];
}
sub get_all_transcripts{
my $self = shift;
unless ($self->{'data'}{'_transcripts'}){
foreach my $transcript (@{$self->gene()->get_all_Transcripts}){
my $transcriptObj = EnsEMBL::Web::Proxy::Object->new(
'Transcript', $transcript, $self->__data
);
$transcriptObj->gene($self->gene);
push @{$self->{'data'}{'_transcripts'}} , $transcriptObj;
}
}
return $self->{'data'}{'_transcripts'};
}
sub get_all_families {
my $self = shift;
my $families;
if (ref($self->gene) =~ /Family/) { ## No gene in URL, so CoreObjects fetches a family instead
## Explicitly set db connection, as registry is buggy!
my $family = $self->gene;
my $dba = $self->database('core', $self->species);
my $genome_db = Bio::EnsEMBL::Compara::GenomeDB->new($dba);
my $members = $family->get_all_Members;
my $info = {'description' => $family->description};
my $genes = [];
foreach my $member (@$members) {
$member->genome_db($genome_db);
my $gene = $member->gene;
push @$genes, $gene if $gene;
}
$info->{'genes'} = $genes;
$info->{'count'} = @$genes;
$families->{$self->param('family')} = {'info' => $info};
}
else {
foreach my $transcript (@{$self->get_all_transcripts}) {
my $trans_families = $transcript->get_families;
while (my ($id, $info) = each (%$trans_families)) {
if (exists $families->{$id}) {
push @{$families->{$id}{'transcripts'}}, $transcript;
}
else {
my @A = keys %$info;
warn "INFO @A";
$families->{$id} = {'info' => $info, 'transcripts' => [$transcript]};
}
}
}
}
return $families;
}
sub create_family {
my ($self, $id) = @_;
my $databases = $self->database('compara') ;
my $family_adaptor;
eval{ $family_adaptor = $databases->get_FamilyAdaptor };
if ($@){ warn($@); return {} }
return $family_adaptor->fetch_by_stable_id($id);
}
sub member_by_source {
my ($self, $family, $source) = @_;
return $family->get_Member_Attribute_by_source($source) || [];
}
sub chromosome {
my $self = shift;
return undef if lc($self->coord_system) ne 'chromosome';
return $self->Obj->slice->seq_region_name;
}
sub display_xref {
my $self = shift;
return undef if $self->Obj->isa('Bio::EnsEMBL::Compara::Family');
return undef if $self->Obj->isa('Bio::EnsEMBL::ArchiveStableId');
my $trans_xref = $self->Obj->display_xref();
return undef unless $trans_xref;
return ($trans_xref->display_id, $trans_xref->dbname, $trans_xref->primary_id, $trans_xref->db_display_name, $trans_xref->info_text );
}
sub mod_date {
my $self = shift;
my $time = $self->gene()->modified_date;
return $self->date_format( $time,'%d/%m/%y' ), $self->date_format( $time, '%y/%m/%d' );
}
sub created_date {
my $self = shift;
my $time = $self->gene()->created_date;
return $self->date_format( $time,'%d/%m/%y' ), $self->date_format( $time, '%y/%m/%d' );
}
sub get_db {
my $self = shift;
my $db = $self->param('db') || 'core';
return $db eq 'est' ? 'otherfeatures' : $db;
}
sub get_author_name {
my $self = shift;
my $attribs = $self->Obj->get_all_Attributes('author');
if (@$attribs) {
return $attribs->[0]->value;
} else {
return undef;
}
}
sub gene_type {
my $self = shift;
my $db = $self->get_db;
my $type = '';
if( $db eq 'core' ){
$type = ucfirst(lc($self->Obj->status))." ".$self->Obj->biotype;
$type =~ s/_/ /;
$type ||= $self->db_type;
} elsif ($db eq 'vega') {
my $biotype = ($self->Obj->biotype eq 'tec') ? uc($self->Obj->biotype) : ucfirst(lc($self->Obj->biotype));
$type = ucfirst(lc($self->Obj->status))." $biotype";
$type =~ s/_/ /g;
$type =~ s/unknown //i;
return $type;
} else {
$type = $self->logic_name;
}
$type ||= $db;
if( $type !~ /[A-Z]/ ){ $type = ucfirst($type) } #All lc, so format
return $type;
}
sub date_format {
my( $self, $time, $format ) = @_;
my( $d,$m,$y) = (localtime($time))[3,4,5];
my %S = ('d'=>sprintf('%02d',$d),'m'=>sprintf('%02d',$m+1),'y'=>$y+1900);
(my $res = $format ) =~s/%(\w)/$S{$1}/ge;
return $res;
}
sub location_string {
my $self = shift;
return sprintf( "%s:%s-%s", $self->seq_region_name, $self->seq_region_start, $self->seq_region_end );
}
sub get_contig_location {
my $self = shift;
my ($pr_seg) = @{$self->Obj->project('seqlevel')};
return undef unless $pr_seg;
return (
$self->neat_sr_name( $pr_seg->[2]->coord_system->name, $pr_seg->[2]->seq_region_name ),
$pr_seg->[2]->seq_region_name,
$pr_seg->[2]->start
);
}
sub get_alternative_locations {
my $self = shift;
my @alt_locs = map { [ $_->slice->seq_region_name, $_->start, $_->end, $_->slice->coord_system->name ] }
@{$self->Obj->get_all_alt_locations};
return \@alt_locs;
}
sub get_homology_matches{
my( $self,$homology_source,$homology_description,$compara_db ) = @_;
$homology_source = "ENSEMBL_HOMOLOGUES" unless defined $homology_source;
$homology_description = "ortholog" unless defined $homology_description;
$compara_db ||= 'compara';
unless( $self->{'homology_matches'}{$homology_source.'::'.$homology_description} ) {
my %homologues = %{$self->fetch_homology_species_hash($homology_source, $homology_description, $compara_db)};
unless( keys %homologues ) {
$self->{'homology_matches'}{$homology_source.'::'.$homology_description} = {};
return {};
}
my $gene = $self->Obj;
my $geneid = $gene->stable_id;
my %homology_list;
my $adaptor_call = $self->param('gene_adaptor') || 'get_GeneAdaptor';
# hash to convert descriptions into more readable form
my %desc_mapping = (
'ortholog_one2one' => '1-to-1',
'apparent_ortholog_one2one' => '1-to-1 (apparent)',
'ortholog_one2many' => '1-to-many',
'between_species_paralog' => 'paralogue (between species)',
'ortholog_many2many' => 'many-to-many',
'within_species_paralog' => 'paralog (within species)'
);
foreach my $displayspp (keys (%homologues)){
( my $spp = $displayspp ) =~ tr/ /_/;
my $order=0;
foreach my $homology (@{$homologues{$displayspp}}){
my ($homologue, $homology_desc, $homology_subtype, $query_perc_id, $target_perc_id, $dnds_ratio) = @{$homology};
next unless ($homology_desc =~ /$homology_description/);
my $homologue_id = $homologue->stable_id;
$homology_desc = $desc_mapping{$homology_desc}; # mapping to more readable form
$homology_desc = "no description" unless (defined $homology_desc);
$homology_list{$displayspp}{$homologue_id}{'homology_desc'} = $homology_desc ;
$homology_list{$displayspp}{$homologue_id}{'homology_subtype'} = $homology_subtype ;
$homology_list{$displayspp}{$homologue_id}{'spp'} = $displayspp ;
$homology_list{$displayspp}{$homologue_id}{'sp_common'} = $homologue->taxon ? $homologue->taxon->common_name : '';
$homology_list{$displayspp}{$homologue_id}{'description'} = $homologue->description || 'No description';
$homology_list{$displayspp}{$homologue_id}{'order'} = $order ;
$homology_list{$displayspp}{$homologue_id}{'query_perc_id'} = $query_perc_id ;
$homology_list{$displayspp}{$homologue_id}{'target_perc_id'} = $target_perc_id ;
$homology_list{$displayspp}{$homologue_id}{'homology_dnds_ratio'} = $dnds_ratio;
$homology_list{$displayspp}{$homologue_id}{'display_id'} = $homologue->display_label || 'Novel Ensembl prediction';
$order++;
}
}
$self->{'homology_matches'}{$homology_source.'::'.$homology_description} = \%homology_list;
}
return $self->{'homology_matches'}{$homology_source.'::'.$homology_description};
}
sub fetch_homology_species_hash {
my $self = shift;
my $homology_source = shift;
my $homology_description = shift;
my $compara_db = shift || 'compara';
$homology_source = "ENSEMBL_HOMOLOGUES" unless (defined $homology_source);
$homology_description= "ortholog" unless (defined $homology_description);
my $geneid = $self->stable_id;
my $database = $self->database($compara_db) ;
my %homologues;
return {} unless $database;
$self->timer_push( 'starting to fetch' , 6 );
my $member_adaptor = $database->get_MemberAdaptor;
my $query_member = $member_adaptor->fetch_by_source_stable_id("ENSEMBLGENE",$geneid);
return {} unless defined $query_member ;
my $homology_adaptor = $database->get_HomologyAdaptor;
# It is faster to get all the Homologues and discard undesired entries
# my $homologies_array = $homology_adaptor->fetch_all_by_Member_method_link_type($query_member,$homology_source);
my $homologies_array = $homology_adaptor->fetch_all_by_Member($query_member);
$self->timer_push( 'fetched' , 6 );
# Strategy: get the root node (this method gets the whole lineage without getting sister nodes)
# We use right - left indexes to get the order in the hierarchy.
my %classification;
if (my $taxon = $query_member->taxon) {
my $node = $taxon->root();
while ($node){
$node->get_tagvalue('scientific name');
# Found a speed boost with nytprof -- avilella
# $classification{$node->get_tagvalue('scientific name')} = $node->right_index - $node->left_index;
$classification{$node->{_tags}{'scientific name'}} = $node->{_right_index} - $node->{_left_index};
$node = $node->children->[0];
}
}
$self->timer_push( 'classification' , 6 );
foreach my $homology (@{$homologies_array}){
next unless ($homology->description =~ /$homology_description/);
my ($query_perc_id, $target_perc_id, $genome_db_name, $target_member, $dnds_ratio);
foreach my $member_attribute (@{$homology->get_all_Member_Attribute}) {
my ($member, $attribute) = @{$member_attribute};
if ($member->stable_id eq $query_member->stable_id) {
$query_perc_id = $attribute->perc_id;
} else {
$target_perc_id = $attribute->perc_id;
$genome_db_name = $member->genome_db->name;
$target_member = $member;
$dnds_ratio = $homology->dnds_ratio;
}
}
push (@{$homologues{$genome_db_name}}, [ $target_member, $homology->description, $homology->subtype, $query_perc_id, $target_perc_id, $dnds_ratio ]);
}
$self->timer_push( 'homologies hacked', 6 );
foreach my $species_name (keys %homologues){
@{$homologues{$species_name}} = sort {$classification{$a->[2]} <=> $classification{$b->[2]}} @{$homologues{$species_name}};
}
return \%homologues;
}
sub get_disease_matches{
my $self = shift;
my %disease_list;
my $disease_adaptor;
return undef unless ($disease_adaptor = $self->database('disease'));
my %omim_disease = ();
my @diseases = $disease_adaptor->disease_name_by_ensembl_gene($self->gene());
foreach my $disease (@diseases){
next unless $disease;
my $desc = $disease->name;
foreach my $loc ($disease->each_Location){
my $omim_id = $loc->db_id;
push @{$omim_disease{$desc}}, $omim_id;
}
}
return \%omim_disease ;
}
sub get_compara_Member{
# Returns the Bio::EnsEMBL::Compara::Member object
# corresponding to this gene
my $self = shift;
my $compara_db = shift || 'compara';
# Catch coderef
my $cachekey = "_compara_member_$compara_db";
my $error = sub{ warn($_[0]); $self->{$cachekey}=0; return 0};
unless( defined( $self->{$cachekey} ) ){ # Look in cache
# Prepare the adaptors
my $compara_dba = $self->database( $compara_db ) || &$error( "No compara db" );
my $member_adaptor = $compara_dba->get_adaptor('Member') || &$error( "Cannot COMPARA->get_adaptor('Member')" );
# Fetch the object
my $id = $self->stable_id;
my $member = $member_adaptor->fetch_by_source_stable_id('ENSEMBLGENE',$id) || &$error( "<h3>No compara ENSEMBLGENE member for $id</h3>" );
# Update the cache
$self->{$cachekey} = $member;
}
# Return cached value
return $self->{$cachekey};
}
sub get_ProteinTree{
# Returns the Bio::EnsEMBL::Compara::ProteinTree object
# corresponding to this gene
my $self = shift;
my $compara_db = shift || 'compara';
# Where to keep the cached data
my $cachekey = "_protein_tree_$compara_db";
# Catch coderef
my $error = sub{ warn($_[0]); $self->{$cachekey}=0; return 0};
unless( defined( $self->{$cachekey} ) ){ # Look in cache
# Fetch the objects
my $member = $self->get_compara_Member($compara_db)
|| &$error( "No compara member for this gene" );
my $tree_adaptor = $member->adaptor->db->get_adaptor('ProteinTree')
|| &$error( "Cannot COMPARA->get_adaptor('ProteinTree')" );
my $tree = $tree_adaptor->fetch_by_Member_root_id($member, 0)
|| &$error( "No compara tree for ENSEMBLGENE $member" );
# Update the cache
$self->{$cachekey} = $tree;
}
# Return cached value
return $self->{$cachekey};
}
#----------------------------------------------------------------------
sub get_das_factories {
my $self = shift;
return [ $self->__data->{_object}->adaptor()->db()->_each_DASFeatureFactory ];
}
sub get_das_features_by_name {
my $self = shift;
my $name = shift || die( "Need a source name" );
my $scope = shift || '';
my $data = $self->__data;
my $cache = $self->Obj;
$cache->{_das_features} ||= {}; # Cache
my %das_features;
foreach my $dasfact( @{$self->get_das_factories} ){
my $type = $dasfact->adaptor->type;
next if $dasfact->adaptor->type =~ /^ensembl_location/;
my $name = $dasfact->adaptor->name;
next unless $name;
my $dsn = $dasfact->adaptor->dsn;
my $url = $dasfact->adaptor->url;
# Construct a cache key : SOURCE_URL/TYPE
# Need the type to handle sources that serve multiple types of features
my $key = $url || $dasfact->adaptor->protocol .'://'.$dasfact->adaptor->domain;
if ($key =~ m!/das$!) {
$key .= "/$dsn";
}
$key .= "/$type";
unless( $cache->{_das_features}->{$key} ) { ## No cached values - so grab and store them!!
my $featref = ($dasfact->fetch_all_by_ID($data->{_object}, $data ))[1];
$cache->{_das_features}->{$key} = $featref;
}
$das_features{$name} = $cache->{_das_features}->{$key};
}
return @{ $das_features{$name} || [] };
}
sub get_das_features_by_slice {
my $self = shift;
my $name = shift || die( "Need a source name" );
my $slice = shift || die( "Need a slice" );
my $cache = $self->Obj;
$cache->{_das_features} ||= {}; # Cache
my %das_features;
foreach my $dasfact( @{$self->get_das_factories} ){
my $type = $dasfact->adaptor->type;
next unless $dasfact->adaptor->type =~ /^ensembl_location/;
my $name = $dasfact->adaptor->name;
next unless $name;
my $dsn = $dasfact->adaptor->dsn;
my $url = $dasfact->adaptor->url;
# Construct a cache key : SOURCE_URL/TYPE
# Need the type to handle sources that serve multiple types of features
my $key = $url || $dasfact->adaptor->protocol .'://'.$dasfact->adaptor->domain;
$key .= "/$dsn/$type";
unless( $cache->{_das_features}->{$key} ) { ## No cached values - so grab and store them!!
my $featref = ($dasfact->fetch_all_Features( $slice, $type ))[0];
$cache->{_das_features}->{$key} = $featref;
}
$das_features{$name} = $cache->{_das_features}->{$key};
}
return @{ $das_features{$name} || [] };
}
sub get_gene_slices {
my( $self, $master_config, @slice_configs ) = @_;
foreach my $array ( @slice_configs ) {
if($array->[1] eq 'normal') {
my $slice= $self->get_Slice( $array->[2], 1 );
$self->__data->{'slices'}{ $array->[0] } = [ 'normal', $slice, [], $slice->length ];
} else {
$self->__data->{'slices'}{ $array->[0] } = $self->get_munged_slice( $master_config, $array->[2], 1 );
}
}
}
# Calls for GeneSNPView
# Valid user selections
sub valids {
my $self = shift;
my %valids = (); ## Now we have to create the snp filter....
foreach( $self->param() ) {
$valids{$_} = 1 if $_=~/opt_/ && $self->param( $_ ) eq 'on';
}
return \%valids;
}
sub getVariationsOnSlice {
my( $self, $slice, $subslices, $gene ) = @_;
my $sliceObj = EnsEMBL::Web::Proxy::Object->new(
'Slice', $slice, $self->__data
);
my ($count_snps, $filtered_snps, $context_count) = $sliceObj->getFakeMungedVariationFeatures($subslices,$gene);
$self->__data->{'sample'}{"snp_counts"} = [$count_snps, scalar @$filtered_snps];
$self->__data->{'SNPS'} = $filtered_snps;
return ($count_snps, $filtered_snps, $context_count);
}
sub get_source {
my $self = shift;
my $default = shift;
my $vari_adaptor = $self->Obj->adaptor->db->get_db_adaptor('variation');
unless ($vari_adaptor) {
warn "ERROR: Can't get variation adaptor";
return ();
}
if ($default) {
return $vari_adaptor->get_VariationAdaptor->get_default_source();
}
else {
return $vari_adaptor->get_VariationAdaptor->get_all_sources();
}
}
sub store_TransformedTranscripts {
my( $self ) = @_;
my $offset = $self->__data->{'slices'}{'transcripts'}->[1]->start -1;
foreach my $trans_obj ( @{$self->get_all_transcripts} ) {
my $transcript = $trans_obj->Obj;
my ($raw_coding_start,$coding_start);
if (defined( $transcript->coding_region_start )) {
$raw_coding_start = $transcript->coding_region_start;
$raw_coding_start -= $offset;
$coding_start = $raw_coding_start + $self->munge_gaps( 'transcripts', $raw_coding_start );
}
else {
$coding_start = undef;
}
my ($raw_coding_end,$coding_end);
if (defined( $transcript->coding_region_end )) {
$raw_coding_end = $transcript->coding_region_end;
$raw_coding_end -= $offset;
$coding_end = $raw_coding_end + $self->munge_gaps( 'transcripts', $raw_coding_end );
}
else {
$coding_end = undef;
}
my $raw_start = $transcript->start;
my $raw_end = $transcript->end ;
my @exons = ();
foreach my $exon (@{$transcript->get_all_Exons()}) {
my $es = $exon->start - $offset;
my $ee = $exon->end - $offset;
my $O = $self->munge_gaps( 'transcripts', $es );
push @exons, [ $es + $O, $ee + $O, $exon ];
}
$trans_obj->__data->{'transformed'}{'exons'} = \@exons;
$trans_obj->__data->{'transformed'}{'coding_start'} = $coding_start;
$trans_obj->__data->{'transformed'}{'coding_end'} = $coding_end;
$trans_obj->__data->{'transformed'}{'start'} = $raw_start;
$trans_obj->__data->{'transformed'}{'end'} = $raw_end;
}
}
sub store_TransformedSNPS {
my $self = shift;
my $valids = $self->valids;
foreach my $trans_obj ( @{$self->get_all_transcripts} ) {
my $T = $trans_obj->stable_id;
my $snps = {};
foreach my $S ( @{$self->__data->{'SNPS'}} ) {
foreach( @{$S->[2]->get_all_TranscriptVariations||[]} ) {
next unless $T eq $_->transcript->stable_id;
foreach my $type ( @{ $_->consequence_type || []} ) {
next unless $valids->{'opt_'.lc($type)};
$snps->{ $S->[2]->dbID } = $_;
last;
}
}
}
$trans_obj->__data->{'transformed'}{'snps'} = $snps;
}
}
sub store_TransformedDomains {
my( $self, $key ) = @_;
my %domains;
my $offset = $self->__data->{'slices'}{'transcripts'}->[1]->start -1;
foreach my $trans_obj ( @{$self->get_all_transcripts} ) {
my $transcript = $trans_obj->Obj;
next unless $transcript->translation;
foreach my $pf ( @{$transcript->translation->get_all_ProteinFeatures($key)} ) {
## rach entry is an arry containing the actual pfam hit, and mapped start and end co-ordinates
my @A = ($pf);
foreach( $transcript->pep2genomic( $pf->start, $pf->end ) ) {
my $O = $self->munge_gaps( 'transcripts', $_->start - $offset, $_->end - $offset) - $offset;
push @A, $_->start + $O, $_->end + $O;
}
push @{$trans_obj->__data->{'transformed'}{lc($key).'_hits'}}, \@A;
}
}
}
sub munge_gaps {
my( $self, $slice_code, $bp, $bp2 ) = @_;
my $subslices = $self->__data->{'slices'}{ $slice_code }[2];
foreach( @$subslices ) {
if( $bp >= $_->[0] && $bp <= $_->[1] ) {
return defined($bp2) && ($bp2 < $_->[0] || $bp2 > $_->[1] ) ? undef : $_->[2] ;
}
}
return undef;
}
sub get_munged_slice {
my $self = shift;
my $master_config = shift;
my $slice = $self->get_Slice( @_ );
my $gene_stable_id = $self->stable_id;
my $length = $slice->length();
my $munged = '0' x $length;
my $CONTEXT = $self->param( 'context' )|| 100;
my $EXTENT = $CONTEXT eq 'FULL' ? 1000 : $CONTEXT;
## first get all the transcripts for a given gene...
my @ANALYSIS = ( $self->get_db() eq 'core' ? (lc($self->species_defs->AUTHORITY)||'ensembl') : 'otter' );
@ANALYSIS = qw(ensembl havana ensembl_havana_gene) if $ENV{'ENSEMBL_SPECIES'} eq 'Homo_sapiens';
# my $features = [map { @{ $slice->get_all_Genes($_)||[]} } @ANALYSIS ];
my $features = $slice->get_all_Genes( undef, $self->param('opt_db') );
my @lengths;
if( $CONTEXT eq 'FULL' ) {
@lengths = ( $length );
} else {
foreach my $gene ( grep { $_->stable_id eq $gene_stable_id } @$features ) {
foreach my $X ('1') { ## 1 <- exon+flanking, 2 <- exon only
my $extent = $X == 1 ? $EXTENT : 0;
foreach my $transcript (@{$gene->get_all_Transcripts()}) {
foreach my $exon (@{$transcript->get_all_Exons()}) {
my $START = $exon->start - $extent;
my $EXON_LEN = $exon->end-$exon->start + 1 + 2 * $extent;
substr( $munged, $START-1, $EXON_LEN ) = $X x $EXON_LEN;
}
}
}
}
@lengths = map { length($_) } split /(0+)/, $munged;
}
## @lengths contains the sizes of gaps and exons(+- context)
$munged = undef;
my $collapsed_length = 0;
my $flag = 0;
my $subslices = [];
my $pos = 0;
foreach(@lengths,0) {
if ($flag=1-$flag) {
push @$subslices, [ $pos+1, 0, 0 ] ;
$collapsed_length += $_;
} else {
$subslices->[-1][1] = $pos;
}
$pos+=$_;
}
## compute the width of the slice image within the display
my $PIXEL_WIDTH =
($self->param('image_width')||800) -
( $self->param( 'label_width' ) || 100 ) -
3 * ( $self->param( 'margin' ) || 5 );
## Work out the best size for the gaps between the "exons"
my $fake_intron_gap_size = 11;
my $intron_gaps = ((@lengths-1)/2);
if( $intron_gaps * $fake_intron_gap_size > $PIXEL_WIDTH * 0.75 ) {
$fake_intron_gap_size = int( $PIXEL_WIDTH * 0.75 / $intron_gaps );
}
## Compute how big this is in base-pairs
my $exon_pixels = $PIXEL_WIDTH - $intron_gaps * $fake_intron_gap_size;
my $scale_factor = $collapsed_length / $exon_pixels;
my $padding = int($scale_factor * $fake_intron_gap_size) + 1;
$collapsed_length += $padding * $intron_gaps;
## Compute offset for each subslice
my $start = 0;
foreach(@$subslices) {
$_->[2] = $start - $_->[0];
$start += $_->[1]-$_->[0]-1 + $padding;
}
return [ 'munged', $slice, $subslices, $collapsed_length ];
}
sub generate_query_hash {
my $self = shift;
return {
'gene' => $self->stable_id,
'db' => $self->get_db,
};
}
# Calls for HistoryView
sub get_archive_object {
my $self = shift;
my $id = $self->stable_id;
my $archive_adaptor = $self->database('core')->get_ArchiveStableIdAdaptor;
my $archive_object = $archive_adaptor->fetch_by_stable_id($id);
return $archive_object;
}
sub get_latest_incarnation {
my $self = shift;
return $self->Obj->get_latest_incarnation;
}
=head2 get_all_associated_archived
Arg1 : data object
Description : fetches all associated archived IDs
Return type : Arrayref of
Bio::EnsEMBL::ArchiveStableId archived gene
Bio::EnsEMBL::ArchiveStableId archived transcript
Bio::EnsEMBL::ArchiveStableId archived translation (optional)
String peptide sequence (optional)
=cut
sub get_all_associated_archived {
my $self = shift;
return $self->Obj->get_all_associated_archived;
}
=head2 history
Arg1 : data object
Description : gets the archive id history tree based around this ID
Return type : listref of Bio::EnsEMBL::ArchiveStableId
As every ArchiveStableId knows about it's successors, this is
a linked tree.
=cut
sub history {
my $self = shift;
my $archive_adaptor = $self->database('core')->get_ArchiveStableIdAdaptor;
return unless $archive_adaptor;
my $history = $archive_adaptor->fetch_history_tree_by_stable_id($self->stable_id);
return $history;
}
# Calls for GeneRegulationView
sub get_fg_db {
my $self = shift;
my $slice = $self->get_Slice( @_ );
my $fg_db = undef;
my $db_type = 'funcgen';
unless($slice->isa("Bio::EnsEMBL::Compara::AlignSlice::Slice")) {
$fg_db = $slice->adaptor->db->get_db_adaptor($db_type);
if(!$fg_db) {
warn("Cannot connect to $db_type db");
return [];
}
}
return $fg_db;
}
sub feature_sets {
my $self = shift;
my $fg_db = $self->get_fg_db;
my @fsets;
my $feature_set_adaptor = $fg_db->get_FeatureSetAdaptor;
my @sources;
my $spp = $ENV{'ENSEMBL_SPECIES'};
if ($spp eq 'Homo_sapiens'){
@sources = ('RegulatoryFeatures', 'miRanda miRNA targets', 'cisRED search regions', 'cisRED motifs', 'VISTA enhancer set');
} elsif ($spp eq 'Mus_musculus'){
@sources = ('cisRED search regions', 'cisRED motifs');
}
elsif ($spp eq 'Drosophila_melanogaster'){
@sources = ('BioTIFFIN motifs', 'REDfly CRMs', 'REDfly TFBSs');
}
foreach my $name ( @sources){
push @fsets, $feature_set_adaptor->fetch_by_name($name);
}
return \@fsets;
}
sub reg_factors {
my $self = shift;
my $gene = $self->gene;
my $fsets = $self->feature_sets;
my $fg_db= $self->get_fg_db;
my $ext_feat_adaptor = $fg_db->get_ExternalFeatureAdaptor;
my $factors = $ext_feat_adaptor->fetch_all_by_Gene_FeatureSets($gene, $fsets, 1);
return $factors;
}
sub reg_features {
my $self = shift;
my $gene = $self->gene;
my $fg_db= $self->get_fg_db;
my $slice = $self->get_Slice( @_ );
my $reg_feat_adaptor = $fg_db->get_RegulatoryFeatureAdaptor;
my $feats = $reg_feat_adaptor->fetch_all_by_Slice($slice);
return $feats;
}
=head2 vega_projection
Arg[1] : EnsEMBL::Web::Proxy::Object
Arg[2] : Alternative assembly name
Example : my $v_slices = $object->ensembl_projection($alt_assembly)
Description : map an object to an alternative (vega) assembly
Return type : arrayref
=cut
sub vega_projection {
my $self = shift;
my $alt_assembly = shift;
my $alt_projection = $self->Obj->feature_Slice->project('chromosome', $alt_assembly);
my @alt_slices = ();
foreach my $seg (@{ $alt_projection }) {
my $alt_slice = $seg->to_Slice;
push @alt_slices, $alt_slice;
}
return \@alt_slices;
}
=head2 get_similarity_hash
Arg[1] : none
Example : $similarity_matches = $webobj->get_similarity_hash
Description : Returns an arrayref of hashes containing similarity matches
Return type : an array ref
=cut
sub get_similarity_hash{
my $self = shift;
my $DBLINKS;
eval { $DBLINKS = $self->Obj->get_all_DBEntries; };
warn ("SIMILARITY_MATCHES Error on retrieving gene DB links $@") if ($@);
return $DBLINKS || [];
}
sub can_export {
my $self = shift;
return $self->action =~ /^Export$/ ? 0 : $self->availability->{'gene'};
}
1;
__END__
sub features
Input: EnsEMBL::Web::Gene object
Description: Returns all the features that regulate this gene
Output: Array ref