package EnsEMBL::Web::Object::DAS::gene;
use strict;
use warnings;
use EnsEMBL::Web::Object::DAS;
our @ISA = qw(EnsEMBL::Web::Object::DAS);
use Data::Dumper;
sub Types {
my $self = shift;
return [
{
'REGION' => '*',
'FEATURES' => [
{ 'id' => 'gene' }
]
}
];
}
sub Features {
my $self = shift;
### Return das features...
### structure returned is an arrayref of hashrefs, each array element refers to
### a different segment, the hashrefs contain segment info (seg type, seg name,
### seg start, seg end) and an array of feature hashes
###_ Part 1: initialize data structures...
my @features; ## Final array whose reference is returned - simplest way to handle errors/unknowns...
my $features; ## Temporary hashref to store segments and features there on...
my %genes; ## Temporary hash to store ensembl gene objects...
my $dba_hashref; ## Hash ref of database handles...
## (although not implemented at the moment may allow multiple dbs to be connected to..)
my @logic_names; ## List of logic names of transcripts to return...
###_ Part 2: parse the DSN to work out what we want to display
### Relevant part of DSN is stored in $ENV{'ENSEMBL_DAS_SUBTYPE'}
###
### For genes the format will eventually be the same as for transcripts and translations:
###
###> {species}.ASSEMBLY[-{coordinate_system}].[enhanced_]transcript[-{database}[-{logicname}]*]
###
### If database is missing assumes core, if logicname is missing assumes all
### transcript features
###
### coordinate_system defines the coord system on which to return the features
### e.g.
###
###* /das/Homo_sapiens.NCBI36-toplevel.transcript-core-ensembl
###
###* /das/Homo_sapiens.NCBI36-clone.transcript-vega
###
### ASSEMBLY is supported, but coordinate_system is not yet.
my @dbs = ();
my $db;
if( $ENV{'ENSEMBL_DAS_SUBTYPE'} ) {
( $db, @logic_names ) = split /-/, $ENV{'ENSEMBL_DAS_SUBTYPE'};
push @dbs, $db;
} else {
@dbs = ('core'); ## default = core...;
}
foreach (@dbs) {
my $T = $self->{data}->{_databases}->get_DBAdaptor($_,$self->real_species);
$dba_hashref->{$_}=$T if $T;
}
@logic_names = (undef) unless @logic_names; ## default is all features of this type
###_ Part 3: parse CGI parameters to get out feature types, group ids and feature ids
###* FeatureTypes - Currently ignored...
###* Group IDs - filter in this case transcripts
###* Feature IDs - filter in ths case exons
my @segments = $self->Locations;
my %fts = map { $_=>1 } grep { $_ } @{$self->FeatureTypes || []};
my @groups = grep { $_ } @{$self->GroupIDs || []};
my @ftids = grep { $_ } @{$self->FeatureIDs || []};
my $filters = {
map( { ( $_, 'exon' ) } @ftids ), ## Filter for exon features...
map( { ( $_, 'gene' ) } @groups ) ## Filter for gene features...
};
my $no_filters = {};
#logic names filter
my %logic_name_filter = map { $_ ? ($_,1) : () } @logic_names;
###Part 4: Fetch features on the segments requested...
###The approach is to map all requested slices onto the top level in the Factory, irrespective of
###their actual coord system. By retrieving features on this top level coord_system partially
###overlapping features can be retrieved.
###When features are requested to be *returned* on a different coordinate system such as clone,
###then this change in coordinates is done at the very end, using information in the
###%projection_mappings hash.
###Note that the projection onto clones is not supported yet
#identify coordinates of the wanted slice on the requested coordinate system
foreach my $segment (@segments) {
if( ref($segment) eq 'HASH' && ($segment->{'TYPE'} eq 'ERROR' || $segment->{'TYPE'} eq 'UNKNOWN') ) {
push @features, $segment;
next;
}
my $segment_name = $segment->slice->seq_region_name;
my $segment_start = $segment->slice->start;
my $segment_end = $segment->slice->end;
my $segment_strand = $segment->slice->strand;
my $slice_name = "$segment_name:$segment_start,$segment_end:$segment_strand";
#get mappings onto any requested coordinate system
$features->{$slice_name}= {
'REGION' => $segment->slice->seq_region_name,
'START' => $segment->slice->start,
'STOP' => $segment->slice->end,
'FEATURES' => [],
};
#foreach database get all genes on the top level slice
foreach my $db ( keys %$dba_hashref ) {
foreach my $gene ( @{$segment->slice->get_all_Genes(undef,$db) } ) {
my $gsi = $gene->stable_id;
delete $filters->{$gsi}; # This comes off a segment so make sure it isn't filtered!
$no_filters->{$gsi} = 1;
my $trans_arrayref = [];
foreach my $transcript ( @{$gene->get_all_Transcripts} ) {
next if defined $logic_names[0] &&
!$logic_name_filter{ $transcript->analysis->logic_name };
my $tsi = $transcript->stable_id;
my $transobj = { 'object' => $transcript, 'exons' => [] };
delete $filters->{$tsi}; # This comes off a segment so make sure it isn't filtered!
$no_filters->{$tsi} = 1;
my $start = 1;
foreach my $exon ( @{$transcript->get_all_Exons} ) {
my $esi = $exon->stable_id;
delete $filters->{$esi}; # This comes off a segment so make sure it isn't filtered!
push @{ $transobj->{'exons'} }, [ $exon , $start, $start+$exon->length-1 ];
$start += $exon->length;
$no_filters->{$esi} = 1;
}
push @$trans_arrayref,$transobj;
}
$genes{ $gsi } = { 'db' => $db,
'object' => $gene,
'transcripts' => $trans_arrayref,
'slice_name' => $slice_name
} if @$trans_arrayref;
}
}
} ## end of segment loop....
###_ Part 5: Fetch features based on group_id and filter_id - filter_id currently only works for exons
### and group_id only for genes
my $ga_hashref = {};
my $ea_hashref = {};
my $ta_hashref = {};
#link extra exon_IDs requested with the projection seq_region(s) they are on
my %extra_regions;
foreach my $id ( keys %$filters ) {
my $gene;
my $filter;
my $slice_name;
foreach my $db ( keys %$dba_hashref ) {
my $gadap = $dba_hashref->{$db}->get_GeneAdaptor;
if( $filters->{$id} eq 'gene' ) {
$gene = $gadap->fetch_by_stable_id( $id );
$slice_name = $gene->slice->seq_region_name.':'.$gene->slice->start.','.$gene->slice->end.':'.$gene->slice->strand;
unless( exists $features->{$slice_name} ) {
$features->{$slice_name} = {
'REGION' => $gene->slice->seq_region_name,
'START' => $gene->slice->start,
'STOP' => $gene->slice->end,
'FEATURES' => [],
};
}
}
if ($filters->{$id} eq 'exon') {
$gene = $gadap->fetch_by_exon_stable_id( $id );
my $eadap = $dba_hashref->{$db}->get_ExonAdaptor;
my $exon = $eadap->fetch_by_stable_id( $id );
$slice_name = $exon->slice->seq_region_name.':'.$exon->slice->start.','.$exon->slice->end.':'.$exon->slice->strand;
unless( exists $features->{$slice_name} ) {
$features->{$slice_name} = {
'REGION' => $exon->slice->seq_region_name,
'START' => $exon->slice->start,
'STOP' => $exon->slice->end,
'FEATURES' => [],
};
}
}
next unless $gene;
if ($gene) {
my $gsi = $gene->stable_id;
unless( exists $genes{$gsi} ) { ## Gene doesn't exist so we have to store it and grab transcripts and exons...
$genes{$gsi}->{'object'} = $gene;
$genes{$gsi}->{'slice_name'} = $slice_name;
$genes{$gsi}->{'db'} = $db;
foreach my $transcript ( @{$gene->get_all_Transcripts} ) {
next if defined $logic_names[0] &&
!$logic_name_filter{ $transcript->analysis->logic_name };
my $tsi = $transcript->stable_id;
my $transobj = { 'object' => $transcript, 'exons' => [] };
my $start = 1;
EXON:
foreach my $exon ( @{$transcript->get_all_Exons} ) {
my $esi = $exon->stable_id;
if ($filters->{$id} eq 'exon') {
delete $filters->{$esi};
$no_filters->{$esi} = 1;
if ($esi ne $id) {
next EXON;
}
}
push @{ $transobj->{'exons'} }, [ $exon , $start, $start+$exon->length-1 ];
$start += $exon->length;
}
push @{ $genes{$gsi}->{'transcripts'} },$transobj;
}
}
# Delete all filters on gene
if( $filter eq 'gene' ) {
delete $filters->{$gsi};
$no_filters->{$gsi} = 1;
}
}
}
} ## end of segment loop....
#View templates
$self->{'templates'} ||= {};
$self->{'templates'}{'geneview_URL'} = sprintf( '%s/%s/Gene/Summary?g=%%s;db=%%s', $self->species_defs->ENSEMBL_BASE_URL, $self->real_species );
### Part 6: Grab and return features
### Now we do all the nasty stuff of retrieving features and creating DAS objects for them...
foreach my $gene_stable_id ( keys %genes ) {
my $gene = $genes{$gene_stable_id}{'object'};
my $slice_name = $genes{$gene_stable_id}{'slice_name'};
my $db = $genes{$gene_stable_id}{'db'};
my $gene_start = $gene->seq_region_start;
my $gene_end = $gene->seq_region_end;
my $gene_group = {
'ID' => $gene_stable_id,
'TYPE' => 'Gene:'.$gene->analysis->logic_name,
'LABEL' => sprintf( '%s (%s)', $gene_stable_id, $gene->external_name || 'Novel' ),
'LINK' => [
{ 'text' => 'GeneView '.$gene_stable_id ,
'href' => sprintf( $self->{'templates'}{'geneview_URL'}, $gene_stable_id, $db ),
}
],
};
foreach my $transobj ( @{ $genes{$gene_stable_id}{'transcripts'} } ) {
my $transcript = $transobj->{'object'};
my $transcript_stable_id = $transcript->stable_id;
EXON:
foreach my $exon_ref ( @{$transobj->{'exons'}}) {
my $exon = $exon_ref->[0];
my $exon_stable_id = $exon->stable_id;
my $exon_start = $exon->seq_region_start;
my $exon_end = $exon->seq_region_end;
#filter exons to only show that which overlaps the slice
next EXON unless ($exon->seq_region_start < $exon->slice->end && $exon->seq_region_end > $exon->slice->start);
my $det = {
'ID' => $exon_stable_id,
'TYPE' => 'exon:'.$transcript->analysis->logic_name,
'METHOD' => $gene->analysis->logic_name,
'CATEGORY' => 'transcription',
'GROUP' => [ $gene_group ],
'START' => $exon_start,
'END' => $exon_end,
'ORIENTATION' => $self->ori($exon->strand),
'TARGET' => {
'ID' => $gene_stable_id,
'START' => $exon_start - $gene_start + 1,
'STOP' => $exon_end - $gene_start + 1,
}
};
push @{$features->{$slice_name}{'FEATURES'}}, $det;
}
}
}
### Part 7: Return the reference to an array of the slice specific hashes.
push @features, values %{$features};
# warn Dumper(\@features);
return \@features;
}
sub Stylesheet {
my $self = shift;
my $stylesheet_structure = {};
my $colour_hash = {
'default' => 'grey50',
'havana' => 'dodgerblue4',
'ensembl' => 'rust',
'flybase' => 'rust',
'wornbase' => 'rust',
'ensembl_havana_transcript' => 'goldenrod3',
'estgene' => 'purple1',
'otter' => 'dodgerblue4',
'otter_external' => 'orangered2',
'otter_corf' => 'olivedrab',
'otter_igsf' => 'olivedrab',
'otter_eucomm' => 'orangered2',
};
foreach my $key ( keys %$colour_hash ) {
my $colour = $colour_hash->{$key};
$stylesheet_structure->{"exon"}{$key ne 'default' ? "exon:$key" : 'default'}=
[{ 'type' => 'box', 'attrs' => { 'FGCOLOR' => $colour, 'BGCOLOR' => 'white', 'HEIGHT' => 6 } }];
$stylesheet_structure->{'exon'}{$key ne 'default' ? "exon:$key" : 'default'} =
[{ 'type' => 'box', 'attrs' => { 'BGCOLOR' => $colour, 'FGCOLOR' => $colour, 'HEIGHT' => 10 } }];
$stylesheet_structure->{"group"}{$key ne 'default' ? "gene:$key" : 'default'} =
[{ 'type' => 'line', 'attrs' => { 'STYLE' => 'intron', 'HEIGHT' => 10, 'FGCOLOR' => $colour, 'POINT' => 1 } }];
}
return $self->_Stylesheet( $stylesheet_structure );
}
1;