package EnsEMBL::Web::Object::DAS::prediction_transcript;
use strict;
use warnings;
use EnsEMBL::Web::Object::DAS;
our @ISA = qw(EnsEMBL::Web::Object::DAS);
sub Types {
  ### Describe the feature types served by this source.
  ### Returns an arrayref holding a single hash that applies to every
  ### region ('*') and lists one entry (id / method / text) per
  ### prediction method.
  my $self = shift;

  ## Each row is [ method name, free-text description ]; the type id is
  ## always the method name prefixed with "exon:".
  my @catalogue = (
    [ 'Genscan',    "Ab initio prediction of protein coding genes by Genscan (C. Burge et. al., J. Mol. Biol. 1997 268:78-94). The splice site models used are described in more detail in C. Burge, Modelling dependencies in pre-mRNA splicing signals. 1998 In Salzberg, S., Searls, D. and Kasif, S., eds. Computational Methods in Molecular Biology, Elsevier Science, Amsterdam, 127-163." ],
    [ 'SNAP',       "Ab initio gene prediction by SNAP (I. Korf, BMC Bioinformatics 2004 5:59)" ],
    [ 'GeneFinder', "Ab initio prediction of protein coding genes by Genefinder (C. Wilson, L. Hilyer, and P. Green, unpublished)." ],
    [ 'Fgenesh',    "Ab initio prediction of protein coding genes (AA Salamov et al., Genome Res. 2000 4:516-22)" ],
    [ 'GSC',        "Ab initio prediction of protein coding genes by Genscan (C. Burge et al., J. Mol. Biol. 1997 268:78-94), with parameters customised for accuracy in Tetraodon sequences" ],
    [ 'GID',        "Ab initio prediction of protein coding genes by geneid (http://www1.imim.es/software/geneid/), with parameters customised for accuracy in Tetraodon sequences." ],
    [ 'GWS_H',      "Alignment of a human protein to the genome by GeneWise (E. Birney et al., Genome Res. 2004 14:988-95)" ],
    [ 'GWS_S',      "Alignment of a mouse protein to the genome by GeneWise (E. Birney et al., Genome Res. 2004 14:988-95)" ],
  );

  my @feature_types;
  foreach my $row (@catalogue) {
    my ( $method, $description ) = @$row;
    push @feature_types, {
      'id'     => "exon:$method",
      'method' => $method,
      'text'   => $description,
    };
  }

  return [ { 'REGION' => '*', 'FEATURES' => \@feature_types } ];
}
sub Stylesheet {
  ### Build the stylesheet structure for this source and pass it to
  ### $self->_Stylesheet, whose return value is returned unchanged.
  ### Exons (category 'transcription') are drawn as boxes and transcript
  ### groups (category 'group') as intron-style lines, one colour per
  ### prediction method plus a 'default' entry.
  my $self = shift;

  my %colour_of = (
    'default'    => 'black',
    'Genscan'    => 'lightseagreen',
    'Fgenesh'    => 'darkkhaki',
    'SNAP'       => 'darkseagreen4',
    'GeneFinder' => 'black',
    'GSC'        => 'black',
    'GID'        => 'black',
    'GWS_H'      => 'black',
    'GWS_S'      => 'black',
  );

  my ( %exon_styles, %transcript_styles );
  foreach my $method ( keys %colour_of ) {
    my $colour = $colour_of{$method};

    ## The 'default' entry keeps its bare name; every other method is
    ## keyed by its exon / transcript type id.
    my ( $exon_type, $transcript_type ) = ( 'default', 'default' );
    if( $method ne 'default' ) {
      $exon_type       = "exon:$method";
      $transcript_type = "transcript:$method";
    }

    $exon_styles{$exon_type} = [ {
      'type'  => 'box',
      'attrs' => { 'BGCOLOR' => $colour, 'FGCOLOR' => $colour, 'HEIGHT' => 10 },
    } ];
    $transcript_styles{$transcript_type} = [ {
      'type'  => 'line',
      'attrs' => { 'STYLE' => 'intron', 'HEIGHT' => 10, 'FGCOLOR' => $colour, 'POINT' => 1 },
    } ];
  }

  return $self->_Stylesheet( {
    'transcription' => \%exon_styles,
    'group'         => \%transcript_styles,
  } );
}
sub Features {
  ### Return DAS features (one per prediction exon) for the request.
  ###
  ### Transcripts are selected from three places: the requested feature
  ### IDs (individual exons), the requested group IDs (whole transcripts)
  ### and every prediction transcript lying on a requested segment.
  ###
  ### Returns an arrayref. It holds any error segments reported by
  ### $self->Locations (passed through untouched) followed by one hash per
  ### exon slice with the keys REGION, START, STOP and FEATURES.
  ###
  ### IDs that cannot be resolved to a prediction transcript are silently
  ### skipped rather than aborting the whole request.
  ###
  ### NOTE(review): type filters from the request (FeatureTypes) are not
  ### applied here.
  my $self = shift;

  my @segments = $self->Locations;
  my @features;
  my @groups = grep { $_ } @{$self->GroupIDs   || []};
  my @ftids  = grep { $_ } @{$self->FeatureIDs || []};

  my $dba_hashref = { map {
    ( $_ => $self->{data}->{_databases}->{_dbs}->{ $self->real_species }->{$_} )
  } qw(core) };

  ## Per display label: 'TRANS' (the transcript object), 'NO_FILTER' (emit
  ## every exon) and/or 'FILTER' (hash of the exon ranks to emit).
  my %transcripts_to_grab;

  ## First let us look at feature IDs - these are prediction transcript exons.
  ## Prediction transcript exons have the form
  ##   {prediction_transcript.display_label}.{prediction_exon.exon_rank}
  foreach my $id (@ftids) {
    next unless $id =~ /^(.*)\.(\d+)/;
    my ( $label, $exon_rank ) = ( $1, $2 );
    ## Store the rank numerically so "02" and "2" address the same exon.
    $transcripts_to_grab{ $label }{ 'FILTER' }{ $exon_rank + 0 } = 1;
  }

  ## Second let us look at group IDs - these are prediction transcript ids.
  foreach my $id (@groups) {
    $transcripts_to_grab{ $id }{ 'NO_FILTER' } = 1;
  }

  ## Finally loop through all the segments and retrieve all the prediction
  ## transcripts on them. Error segments are passed straight to the output.
  foreach my $segment (@segments) {
    if( ref($segment) eq 'HASH' && $segment->{'TYPE'} eq 'ERROR' ) {
      push @features, $segment;
      next;
    }
    foreach my $prediction_transcript ( @{$segment->slice->get_all_PredictionTranscripts} ) {
      my $wanted = $transcripts_to_grab{ $prediction_transcript->display_label } ||= {};
      $wanted->{ 'NO_FILTER' } = 1;
      $wanted->{ 'TRANS' }     = $prediction_transcript;
    }
  }

  ## Transcripts named by group / feature ID that were not found on a
  ## segment have to be fetched by ID. 'TRANS' is only stored on success,
  ## so an unknown ID leaves the entry without a transcript.
  my $pta_hashref = {};
  foreach my $display_label ( keys %transcripts_to_grab ) {
    my $wanted = $transcripts_to_grab{ $display_label };
    next if $wanted->{'TRANS'};
    foreach my $db ( keys %$dba_hashref ) {
      $pta_hashref->{$db} ||= $dba_hashref->{$db}->get_PredictionTranscriptAdaptor;
      my $found = $pta_hashref->{$db}->fetch_by_stable_id( $display_label );
      next unless $found;
      $wanted->{'TRANS'} = $found;
      last;
    }
  }

  ## Transview template - the remaining %s takes the display label.
  my $transview_url = sprintf( '%s/%s/Transcript/Summary?t=%%s',
    $self->species_defs->ENSEMBL_BASE_URL, $self->real_species
  );

  ## Now retrieve the exons and create the DAS feature hashes for them,
  ## bucketed by the slice each exon sits on. Labels are sorted so that the
  ## output order does not depend on hash ordering.
  my %features;
  foreach my $display_label ( sort keys %transcripts_to_grab ) {
    my $wanted = $transcripts_to_grab{ $display_label };
    my $pt     = $wanted->{ 'TRANS' };
    next unless $pt;                    ## unknown ID - nothing to report

    my $emit_all    = exists $wanted->{ 'NO_FILTER' };
    my $rank_filter = $wanted->{ 'FILTER' } || {};

    my $rank = 0;
    my $end  = 1;                       ## next free position in transcript co-ordinates
    foreach my $exon ( @{ $pt->get_all_Exons() || [] } ) {
      $rank++;
      my $start = $end;
      $end += $exon->length;            ## advanced even for exons that are filtered out

      my $slice      = $exon->slice;
      my $slice_name = $slice->seq_region_name.':'.$slice->start.','.$slice->end.':'.$slice->strand;
      $features{$slice_name} ||= {
        'REGION'   => $slice->seq_region_name,
        'START'    => $slice->start,
        'STOP'     => $slice->end,
        'FEATURES' => [],
      };

      ## If we have an exon filter for this transcript, skip exons whose
      ## rank was not asked for.
      next unless $emit_all || exists $rank_filter->{ $rank };

      my $logic_name = $pt->analysis->logic_name;

      ## Push the feature on to the slice specific array
      push @{$features{$slice_name}{'FEATURES'}}, {
        'ID'          => $display_label.'.'.$rank,
        'TYPE'        => 'exon:'.$logic_name,
        'METHOD'      => $logic_name,
        'CATEGORY'    => 'transcription',
        'START'       => $exon->seq_region_start,
        'END'         => $exon->seq_region_end,
        'ORIENTATION' => $self->ori($exon->seq_region_strand),
        'TARGET'      => {
          'ID'          => $display_label,
          'START'       => $start,
          'STOP'        => $end-1,
          'ORIENTATION' => '+',
        },
        'GROUP' => [{
          'ID'    => $display_label,
          'TYPE'  => 'transcript:'.$logic_name,
          'LABEL' => $display_label,
          'LINK'  => [{
            'href' => sprintf( $transview_url, $display_label ),
            'text' => 'View Transcript Summary'
          }]
        }]
      };
    }
  }

  ## Return the reference to an array of the slice specific hashes.
  push @features, values %features;
  return \@features;
}
1;