my $runnabledb = Bio::EnsEMBL::Analysis::RunnableDB::TranscriptConsensus->new
(
-query => $query,
-analysis => $analysis,
)
TranscriptConsensus is an extension of TranscriptCoalescer that combines protein coding and est
transcripts to identify the best supported transcript models.
The initial gene sets are clustered, then collapsed into a non-redundant set of exons
and introns which are assigned scores according to the amount of supporting evidence they have.
The similarity genes have UTR added using the est transcripts and the resulting models are assigned
a score by summing the individual exon and intron scores.
The transcripts are sorted by score and the highest scoring models are made into
gene objets and written to the TranscriptCoalescer database.
sub _check_config
{ my ( $databases, $TRANSCRIPT_COALESCER_DB_CONFIG, $ev_sets) = @_ ;
for my $db_class (keys %{$TRANSCRIPT_COALESCER_DB_CONFIG}) {
throw ( "\n\tConfig-Error: There is configuration for\" $db_class\" in Analysis/Config/GeneBuild/TranscriptCoalescer.pm " .
"but no Database defined in Exonerate2Genes.pm or Databases.pm")
unless exists $$databases{$db_class} ;
}
my %database_definition ;
for my $db_class (keys %{$TRANSCRIPT_COALESCER_DB_CONFIG}) {
for my $key ( keys %{ $$TRANSCRIPT_COALESCER_DB_CONFIG{$db_class} } ) {
map $database_definition{$_}=(),@{ ${$$TRANSCRIPT_COALESCER_DB_CONFIG{$db_class}}{$key}};
}
}
my %ev_set_definitions;
for my $set_name (keys %{ $ev_sets} ) {
map $ev_set_definitions{$_}=(), @{$$ev_sets{$set_name}} ;
}
for my $ln ( keys %ev_set_definitions ) {
throw ("\n\tError in config-file Analysis/Config/GeneBuild/TranscriptCoalescer.pm\n ".
"\tType $ln has an evidence set but there's NO database for this type defined in Config".
" TranscriptCoalescer.pm : $ln\n")
unless ( exists $database_definition{$ln} ) ;
}
for my $ln ( keys %database_definition) {
throw ("\n\tError in config-file Analysis/Config/GeneBuild/TranscriptCoalescer.pm\n".
"\tType $ln has an entry in a database-section but there's NO evidence-set defined in Config TranscriptCoalescer.pm for this type : $ln\n " )
unless ( exists $ev_set_definitions{$ln} ) ;
}
return 1 ; } |
sub fetch_input
{ my ($self) = @_;
$self->throw("No input id") unless defined($self->input_id);
my $slice = $self->fetch_sequence($self->input_id, $self->db );
my $solexa_slice;
my (%est_genes, %simgw_genes, %pred_transcripts ) ;
$self->query($slice);
my %databases = %{$self->merge_config_details( $DATABASES, $EXONERATE_CONFIG_BY_LOGIC, $TRANSCRIPT_COALESCER_DB_CONFIG)} ;
_check_config(\%databases, $TRANSCRIPT_COALESCER_DB_CONFIG, $self->{evidence_sets} ) ;
my %biotypes_to_genes ;
for my $database_class (keys %databases ) {
if ($SOLEXA && exists $$TRANSCRIPT_CONSENSUS_DB_CONFIG{$database_class}) {
my $dba = $self->get_connection($database_class,\%databases);
$dba->dnadb($self->db) ;
$solexa_slice = $self->fetch_sequence($self->input_id, $dba );
}
next unless (exists $$TRANSCRIPT_COALESCER_DB_CONFIG{$database_class}) ;
my $dba = $self->get_connection($database_class,\%databases);
$dba->dnadb($self->db) ;
my $slice = $self->fetch_sequence($self->input_id, $dba );
for my $biotype ( @{$databases{$database_class}{BIOTYPES} }) {
my $genes = $slice->get_all_Genes_by_type($biotype) ;
foreach my $gene (@$genes){
foreach my $trans (@{$gene->get_all_Transcripts}){
if ($trans->translateable_seq){
$trans->translation->seq;
}
$trans->get_all_supporting_features;
}
foreach my $exon ( @{$gene->get_all_Exons} ){
$exon->get_all_supporting_features;
}
}
my ( $set ) = $self->_get_evidence_set ( $biotype ) ;
my $db_used = "${$databases{$database_class}}{db}{-dbname}\@".
"${$databases{$database_class}}{db}{-host}::".
"${$databases{$database_class}}{db}{-port}";
info( "$db_used :" );
info(scalar (@{$genes}) . "\t$biotype-genes fetched\n ") ;
if (scalar( @{$genes} ) > 0 ) {
my @multi_exon_genes ;
for my $g (@$genes){
my $single_exon_gene ;
for my $t (@{$g->get_all_Transcripts}){
throw ("gene has more than one transcript - only processing 1-gene-1-transcript-genes")
if (@{$g->get_all_Transcripts}>1) ;
bless $t, "Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranscriptExtended" ;
$t->ev_set($set);
my @all_exons = @{ $t->get_all_Exons } ;
$single_exon_gene = 1 if (@all_exons == 1 ) ;
map { bless $_,"Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::ExonExtended" } @all_exons ;
for (my $int=0; $int < @all_exons ; $int++) {
my $e = $all_exons[$int] ;
$e->biotype($g->biotype) ;
$e->ev_set($set) ;
$e->transcript($t) ;
$e->analysis($g->analysis) ;
$e->next_exon($all_exons[$int+1]);
if ($int == 0 ) {
$e->prev_exon(0);
} else {
$e->prev_exon($all_exons[$int-1]);
}
}
}
push @multi_exon_genes, $g ;
}
push @{$biotypes_to_genes{$biotype} } , @multi_exon_genes ;
}
}
for my $logic_name_becomes_biotype ( @{$databases{$database_class}{AB_INITIO_LOGICNAMES} }) {
my $pt = $slice->get_all_PredictionTranscripts( $logic_name_becomes_biotype ) ;
my $result_set_name = $self->_get_evidence_set( $logic_name_becomes_biotype ) ;
my $ab_initio_genes = convert_prediction_transcripts_to_genes(
$pt,$logic_name_becomes_biotype,$result_set_name ) ;
info ( scalar (@{$ab_initio_genes}) . "\t$logic_name_becomes_biotype-genes\n") ;
if ( scalar(@$ab_initio_genes)>0){
push @{$biotypes_to_genes{$logic_name_becomes_biotype} } , @{$ab_initio_genes} ;
}
}
}
my $runnable = Bio::EnsEMBL::Analysis::Runnable::TranscriptConsensus->new
(
-query => $self->query,
-solexa => $solexa_slice,
-analysis => $self->analysis,
-all_genes =>\% biotypes_to_genes , -evidence_sets => $self->{evidence_sets} ,
-dnadb => $self->db ,
-utils_verbosity => $self->{utils_verbosity},
);
$runnable->solexa_slice($solexa_slice) if ($SOLEXA && $solexa_slice);
$self->runnable($runnable);
return 1; } |
sub get_connection
{ my ($self,$database_class,$d) = @_;
my %databases = %$d;
unless ( $self->{_db_conn}{$database_class} ) {
$self->{_db_conn}{$database_class} = new Bio::EnsEMBL::DBSQL::DBAdaptor( %{ ${$databases{$database_class}}{db}} ) ;
}
return $self->{_db_conn}{$database_class};
}
1; } |