Raw content of Bio::EnsEMBL::Analysis::RunnableDB::Infernal # You may distribute this module under the same terms as perl itself # # POD documentation - main docs before the code =pod =head1 NAME Bio::EnsEMBL::Analysis::RunnableDB::Infernal; =head1 SYNOPSIS my $runnableDB = Bio::EnsEMBL::Analysis::RunnableDB::Infernal->new( -db => $db_adaptor, -input_id => $flag_id, -analysis => $analysis, ); $runnabledb->fetch_input(); $runnabledb->run(); $runnabledb->write_output(); =head1 DESCRIPTION RunnableDB to wrap cmsearch - part of the Infernal suite of programs by Sean Eddy. Uses RFAM blast hits to identify regions where cmsearch should be run. Blast hits are run using the RfamBlast module and then are filtered on a familly by familly basis. Blast hits that look promising are flagged by predict_ncRNAs.pl and the flags are used as input_ids. Uses the Bio::EnsEMBL::Analysis::Config::Databases config file. Writes non coding genes to the $GB_FINALDB database and gets DNA from $GB_ database. The blast hits from BlastRfam are stored in the pipeline database; Creates single exon non-coding genes with gene descriptions obtained from Rfam.descriptions file. The dna align feature representing the initial blast hit is added as a supporting feature and the RNA secondary structure predicted by Infernal is added as a transcript attribute in a length encoded string form to take up less room in the db. =head1 CONTACT Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk =cut package Bio::EnsEMBL::Analysis::RunnableDB::Infernal; use Bio::EnsEMBL::Pipeline::DBSQL::FlagAdaptor; use Bio::EnsEMBL::Analysis::RunnableDB; use Bio::Seq; use Bio::EnsEMBL::Analysis::Runnable::Infernal; use Bio::EnsEMBL::Analysis::Config::Databases; use Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild; use strict; use warnings; use vars qw(@ISA); @ISA = qw(Bio::EnsEMBL::Analysis::RunnableDB Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild); my $runnable; =head2 fetch_input Title : fetch_input Usage : $self->fetch_input Function: Opens connections to the final gene build database to store the genes in. : Also opens connection to the genebuild DNA database to fetch DNA from. : Parses flags use as input_ids and fetches dna_align_features to run cmsearch : on. Returns : none Args : none =cut sub fetch_input{ my ($self)=@_; # open connection to genes database # if you want to write the final genes into the pipeline database need # to catch it first and store the $self->db as the genes->db otherwise the # registry will cause problems if ( $$DATABASES{'GENEBUILD_DB'}{'-dbname'} eq $self->db->dbc->dbname && $$DATABASES{'GENEBUILD_DB'}{'-host'} eq $self->db->dbc->port && $$DATABASES{'GENEBUILD_DB'}{'-port'} == $self->db->dbc->host){ $self->gene_db($self->db); } else { my $genes_db = $self->get_dbadaptor("GENEBUILD_DB"); $self->gene_db($genes_db); } #add dna_db my $dna_db = $self->get_dbadaptor($DNA_DBNAME) ; $self->db->dnadb($dna_db); my ($start,$end); my (@dafs,@queries); my $padding = 200; my $fa = Bio::EnsEMBL::Pipeline::DBSQL::FlagAdaptor->new($self->db); my $dafa = $self->db->get_DnaAlignFeatureAdaptor; my $sa = $self->db->get_SliceAdaptor; my $runname = "Bio::EnsEMBL::Analysis::Runnable::Infernal"; if ($self->input_id =~ /(\d+):(\d+)/) { $start = $1; $end = $2; } elsif ($self->input_id =~ /^(\d+)/) { $start = $1; $end = $1; } unless ($start){ $self->throw("Input id not recognised\n"); } # get ids for (my $i = $start ; $i <= $end ; $i++){ my $flag; # try and fetch it eval{ $flag = $fa->fetch_by_dbID($i); }; if ($flag && $flag->goalAnalysis->logic_name eq $self->analysis->logic_name){ my $daf = $dafa->fetch_by_dbID($flag->ensembl_id); push @dafs, $daf; } } # Make the runnable my $runnable = $runname->new ( -queries => \@dafs, -analysis => $self->analysis, -program => $self->analysis->program_file ); $self->runnable($runnable); } =head2 run Args : none Description: Runs the runnable. Exceptions : Throws if the the runnable module is not set Returntype : scalar =cut sub run{ my ($self) = @_; foreach my $runnable (@{$self->runnable}) { $self->throw("Runnable module not set") unless ($runnable->isa("Bio::EnsEMBL::Analysis::Runnable")); $runnable->run(); $self->output($runnable->output); } } =head2 write_output Args : none Description: Writes the single exon ncRNA genes into the final genebuild databse, : also stores the structure attribute associated with the transcript Exceptions : Throws if gene or transcript attribute fail to write to the database Returntype : scalar =cut sub write_output{ my ($self) = @_; my $adaptor = $self->gene_db->get_GeneAdaptor; my $aa = $self->gene_db->get_AttributeAdaptor; my $dbea = $self->gene_db->get_DBEntryAdaptor; my @attributes; my $xref; foreach my $gene_hash (@{$self->output}){ my $gene = $gene_hash->{'gene'}; @attributes = @{$gene_hash->{'attrib'}}; $xref = $gene_hash->{'xref'}; $gene->analysis($self->analysis); $gene->status('PREDICTED'); foreach my $trans (@{$gene->get_all_Transcripts}){ $trans->analysis($self->analysis); $trans->status('PREDICTED'); } $gene->slice($self->query) if(!$gene->slice); $self->feature_factory->validate($gene); eval{ $adaptor->store($gene); }; if($@){ $self->throw("Infernal:store failed, failed to write ".$gene." to ". "the database $@"); } foreach my $trans (@{$gene->get_all_Transcripts}){ eval{ $aa->store_on_Transcript($trans->dbID,\@attributes); $dbea->store($xref, $trans->dbID, 'Transcript') if $xref; $trans->display_xref($xref); $gene->display_xref($xref); $self->gene_db->get_TranscriptAdaptor->update($trans); $self->gene_db->get_GeneAdaptor->update($gene); }; if($@){ $self->throw("Infernal:store failed, failed to write xrefs or attributes on transcript ". $trans." in the database $@"); } } } return 1; } ######################################################### # Containers =head2 gene_db Arg [1] : Bio::EnsEMBL::DBSQL::DBAdaptor Description: get/set gene db adaptor Returntype : Bio::EnsEMBL::DBSQL::DBAdaptor Exceptions : none Caller : general =cut sub gene_db { my ($self, $gene_db) = @_; if ($gene_db){ unless ($gene_db->isa("Bio::EnsEMBL::DBSQL::DBAdaptor")){ $self->throw("gene db is not a Bio::EnsEMBL::DBSQL::DBAdaptor, it is a $gene_db"); } $self->{'_gene_db'} = $gene_db; } return $self->{'_gene_db'}; } 1;