Raw content of Bio::EnsEMBL::Analysis::RunnableDB::ExonerateOrthologueEvaluator =pod =head1 NAME Bio::EnsEMBL::Analysis::RunnableDB::ExonerateOrthologueEvaluator; =head1 SYNOPSIS my $exonerate2genes = Bio::EnsEMBL::Analysis::RunnableDB::ExonerateOrthologueEvaluator->new( -db => $refdb, -analysis => $analysis_obj, -database => $EST_GENOMIC, -query_seqs => \@sequences, -query_type => 'dna', ); $exonerate2genes->fetch_input(); $exonerate2genes->run(); $exonerate2genes->output(); $exonerate2genes->write_output(); #writes to DB =head1 DESCRIPTION This object wraps Bio::EnsEMBL::Analysis::Runnable::Exonerate2Genes and is used in the Orthologue-evaluation process. Before writing Gene structures in the ORTHOLOGUE_DB it checks if these structures already exists in either core or the ORTHOLOGUE_DB itself. =head1 CONTACT Post general queries to B<ensembl-dev@ebi.ac.uk> =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a '_' =cut package Bio::EnsEMBL::Analysis::RunnableDB::ExonerateOrthologueEvaluator; use strict; use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::Analysis::RunnableDB; use Bio::EnsEMBL::Analysis::RunnableDB::Exonerate2Genes ; use Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript; use Bio::EnsEMBL::Gene; use Bio::EnsEMBL::Registry; use Bio::EnsEMBL::Analysis::Config::Exonerate2Genes; use Bio::EnsEMBL::Analysis::Config::GeneBuild::OrthologueEvaluator; use vars qw(@ISA); @ISA = qw (Bio::EnsEMBL::Analysis::RunnableDB::Exonerate2Genes); sub run{ my ($self) = @_; print "starting run method \n" ; $self->SUPER::run(); print "run method\n" ; # load connection to core where the 'old' genes are stored in Bio::EnsEMBL::Registry->load_all($$MAIN_CONFIG{LOCATION_OF_COMPARA_REGISTRY_FILE},1,1); my $storedb = Bio::EnsEMBL::Registry->get_DBAdaptor($$MAIN_CONFIG{QUERY_SPECIES},'core') ; throw ("Could not get dba\n") unless $storedb ; # compare against CORE and OUTPUT my $outdb = $self->get_output_db; my $outdb_ga = $outdb->get_GeneAdaptor; my $store_ga = $storedb->get_GeneAdaptor; my $sa = $outdb->get_SliceAdaptor; # my $test_slice = 'chromosome:Btau_3.1:22:30177919:30506937:1' ; # my $slice = $dba->get_SliceAdaptor->fetch_by_name($test_slice) ; # my $ga = $dba->get_GeneAdaptor(); # my @new_genes = @{$ga->fetch_all_by_Slice($slice)}; my @dbas = ( $outdb_ga, $store_ga ) ; my @new_genes = @{ $self->output } ; my %bt; $bt{$outdb_ga}="_out"; $bt{$store_ga}="_core"; for my $ga (@dbas ) { for my $gene ( @new_genes ) { my $slice = $sa->fetch_by_name($gene->slice->name) ; my @stored_genes = @{$ga->fetch_all_by_Slice($slice)}; for my $sg( @stored_genes) { if ( compare_Genes($gene, $sg ) ) { $gene->biotype("redundant".$bt{$ga}); # print "gene is the same\n" ; } } } } } sub compare_Genes { my ($gene1,$gene2) = @_; # quit if genes do not have genomic overlap # start-------gene1------end start--------gene2----------end # if ($gene1->end < $gene2->start || $gene1->start > $gene2->end) { # Failed extents check return 0; } # overlap check based on all (noncoding + coding) Exons my %exon; my $all_exon_overlap; foreach my $exon1 (@{$gene1->get_all_Exons}){ print $exon1->hashkey."\n" ; foreach my $exon2 (@{$gene2->get_all_Exons}){ if ( ($exon1->overlaps($exon2)) && ($exon1->strand == $exon2->strand) ){ #print "Passed exon overlap check (noncod. + cod. exons checked) - returning 1\n"; $exon{$exon1->hashkey} = 1; #print 'hashkeys are the same' if ( $exon1->hashkey eq $exon2->hashkey ) ; #return 1; } } } if ( scalar (keys %exon) == scalar (@{$gene1->get_all_Exons}) ){ return 1 ; } return 0; } sub write_output{ my ($self,@output) = @_; my $outdb = $self->get_output_db; my $gene_adaptor = $outdb->get_GeneAdaptor; unless (@output){ @output = @{$self->output}; } my $fails = 0; my $total = 0; foreach my $gene (@output){ eval { $gene_adaptor->store($gene); }; if ($@){ warning("Unable to store gene!!\n$@"); $fails++; } $total++; } if ($fails > 0) { throw("Not all genes could be written successfully " . "($fails fails out of $total)"); } } 1;