Bio::EnsEMBL::Analysis::RunnableDB OrthologueEvaluator
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::RunnableDB::OrthologueEvaluator;
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::Exonerate2Genes
Bio::EnsEMBL::Analysis::Config::GeneBuild::OrthologueEvaluator
Bio::EnsEMBL::Analysis::RunnableDB
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils
Bio::EnsEMBL::Gene
Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor
Bio::EnsEMBL::Pipeline::DBSQL::RuleAdaptor
Bio::EnsEMBL::Pipeline::Rule
Bio::EnsEMBL::Pipeline::Utils::InputIDFactory(1)
Bio::EnsEMBL::Pipeline::Utils::InputIDFactory(2)
Bio::EnsEMBL::Registry
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Bio::SeqIO
Inherit
Bio::EnsEMBL::Analysis::RunnableDB
Synopsis
my $orthologueanalysis = Bio::EnsEMBL::Analysis::RunnableDB::OrthologueEvaluator->new(
-analysis => $analysis_obj,
);
$orthologueanalysis->fetch_input();
$orthologueanalysis->run();
$orthologueanalysis->output();
$orthologueanalysis->write_output();
Description
This object wraps Bio::EnsEMBL::Analysis::Runnable::OrthologueEvaluator and is
used to fetch the input from different databases as well as writing results
the results to the database.a
Methods
chunk_and_write_fasta_sequences
No description
Code
genes
No description
Code
get_initial_geneset
No description
Code
new
No description
Code
pipeline_adaptor
No description
Code
post_logic_name
No description
Code
read_and_check_config
No description
Code
run
No description
Code
slice_name
No description
Code
species_1
No description
Code
species_2
No description
Code
upload_input_ids
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
chunk_and_write_fasta_sequencesdescriptionprevnextTop
sub chunk_and_write_fasta_sequences {
    my ( $self, $tref, $base_dir , $file_prefix, $file_suffix,$chunk_size  ) = @_ ;

    $chunk_size = 5 unless $chunk_size ; 
    print scalar(@$tref) . " sequences to write\n " ;

    return if scalar(@$tref) == 0 ; 

    unless ($base_dir) {
      my $conf = $$EXONERATE_CONFIG_BY_LOGIC{$self->post_logic_name}{QUERYSEQS};  
      $conf ? $base_dir = $conf : throw ( "There's no output-dir configured in Exonerate2Genes.pm " . 
      "for analysis " . $self->post_logic_name . " check the config\n"); 
    }
  
    $base_dir = $base_dir . "/" unless $base_dir =~m/\/$/;
    #$base_dir = $base_dir . $self->post_logic_name . "/" ;
`mkdir $base_dir ` unless ( -e $base_dir); unless ( $file_prefix ) { if ($self->slice_name){ $file_prefix = $self->slice_name ; }elsif( $self->input_id ) { $file_prefix = $self->input_id ; }else{ throw("Error - can't create file name because of missing slice-name or input_id\n") ; } } $file_suffix = ".fa" unless $file_suffix ; print "writing squences to $base_dir\n" ; my @filenames ; my $wtf = 0 ; my @ltr = @$tref ; my ($seq_file , $name ) ; my $fcnt = 0 ; for ( my $i=0 ; $i<scalar(@ltr) ; $i++ ) { my $seq = $ltr[$i] ; # create new file if #$chunk_size chunks are already written
if ($i % $chunk_size == 0 ){ $seq_file->close() if ($wtf == 1) ; $fcnt++; my $file_name = $file_prefix . "_" . $fcnt . $file_suffix ; # these filenames are stored as input_ids in refdb
push @filenames, $file_name ; my $tmp = $base_dir . $file_name ; $seq_file = Bio::SeqIO->new( #-file => ">$base_dir. $file_name" ,
-file => ">$tmp" , -format => 'fasta' ); $wtf = 1 ; } $seq_file->write_seq($seq); } $seq_file->close(); return\@ filenames ;
}
genesdescriptionprevnextTop
sub genes {
  my ($self, $g) = @_ ;
  push @{$self->{_genes}}, @$g if $g ;
  return $self->{_genes} ;
}
get_initial_genesetdescriptionprevnextTop
sub get_initial_geneset {
   
  my ( $self, $species_alias,$biotypes) = @_ ; 

  my $dba = Bio::EnsEMBL::Registry->get_DBAdaptor($species_alias,'core') ;  

  unless ( $dba )  {
    throw("could not get database adaptor for species : $species_alias ") ;  
  } 

  my $sa = $dba->get_SliceAdaptor() ;

  print "Fetching genes out of ". $dba->dbname . " @ " . $dba->host ." : " . $dba->port ." : " . $self->slice_name .  "\n" ;
  my $qy_slice      = $sa->fetch_by_name($self->slice_name) ;

  my @pt_genes ;
  for my $bt ( @$biotypes ) { 
    my $genes_on_slice =  $qy_slice->get_all_Genes_by_type( $bt );
    print scalar(@$genes_on_slice) . " genes of biotype $bt found on slice ( $species_alias )\n ";  
    $self->genes( $genes_on_slice ) ; 
   }
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args); 
  $self->read_and_check_config;  
  $self->verbose(1) ; 
  return $self   ;
}
pipeline_adaptordescriptionprevnextTop
sub pipeline_adaptor {
    my ($self, $db )= @_ ;    
 
       if ( $db ) { 
         my $pa = Bio::EnsEMBL::Pipeline::DBSQL::DBAdaptor->new(
                                                            -host   => $db->host, 
                                                            -dbname => $db->dbname, 
                                                            -user   => $db->username,
                                                            -pass   => $db->password ,
                                                            -port   => $db->port,
                                                            );
         $self->{_PA}=$pa ;  
       } 
    return $self->{_PA} ; 
}



1;
}
post_logic_namedescriptionprevnextTop
sub post_logic_name {
    my ($self) = @_ ;
  my $post_logic_name = $self->input_id ; 
  $post_logic_name =~s/(^.*?)(\:.*)/$1/ ;  
return $post_logic_name ;
}
read_and_check_configdescriptionprevnextTop
sub read_and_check_config {
  (my $self) = @_ ;  

  # check if config file exists 
throw("Your compara-registry-file LOCATION_OF_COMPARA_REGISTRY_FILE does not exist !!". "\nCheck your config !") unless ( -e $$MAIN_CONFIG{LOCATION_OF_COMPARA_REGISTRY_FILE} ) ; print STDERR "reading compara-config file : $$MAIN_CONFIG{LOCATION_OF_COMPARA_REGISTRY_FILE}\n"; # check compara configuration file and schema-versions of dbs
Bio::EnsEMBL::Registry->load_all($$MAIN_CONFIG{LOCATION_OF_COMPARA_REGISTRY_FILE},1,1); my @dba = @{Bio::EnsEMBL::Registry->get_all_DBAdaptors()}; my %tmp; for my $a ( @dba ) { push @{$tmp{ $a->get_MetaContainer->get_schema_version }} , $a->dbname ; } if ( keys %tmp > 1 ) { warning("You're using databases with different schemas - this can cause problems ...\n" ) ; for ( keys %tmp ) { print "schema $_:\n". join("\n" , @{$tmp{$_}} ) . "\n\n" ; } }
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;
}
slice_namedescriptionprevnextTop
sub slice_name {
     my ($self) = @_ ;  
  my $slice_name = $self->input_id ; 
   $slice_name  =~s/(^.*?\:)(.*)/$2/   ;
return $slice_name ;
}
species_1descriptionprevnextTop
sub species_1 {
  my ($self, $g) = @_ ;
  $self->{_species_1} = $g if $g ;
  return $self->{_species_1} ;
}
species_2descriptionprevnextTop
sub species_2 {
  my ($self, $g) = @_ ;
  $self->{_species_2} = $g if $g ; 
  return $self->{_species_2} ;
}
upload_input_idsdescriptionprevnextTop
sub upload_input_ids {
     my ( $self, $input_ids  ) = @_ ; 
 
   my $submit_analysis = "Submit_" . $self->post_logic_name;      

   print "Submit-analysis for next analysis is : $submit_analysis\n" ; 
   print "Logic-name for next analysis is      : " . $self->post_logic_name . "\n" ; 

   # otherwise the Registry overrides the adaptor - silly ... 
#print "db before clear : " . $self->db();
#print "\n" ;
#Bio::EnsEMBL::Registry->clear();
#print "db after clear : " . $self->db();
#print "\n" ;
$self->pipeline_adaptor($self->db) ; my $input_id_type = "file_" . $self->post_logic_name ; my $if = Bio::EnsEMBL::Pipeline::Utils::InputIDFactory->new( -db => $self->pipeline_adaptor , -logic_name => $submit_analysis , -file => 1 , -input_id_type => $input_id_type, ); unless ($if->get_analysis($submit_analysis , $input_id_type, 1)) { throw( "Cant find analysis $submit_analysis in db ") ; } # check first if input_id is not already stored ...
my $a = $self->pipeline_adaptor->get_AnalysisAdaptor->fetch_by_logic_name($submit_analysis) ; my $ia_db = $self->pipeline_adaptor->get_StateInfoContainer->list_input_id_by_Analysis($a) ; my @input_ids_not_stored ; my %tmp ; @tmp{@$ia_db} = 1; for my $i ( @$input_ids ) { push @input_ids_not_stored, $i unless (exists $tmp{$i}) ; } print scalar(@$input_ids) - scalar(@input_ids_not_stored) . " input ids already stored in db " . $self->pipeline_adaptor->dbname . "\n" ; $if->input_ids(\@input_ids_not_stored) ; $if->store_input_ids; print scalar(@input_ids_not_stored) . " input-ids uploaded into " . $self->pipeline_adaptor->dbname . "\n" ;
}
write_outputdescriptionprevnextTop
sub write_output {
  my ($self,$missing_seq) = @_;
}
General documentation
CONTACTTop
Post general queries to ensembl-dev@ebi.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a '_'