Raw content of RunTest.

Raw content of RunTest package RunTest; use lib './modules'; use lib './config'; use strict; use warnings; use File::Path; use Bio::EnsEMBL::Utils::Exception qw(throw warning verbose); use Bio::EnsEMBL::Utils::Argument qw( rearrange ); use Bio::EnsEMBL::Root; use Bio::EnsEMBL::Pipeline::Config::BatchQueue; use TestDB; use FeatureComparison; use Bio::EnsEMBL::Pipeline::Monitor; use vars qw(@ISA); @ISA = qw(Bio::EnsEMBL::Root); =head2 new Arg [1] : RunTest Arg [2] : TestDB object Arg [3] : Environment object Arg [4] : string, path to output directory Arg [5] : string, extra info for perl5lib Arg [6] : arrayref, list of tables to import Arg [7] : string, name of Bio::EnsEMBL::Pipeline::BatchSubmission module to require Arg [8] : int, toggle whether to delete the databases and directories created Function : create a RunTest object Returntype: RunTest Exceptions: throws (1) if not passed in a TestDB object or it isn't a TestDB object; (2) if not passed in an Environment object or it isn't an Environemt object; (3) if it can't require the queue manager; or (4) if not passed any tables to import or if the variable passed isn't an array ref. Example : my $runtest = RunTest->new ( -TESTDB => $testdb, -ENVIRONMENT => $environment, -OUTPUT_DIR => $DEFAULT_OUTPUT_DIR, -EXTRA_PERL => $extra_perl, -BLASTDB => $blastdb, -TABLES => \@tables_to_load, -QUEUE_MANAGER => $QUEUE_MANAGER, -DONT_CLEANUP => $dont_cleanup, -VERBOSE => $verbose, ); =cut sub new{ my ($class,@args) = @_; my $self = $class->SUPER::new(@args); &verbose('WARNING'); my ($testdb, $environment, $output_dir, $extra_perl, $tables, $queue_manager, $cleanup, $verbose, $conf, $blastdb) = rearrange(['TESTDB', 'ENVIRONMENT', 'OUTPUT_DIR', 'EXTRA_PERL', 'TABLES', 'QUEUE_MANAGER', 'DONT_CLEANUP', 'VERBOSE', 'COMPARISON_CONF', 'BLASTDB'], @args); if(!$testdb || !$testdb->isa('TestDB')){ throw("Can't run without a TestDB object or a with a ".$testdb); } if(!$environment || !$environment->isa('Environment')){ throw("Can't run without an Environment object or with a ". $environment); } if(!$output_dir){ $output_dir = $DEFAULT_OUTPUT_DIR; } if(!$queue_manager){ $queue_manager = $QUEUE_MANAGER; #found in BatchQueue.pm } my $batch_q_module = "Bio::EnsEMBL::Pipeline::BatchSubmission::$queue_manager"; my $file = "$batch_q_module.pm"; $file =~ s{::}{/}g; eval { require "$file"; }; if($@){ $self->exception("Can't find $file [$@]"); } if(!$batch_q_module->can('job_stats')){ $self->exception($batch_q_module." doesn't have the job_stats method ". "this won't work"); } if(!$tables || ref($tables) ne 'ARRAY'){ $self->exception("Must define some tables or table groups to load and". " this must passed in as an array ref $tables"); } $self->testdb($testdb); $self->environment($environment); $self->output_dir($output_dir); $self->extra_perl($extra_perl); $self->tables($tables); $self->queue_manager($batch_q_module); $self->dont_cleanup_tests($cleanup); $self->verbosity($verbose); $self->comparison_conf($conf); $self->blastdb($blastdb); return $self; } =head2 containers Arg [1] : RunTest Arg [2] : variable, normally a string Function : Containers for the variables passed into the constructor Returntype: variable stored in container Exceptions: none Example : my $testdb = $self->testdb; =cut sub testdb{ my $self = shift; $self->{'testdb'} = shift if(@_); return $self->{'testdb'}; } sub ref_testdb{ my $self = shift; $self->{'ref_testdb'} = shift if(@_); return $self->{'ref_testdb'}; } sub blastdb{ my $self = shift; $self->{'blastdb'} = shift if(@_); return $self->{'blastdb'}; } sub environment{ my $self = shift; $self->{'environment'} = shift if(@_); return $self->{'environment'}; } sub output_dir{ my $self = shift; $self->{'output_dir'} = shift if(@_); return $self->{'output_dir'}; } sub extra_perl{ my $self = shift; $self->{'extra_perl'} = shift if(@_); return $self->{'extra_perl'}; } sub tables{ my $self = shift; $self->{'tables'} = shift if(@_); return $self->{'tables'}; } sub queue_manager{ my $self = shift; $self->{'queue_manager'} = shift if(@_); return $self->{'queue_manager'}; } sub dont_cleanup_tests{ my $self = shift; $self->{'cleanup_tests'} = shift if(@_); return $self->{'cleanup_tests'}; } sub cleanup_dir{ my $self = shift; $self->{'cleanup_dir'} = shift if(@_); return $self->{'cleanup_dir'}; } sub verbosity{ my $self = shift; $self->{'verbose'} = shift if(@_); return $self->{'verbose'}; } sub comparison_conf{ my $self = shift; $self->{'comparison_conf'} = shift if(@_); return $self->{'comparison_conf'}; } =head2 job_submission_command Arg [1] : RunTest Arg [2] : string, logic_name of analysis to be run Arg [3] : string, directory to store the job output Arg [4] : int, toggle for script verbosity Function : constructs a job_submission_command with the standard options for database, logic_name and -force so it ignores the rules system Returntype: string, the job_submission command Exceptions: throws if the job_submission command isnt executable Example : =cut sub job_submission_command{ my ($self, $logic_name, $output_dir, $verbose) = @_; my $db_conf = $self->testdb->conf_hash; my $dbport = $db_conf->{'port'}; my $dbhost = $db_conf->{'host'}; my $dbpass = $db_conf->{'pass'}; my $dbuser = $db_conf->{'user'}; my $dbname = $db_conf->{'dbname'}; my $job_submission = "../scripts/job_submission.pl"; if(! -e $job_submission){ $self->execption("Can't run $job_submission if it doesn't exist"); } my $db_args = $self->database_args($self->testdb); my $cmd = "perl ".$job_submission." "; $cmd .= $db_args." "; $cmd .= "-logic_name $logic_name "; $cmd .= "-output_dir $output_dir "; $cmd .= "-force "; $cmd .= "-verbose" if($verbose); return $cmd; } =head2 rulemanager_command Arg [1] : RunTest Arg [2] : int, toggle for script verbostiy Function : constructs a rulemanager command with standard database options Returntype: string, the rulemanager command Exceptions: throws if the rulemanager command isnt executable Example : =cut sub rulemanager_command{ my ($self, $output_dir, $verbose) = @_; my $db_conf = $self->testdb->conf_hash; my $dbport = $db_conf->{'port'}; my $dbhost = $db_conf->{'host'}; my $dbpass = $db_conf->{'pass'}; my $dbuser = $db_conf->{'user'}; my $dbname = $db_conf->{'dbname'}; my $job_submission = "../scripts/rulemanager.pl"; if(! -e $job_submission){ $self->exception("Can't run $job_submission if it doesn't exist"); } my $db_args = $self->database_args($self->testdb); my $cmd = "perl ".$job_submission." "; $cmd .= $db_args." "; $cmd .= "-output_dir $output_dir "; $cmd .= "-verbose" if($verbose); print "the rulemanager command is: ", $cmd,"\n"; return $cmd; } =head2 cleanup Arg [1] : RunTest Arg [2] : int, toggle as whether to delete the output directory or not Function : calling the cleanup methods on the other objects to return the status quo and delete the output directoy if required Returntype: none Exceptions: none Example : =cut sub cleanup{ my ($self, $testdb) = @_; if($self->cleanup_dir){ print "Deleting directory tree ".$self->output_dir."\n" if($self->verbosity); rmtree($self->output_dir); } if(!$testdb){ $testdb = $self->testdb; } $testdb->cleanup if($testdb); $self->environment->return_environment if($self->environment); return; } =head2 analysis_stats Arg [1] : RunTest Arg [2] : string, logic_name Arg [3] : string, table_name Function : prints out statistics about the run, how much should have been run, what has run sucessfully, what is left in the job table and how many results there are in total Returntype: none Exceptions: none Example : =cut sub analysis_stats{ my ($self, $logic_name, $table) = @_; my $db = $self->testdb->db; my $aa = $db->get_AnalysisAdaptor; my $ra = $db->get_RuleAdaptor; my $sic = $db->get_StateInfoContainer; my $ja = $db->get_JobAdaptor; my $analysis = $aa->fetch_by_logic_name($logic_name); my $rule = $ra->fetch_by_goal($analysis); my $conditions = $rule->list_conditions; my $input_id_type; COND:foreach my $condition(@$conditions){ my $condition_analysis = $aa->fetch_by_logic_name($condition); $input_id_type = $condition_analysis->input_id_type; if($input_id_type ne 'ACCUMULATOR'){ last COND; } } my $total_input_ids = $sic->list_input_ids_by_type($input_id_type); my $analysis_input_ids = $sic->list_input_ids_by_analysis ($analysis->dbID); my $results_count = $self->count_rows_by_analysis($db, $table, $analysis->dbID); print "\n\nThere were ".@$total_input_ids." input ids to analyse\n"; print @$analysis_input_ids." input_ids were analysed successfully\n"; print "This produced ".$results_count." results\n\n"; $self->job_details($ja); } =head2 whole_pipeline_stats Arg [1] : RunTest Function : produce stats about the pipeline run, This is done using Bio::EnsEMBL::Pipeline::Monitor Returntype: none Exceptions: none Example : =cut sub whole_pipeline_stats{ my ($self) = @_; my $db = $self->testdb->db; my $sic = $db->get_StateInfoContainer; my $input_ids = $sic->get_all_input_id_analysis_sets; print "\nTotal input_ids by type\n"; foreach my $type(keys(%$input_ids)){ my @input_ids = keys(%{$input_ids->{$type}}); print $type." ".@input_ids."\n"; } print "\n"; my $monitor = new Bio::EnsEMBL::Pipeline::Monitor(-dbobj => $db); $monitor->show_current_status; $monitor->show_finished_summary(1); } =head2 job_details Arg [1] : RunTest Arg [2] : Bio::EnsEMBL::Pipeline::DBSQL::JobAdaptor Function : prints out information about the jobs in the job table Returntype: Exceptions: Example : =cut sub job_details{ my ($self, $job_adaptor) = @_; my @jobs = $job_adaptor->fetch_all; my %status_count; foreach my $j(@jobs){ if(!$status_count{$j->current_status->status}){ $status_count{$j->current_status->status} = 1; }else{ $status_count{$j->current_status->status}++; } } if(keys(%status_count)){ print "Unsucessful job details\n"; foreach my $status(keys(%status_count)){ print "Job status ".$status." ".$status_count{$status}." jobs\n"; } print "\n"; } } =head2 count_rows_by_analysis Arg [1] : RunTest Arg [2] : DBI Arg [3] : string, table_name Arg [4] : int, analysis id Function : provides a count of results in the specified table with the provided analysis id Returntype: int, count Exceptions: none Example : =cut sub count_rows_by_analysis { my $self = shift; my $db = shift; my $tablename = shift; my $analysis_id = shift; my $sth = $db->prepare( "select count(*) from $tablename where ". "analysis_id = $analysis_id" ); $sth->execute(); my ( $count ) = $sth->fetchrow_array(); return $count; } =head2 setup_database Arg [1] : RunTest Arg [2] : TestDB, if not provided takes the testdb the object holds Arg [3] : arrayref, list of tables or table groups. If not provided, they are taken from the object container. Function : To insert the listed tables into the given database. The method first checks to see if the name of a table_group has been provided, as TestDB has a method for inserting all the tables in the group. If TestDB doesn't have a method which fits the load_tablename_tables, then the tablename is assumed to be an actual table name and is put on a list to be passed to the TestDB::load_tables method. Returntype: none Exceptions: none Example : =cut sub setup_database{ my ($self, $testdb, $tables) = @_; #$tables are the tables TO LOAD if(!$testdb){ $testdb = $self->testdb; } if(!$tables){ $tables = $self->tables; } my @unloaded; foreach my $table(@{$tables}){ my $method = "load_".$table."_tables"; #loading predefined set of tables if($testdb->can($method)){ $testdb->$method; }else{ push(@unloaded, $table); } } # tables not loaded using "load_tableGroup_tables" method are loaded individually if(@unloaded >= 1){ my $data_dir = $self->testdb->curr_dir."/".$self->testdb->species; $testdb->load_tables(\@unloaded, $data_dir); } } =head2 check_output_dir Arg [1] : RunTest Function : checks if directory specifed in output_dir exists. If it does it returns 0 so the dir wont be deleted otherwise the directory is created Returntype: int Exceptions: throws if directory creation fail Example : =cut sub check_output_dir{ my ($self) = @_; if(-d $self->output_dir){ return 0; }else{ eval{ mkdir($self->output_dir); warning("Your command line-specified test output directory " . $self->output_dir . " does not exist - it's being created"; }; if($@){ $self->exception("Failed to create ".$self->output_dir." $@"); } $self->cleanup_dir(1); return 1; } } =head2 run_single_analysis Arg [1] : RunTest Arg [2] : string, logic_name Arg [3] : string, table the analysis results will be stored in Function : sets up the environment, runs job_submission, checks if jobs are running. Once the system contains no more jobs a set of stats are produced about the analysis run using the analysis_stats method. If a comparison database conf if passed in, the results would also be compared to the data in the reference database Returntype: none Exceptions: throws if fails run the job_submission command Example : $runtest->run_single_analysis('cpg', 'simple_feature'); =cut sub run_single_analysis{ my ($self, $logic_name, $table_to_fill, $verbose) = @_; $self->check_output_dir; $self->environment->add_to_perl5lib($self->extra_perl); $self->environment->change_blastdb($self->blastdb); $self->setup_database; #load the tables required by the analysis my $output_dir = $self->output_dir; my $cmd = $self->job_submission_command($logic_name, $output_dir, $verbose); print "The job submission command is: ".$cmd."\n" if($self->verbosity || $verbose); system($cmd) == 0 or $self->exception("Failed to run ".$cmd); my $run = 1; RUNNING: while ($run == 1) { my $jobs = $self->queue_manager->job_stats; if (keys(%$jobs) == 0) { $self->analysis_stats($logic_name, $table_to_fill); $run = 0; } else { sleep($self->testdb->conf_hash->{'job_stats_sleep'}); } } if($self->comparison_conf){ my $ref_testdb = TestDB->new( -SPECIES => $self->testdb->species, -VERBOSE => $self->verbosity, -CONF_FILE => $self->comparison_conf, ); my $tables_to_fill = $self->tables; push(@$tables_to_fill, $table_to_fill); $self->setup_database($ref_testdb, $tables_to_fill); $self->ref_testdb($ref_testdb); my $method = "compare_".$table_to_fill; if($self->can($method)){ $self->$method($ref_testdb, $logic_name); }else{ print "No comparison can be made as ".$method." doesnt exist\n"; } $ref_testdb->cleanup unless($self->dont_cleanup_tests); #drop testDB if($self->dont_cleanup_tests){ $self->cleanup_command($ref_testdb); #returns a clean up command to run manually later if #the analysis output is to be kept for a while } } $self->cleanup() unless($self->dont_cleanup_tests);#delete output dir, calls cleanup in TestDB.pm #(delete DB), calls Environment.pm to reset PERL5LIB if($self->dont_cleanup_tests){ $self->cleanup_command; $self->environment->return_environment; #reset PERL5LIB & BLASTDB even if dont_cleanup_tests } } =head2 run_pipeline Arg [1] : RunTest Arg [2] : int, toggle for script verbosity Function : sets up environment, sets up databases creates rulemanager commandline then runs rulemanager until sucessful completion Returntype: none Exceptions: none Example : =cut sub run_pipeline{ my ($self, $verbose) = @_; my $cleanup_dir = $self->check_output_dir; $self->environment->add_to_perl5lib($self->extra_perl); $self->environment->change_blastdb($self->blastdb); $self->setup_database; my $output_dir = $self->output_dir; my $cmd = $self->rulemanager_command($output_dir, $verbose); # note: rulemanager command is printed out regardless of $verbose system($cmd) == 0 or $self->exception("Failed to run ".$cmd); $self->whole_pipeline_stats(); if($self->comparison_conf){ my @comparison_tables = ('repeat_feature', 'prediction_transcript', 'marker_feature', 'dna_align_feature', 'protein_align_feature', 'simple_feature'); my $ref_testdb = TestDB->new( -SPECIES => $self->testdb->species, -VERBOSE => $self->verbosity, -CONF_FILE => $self->comparison_conf, ); my $tables_to_fill = $self->tables; push(@$tables_to_fill, @comparison_tables); $self->setup_database($ref_testdb, $tables_to_fill); $self->ref_testdb($ref_testdb); $self->feature_comparison->pipeline_compare (\@comparison_tables, $self->testdb->db, $ref_testdb->db); $ref_testdb->cleanup unless($self->dont_cleanup_tests); if($self->dont_cleanup_tests){ $self->cleanup_command($ref_testdb); } } $self->cleanup ($self->testdb) unless($self->dont_cleanup_tests); if($self->dont_cleanup_tests){ $self->cleanup_command; $self->environment->return_environment; } } =head2 cleanup_command Arg [1] : RunTest Arg [2] : TestDB Function : creates a command and prints it to screen to cleanup after a test run. This is to allow easy removal of test databases and output directories if you wish to keep the output for a little while for investigation Returntype: none Exceptions: Example : =cut sub cleanup_command{ my ($self, $testdb) = @_; $testdb = $self->testdb unless($testdb); my $db_args = $self->database_args($testdb); my $data_dir = $testdb->curr_dir."/".$testdb->species; my $cleanup_command = "cleanup_output.pl "; $cleanup_command .= $db_args." "; $cleanup_command .= " -output_dir ".$self->output_dir; $cleanup_command .= " -sql_data_dir ".$data_dir; print "You have specifed -dont_cleanup when running your test \n". "If you want to delete your output you can run this script: ". "ensembl-pipeline/test_system/cleanup_output.pl\n". "This is the command you should use: \n".$cleanup_command."\n". "If you want to delete only the DB, only the unzipped data dir, ". "only the analysis output dir, or the combination of any of these, remove ". "the unwanted options (-dbname, -sql_data_dir or -output_dir) from the ". "commandline\n"; } =head2 database_args Arg [1] : RunTest Arg [2] : TestDB Function : create a string of the standard database args for ensembl-pipeline scripts for the script RunTest runs Returntype: string Exceptions: none Example : =cut sub database_args{ my ($self, $testdb) = @_; $testdb = $self->testdb if(!$testdb); my $db_conf = $testdb->conf_hash; my $dbport = $db_conf->{'port'}; my $dbhost = $db_conf->{'host'}; my $dbpass = $db_conf->{'pass'}; my $dbuser = $db_conf->{'user'}; my $dbname = $db_conf->{'dbname'}; my $db_args = " -dbhost ".$dbhost." -dbuser ".$dbuser; $db_args .= " -dbpass ".$dbpass if($dbpass); $db_args .= " -dbport ".$dbport if($dbport); $db_args .= " -dbname ".$dbname." "; return $db_args; } =head2 compare_tables Arg [1] : RunTest Arg [2] : TestDB for reference database Arg [3] : logic_name for analysis to compare Function : These are a series of methods used for comparing results between the test database and a reference database using the FeatureComparison module Returntype: none Exceptions: throws if it has no input_ids on which to base its fetching Example : =cut sub compare_simple_feature{ my ($self, $ref_db, $logic_name) = @_; my ($analysis, $ref_ana, $input_ids) = $self-> get_analyses_and_input_ids($ref_db, $logic_name); my $test_id = $input_ids->[0]; my $method; my $query_fs; my $target_fs; if(!$test_id){ $self->exeception("Something is wrong analysis ".$logic_name. " of type ".$analysis->input_id_type. " has produced no input_ids"); }else{ my @array = split(/:/,$test_id); if(scalar(@array) < 3 || scalar(@array) > 6) { $query_fs = $self ->fetch_features_by_dbID($self->testdb->db, "get_SimpleFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_dbID($ref_db->db, "get_SimpleFeatureAdaptor", $logic_name); }else{ $query_fs = $self ->fetch_features_by_slice_name($input_ids, $self->testdb->db, "get_SimpleFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_slice_name($input_ids, $ref_db->db, "get_SimpleFeatureAdaptor", $logic_name); } my $feature_comparison = $self->feature_comparison; $feature_comparison->query($query_fs); $feature_comparison->target($target_fs); $feature_comparison->verbose($self->verbosity); $feature_comparison->compare; } } sub compare_prediction_transcript{ my ($self, $ref_db, $logic_name) = @_; my ($analysis, $ref_ana, $input_ids) = $self-> get_analyses_and_input_ids($ref_db, $logic_name); my $data_dir = $self->testdb->curr_dir."/".$self->testdb->species; $ref_db->load_tables(['prediction_exon'], $data_dir); my $test_id = $input_ids->[0]; my $method; my $query_fs; my $target_fs; if(!$test_id){ $self->exception("Something is wrong analysis ".$logic_name. " of type ".$analysis->input_id_type. " has produced no input_ids"); }else{ my @array = split(/:/,$test_id); if(scalar(@array) < 3 || scalar(@array) > 6) { $query_fs = $self ->fetch_features_by_dbID($self->testdb->db, "get_PredictionTranscriptAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_dbID($ref_db->db, "get_PredictionTranscriptAdaptor", $logic_name); }else{ $query_fs = $self ->fetch_features_by_slice_name($input_ids, $self->testdb->db, "get_PredictionTranscriptAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_slice_name($input_ids, $ref_db->db, "get_PredictionTranscriptAdaptor", $logic_name); } my $feature_comparison = $self->feature_comparison; $feature_comparison->query($query_fs); $feature_comparison->target($target_fs); $feature_comparison->verbose($self->verbosity); $feature_comparison->compare; } } sub compare_repeat_feature{ my ($self, $ref_db, $logic_name) = @_; my ($analysis, $ref_ana, $input_ids) = $self-> get_analyses_and_input_ids($ref_db, $logic_name); my $data_dir = $self->testdb->curr_dir."/".$self->testdb->species; $ref_db->load_tables(['repeat_consensus'], $data_dir); my $test_id = $input_ids->[0]; my $method; my $query_fs; my $target_fs; if(!$test_id){ $self->exception("Something is wrong analysis ".$logic_name. " of type ".$analysis->input_id_type. " has produced no input_ids"); }else{ my @array = split(/:/,$test_id); if(scalar(@array) < 3 || scalar(@array) > 6) { $query_fs = $self ->fetch_features_by_dbID($self->testdb->db, "get_RepeatFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_dbID($ref_db->db, "get_RepeatFeatureAdaptor", $logic_name); }else{ $query_fs = $self ->fetch_features_by_slice_name($input_ids, $self->testdb->db, "get_RepeatFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_slice_name($input_ids, $ref_db->db, "get_RepeatFeatureAdaptor", $logic_name); } my $feature_comparison = $self->feature_comparison; $feature_comparison->query($query_fs); $feature_comparison->target($target_fs); $feature_comparison->verbose($self->verbosity); $feature_comparison->compare; } } sub compare_dna_align_feature{ my ($self, $ref_db, $logic_name) = @_; my ($analysis, $ref_ana, $input_ids) = $self-> get_analyses_and_input_ids($ref_db, $logic_name); my $data_dir = $self->testdb->curr_dir."/".$self->testdb->species; $ref_db->load_tables(['prediction_exon'], $data_dir); my $test_id = $input_ids->[0]; my $method; my $query_fs; my $target_fs; if(!$test_id){ $self->exception("Something is wrong analysis ".$logic_name. " of type ".$analysis->input_id_type. " has produced no input_ids"); }else{ my @array = split(/:/,$test_id); if(scalar(@array) < 3 || scalar(@array) > 6) { $query_fs = $self ->fetch_features_by_dbID($self->testdb->db, "get_DnaAlignFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_dbID($ref_db->db, "get_DnaAlignFeatureAdaptor", $logic_name); }else{ $query_fs = $self ->fetch_features_by_slice_name($input_ids, $self->testdb->db, "get_DnaAlignFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_slice_name($input_ids, $ref_db->db, "get_DnaAlignFeatureAdaptor", $logic_name); } my $feature_comparison = $self->feature_comparison; $feature_comparison->query($query_fs); $feature_comparison->target($target_fs); $feature_comparison->verbose($self->verbosity); $feature_comparison->compare; } } sub compare_protein_align_feature{ my ($self, $ref_db, $logic_name) = @_; my ($analysis, $ref_ana, $input_ids) = $self-> get_analyses_and_input_ids($ref_db, $logic_name); my $data_dir = $self->testdb->curr_dir."/".$self->testdb->species; $ref_db->load_tables(['prediction_exon'], $data_dir); my $test_id = $input_ids->[0]; my $method; my $query_fs; my $target_fs; if(!$test_id){ $self->exception("Something is wrong analysis ".$logic_name. " of type ".$analysis->input_id_type. " has produced no input_ids"); }else{ my @array = split(/:/,$test_id); if(scalar(@array) < 3 || scalar(@array) > 6) { $query_fs = $self ->fetch_features_by_dbID($self->testdb->db, "get_ProteinAlignFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_dbID($ref_db->db, "get_ProteinAlignFeatureAdaptor", $logic_name); }else{ $query_fs = $self ->fetch_features_by_slice_name($input_ids, $self->testdb->db, "get_ProteinAlignFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_slice_name($input_ids, $ref_db->db, "get_ProteinAlignFeatureAdaptor", $logic_name); } my $feature_comparison = $self->feature_comparison; $feature_comparison->query($query_fs); $feature_comparison->target($target_fs); $feature_comparison->verbose($self->verbosity); $feature_comparison->compare; } } sub compare_marker_feature{ my ($self, $ref_db, $logic_name) = @_; my ($analysis, $ref_ana, $input_ids) = $self-> get_analyses_and_input_ids($ref_db, $logic_name); my $data_dir = $self->testdb->curr_dir."/".$self->testdb->species; my $test_id = $input_ids->[0]; my $method; my $query_fs; my $target_fs; if(!$test_id){ $self->exception("Something is wrong analysis ".$logic_name. " of type ".$analysis->input_id_type. " has produced no input_ids"); }else{ my @array = split(/:/,$test_id); if(scalar(@array) < 3 || scalar(@array) > 6) { $query_fs = $self ->fetch_features_by_dbID($self->testdb->db, "get_MarkerFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_dbID($ref_db->db, "get_MarkerFeatureAdaptor", $logic_name); }else{ $query_fs = $self ->fetch_features_by_slice_name($input_ids, $self->testdb->db, "get_MarkerFeatureAdaptor", $logic_name); $target_fs = $self ->fetch_features_by_slice_name($input_ids, $ref_db->db, "get_MarkerFeatureAdaptor", $logic_name); } my $feature_comparison = $self->feature_comparison; $feature_comparison->query($query_fs); $feature_comparison->target($target_fs); $feature_comparison->verbose($self->verbosity); $feature_comparison->compare; } } =head2 feature_comparison Arg [1] : RunTest Arg [2] : FeatureComparision Function : a container for the FeatureComparison object it will create an empty FeatureComparison object if one isn't passed in Returntype: FeatureComparison Exceptions: throws if passed a second argument which isn't a FeatureComparison Example : =cut sub feature_comparison{ my ($self, $feature_comparison) = @_; if($feature_comparison){ $self->exception("Must pass in a FeatureComparison not a " .$feature_comparison) unless($feature_comparison->isa('FeatureComparison')); $self->{'feature_comparison'} = $feature_comparison; } if(!$self->{'feature_comparison'}){ $self->{'feature_comparison'} = FeatureComparison->new ( -VERBOSE=>$self->verbosity, ); } return $self->{'feature_comparison'}; } =head2 get_analyses_and_input_ids Arg [1] : RunTest Arg [2] : TestDB for reference database Arg [3] : string, logic name for analysis to fetch Function : fetches the analysis object from both the reference database and the test database and fetched the input_ids from the test database Returntype: Bio::EnsEMBL::Pipeline::Analysis, Bio::EnsEMBL::Pipeline::Analysis, hashref Exceptions: none Example : =cut sub get_analyses_and_input_ids{ my ($self, $ref_db, $logic_name) = @_; my $analysis = $self->testdb->db->get_AnalysisAdaptor ->fetch_by_logic_name($logic_name); my $ref_ana = $ref_db->db->get_AnalysisAdaptor ->fetch_by_logic_name($logic_name); my $input_ids = $self->testdb->db->get_StateInfoContainer ->list_input_ids_by_type($analysis->input_id_type); return($analysis, $ref_ana, $input_ids); } =head2 fetch_features_by_slice Arg [1] : RunTest Arg [2] : arrayref of slice names Arg [3] : dbadaptor Arg [4] : string method to fetch the appropriate adaptor Arg [5] : logic_name of the analysis results desired Function : fetches features on a slice by slice basis using the adaptors fetch_all_by_Slice method Returntype: arrayref of Bio::EnsEMBL::Features Exceptions: Example : =cut sub fetch_features_by_slice_name{ my ($self, $names, $db, $fetch_adaptor_method, $logic_name) = @_; my @features; my $sa = $db->get_SliceAdaptor; my $fa = $db->$fetch_adaptor_method; foreach my $name(@$names){ my $slice = $sa->fetch_by_name($name); my $features = $fa->fetch_all_by_Slice($slice, $logic_name); push(@features, @$features); } return \@features; } =head2 fetch_features_by_dbID Arg [1] : RunTest Arg [2] : dbadaptor Arg [3] : string method to fetch the appropriate adaptor Arg [4] : logic_name of the analysis results desired Function : when the input_ids for a particular input_id_type doesnt match the standard slice name then all the features are fetched out of the database on the basis of dbID and then the features of appropriate analysis are filtered out Returntype: arrayref of Bio::EnsEMBL::Features Exceptions: Example : =cut sub fetch_features_by_dbID{ my ($self, $db, $fetch_adaptor_method, $logic_name) = @_; my @features; my $fa = $db->$fetch_adaptor_method; my $dbIDs = $fa->list_dbIDs; if(@$dbIDs <= 1000){ @features = @{$fa->fetch_all_by_dbID_list($dbIDs)}; } else{ my $num_in_list = 1000; my @list_of_lists; while(@$dbIDs){ my @list = splice(@$dbIDs, 0, $num_in_list); push(@list_of_lists, \@list); } foreach my $list(@list_of_lists){ my @chunk = @{$fa->fetch_all_by_dbID_list($list)}; push(@features, @chunk); } } my @analysis_features; foreach my $f(@features){ if($f->analysis->logic_name eq $logic_name){ push(@analysis_features, $f); } } return(\@analysis_features); } sub before_throw{ my ($self) = @_; if(!$self->dont_cleanup_tests){ $self->cleanup; if($self->ref_testdb){ $self->cleanup($self->ref_testdb); } }else{ $self->cleanup_command; if($self->ref_testdb){ $self->cleanup_command($self->ref_testdb); } } } sub exception{ my ($self, $msg) = @_; $self->before_throw; throw($msg); } 1;