Bio::EnsEMBL::Analysis::RunnableDB Exonerate2Genes
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::Analysis::RunnableDB::Exonerate2Genes;
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::Exonerate2Genes
Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Bio::EnsEMBL::Gene
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Synopsis
my $exonerate2genes = Bio::EnsEMBL::Analysis::RunnableDB::Exonerate2Genes->new(
-db => $refdb,
-analysis => $analysis_obj,
-input_id => $chunk_file_name
);
$exonerate2genes->fetch_input();
$exonerate2genes->run();
$exonerate2genes->output();
$exonerate2genes->write_output(); #writes to DB
Description
This object wraps Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript
It is meant to provide the interface for mapping ESTs to the genome
sequence and writing the results as genes. By the way Exonerate is run
we do not cluster transcripts into genes and only write one transcript per gene.
we then create a dbadaptor for the target database.
Methods
COVERAGE_BY_ALIGNED
No description
Code
FILTER
No description
Code
GENOMICSEQS
No description
Code
IIDREGEXP
No description
Code
NONREF_REGIONS
No description
Code
OPTIONS
No description
Code
OUTDB
No description
Code
PROGRAM
No description
Code
QUERYANNOTATION
No description
Code
QUERYSEQS
No description
Code
QUERYTYPE
No description
Code
database
No description
Code
fetch_input
No description
Code
filter
No description
Code
genomic
No description
Code
get_chr_names
No description
Code
get_output_db
No description
Code
make_genes
No description
Code
new
No description
Code
query_seqs
No description
Code
read_and_check_config
No description
Code
run
No description
Code
write_output
No description
Code
Methods description
None available.
Methods code
COVERAGE_BY_ALIGNEDdescriptionprevnextTop
sub COVERAGE_BY_ALIGNED {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_CONFIG_COVERAGE'} = $value;
  }

  if (exists($self->{'_CONFIG_COVERAGE'})) {
    return $self->{'_CONFIG_COVERAGE'};
  } else {
    return undef;
  }
}
FILTERdescriptionprevnextTop
sub FILTER {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_CONFIG_FILTER'} = $value;
  }
  
  if (exists($self->{'_CONFIG_FILTER'})) {
    return $self->{'_CONFIG_FILTER'};
  } else {
    return undef;
  }
}
GENOMICSEQSdescriptionprevnextTop
sub GENOMICSEQS {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_GENOMICSEQS'} = $value;
  }
  
  if (exists($self->{'_CONFIG_GENOMICSEQS'})) {
    return $self->{'_CONFIG_GENOMICSEQS'};
  } else {
    return undef;
  }
}
IIDREGEXPdescriptionprevnextTop
sub IIDREGEXP {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_CONFIG_IIDREGEXP'} = $value;
  }

  if (exists($self->{'_CONFIG_IIDREGEXP'})) {
    return $self->{'_CONFIG_IIDREGEXP'};
  } else {
    return undef;
  }
}
NONREF_REGIONSdescriptionprevnextTop
sub NONREF_REGIONS {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_NONREF_REGIONS'} = $value;
  }

  if (exists($self->{'_CONFIG_NONREF_REGIONS'})) {
    return $self->{'_CONFIG_NONREF_REGIONS'};
  } else {
    return undef;
  }
}
OPTIONSdescriptionprevnextTop
sub OPTIONS {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_CONFIG_OPTIONS'} = $value;
  }

  if (exists($self->{'_CONFIG_OPTIONS'})) {
    return $self->{'_CONFIG_OPTIONS'};
  } else {
    return undef;
  }
}
OUTDBdescriptionprevnextTop
sub OUTDB {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_CONFIG_OUTDB'} = $value;
  }

  if (exists($self->{'_CONFIG_OUTDB'})) {
    return $self->{'_CONFIG_OUTDB'};

  } else {
    return undef;
  }
}
PROGRAMdescriptionprevnextTop
sub PROGRAM {
  my ($self,$value) = @_;

  if (defined $value) {
    $self->{'_CONFIG_PROGRAM'} = $value;
  }

  if (exists($self->{'_CONFIG_PROGRAM'})) {
    return $self->{'_CONFIG_PROGRAM'};
  } else {
    return undef;
  }
}


###############################################
### end of config
###############################################
1;
}
QUERYANNOTATIONdescriptionprevnextTop
sub QUERYANNOTATION {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_CONFIG_QUERYANNOTATION'} = $value;
  }

  if (exists($self->{'_CONFIG_QUERYANNOTATION'})) {
    return $self->{'_CONFIG_QUERYANNOTATION'};
  } else {
    return undef;
  }
}
QUERYSEQSdescriptionprevnextTop
sub QUERYSEQS {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_CONFIG_QUERYSEQS'} = $value;
  }

  if (exists($self->{'_CONFIG_QUERYSEQS'})) {
    return $self->{'_CONFIG_QUERYSEQS'};
  } else {
    return undef;
  }
}
QUERYTYPEdescriptionprevnextTop
sub QUERYTYPE {
  my ($self,$value) = @_;
  
  if (defined $value) {
    $self->{'_CONFIG_QUERYTYPE'} = $value;
  }

  if (exists($self->{'_CONFIG_QUERYTYPE'})) {
    return $self->{'_CONFIG_QUERYTYPE'};
  } else {
    return undef;
  }
}
databasedescriptionprevnextTop
sub database {
  my ($self, $database) = @_;
  if ($database) {
    $self->{_database} = $database;
  }
  return $self->{_database};
}

############################################################
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my( $self) = @_;

  my $logic = $self->analysis->logic_name;

  ##########################################
# set up the target (genome)
##########################################
my @db_files; my @target_list = $self->GENOMICSEQS; foreach my $target (@target_list){ if(ref $target eq 'ARRAY'){ #check to see if we have multiple files or directories:
my $dir = 0; foreach my $alt_target (@$target){ if (-d $alt_target){ $dir = 1; last; } } # genome is in multiple directories; the order of directories determines
# which file is used in case of duplicates. New versions should therefore
# be in the directory listed first.
if ($dir){ foreach my $chr_name ($self->get_chr_names) { my $found = 0; DIRCHECK: foreach my $alt_target (@$target){ if (-s "$alt_target/$chr_name.fa") { push @db_files, "$alt_target/$chr_name.fa"; $found = 1; last DIRCHECK; } } if(!$found){ warning( "Could not find fasta file for '$chr_name' in directories:\n". join("\n\t", @$target)."\n"); } } }else{ foreach my $alt_target (@$target){ if (-s $alt_target){ push @db_files, $alt_target; } } } } # // end target is a directory
else{ $target =~s/^\s+//;
if (-e $target and -d $target) { # genome is in a directory; the directory must contain the complete
# genome else we cannot do best-in-genome filtering.
#
# We would like to use exonerate's ability to accept a directory as
# target (because bestn then works), but we must check that the directory
# contains only toplevel sequence files
my %dir_contents; opendir DIR, $target; while(my $entry = readdir DIR) { if ($entry ne '.' and $entry ne '..') { $dir_contents{$entry} = 0; } } closedir(DIR); foreach my $chr ($self->get_chr_names) { my $seq_fname = "$chr.fa"; if (-s "$target/$seq_fname") { $dir_contents{$seq_fname}++; push @db_files, "$target/$seq_fname"; } else { warning( "Could not find fasta file for '$chr' in '$target'\n"); } } # if all files in dir were expected, we can revert to having
# the whole directory as target
if (not grep { $dir_contents{$_} == 0 } keys %dir_contents) { @db_files = ($target); } } elsif (-e $target and -s $target) { # genome sequence is in a single file
@db_files = ($target); } else { throw("'$target' refers to something that could not be made sense of"); } } } ##########################################
# set up the query (est/cDNA/protein)
##########################################
my ($query_file, $chunk_number, $chunk_total); my $query = $self->QUERYSEQS; if (-e $query and -d $query) { # query seqs is a directory; input id will be a file in that directory
$query_file = "$query/" . $self->input_id; if (not -e $query_file) { throw( "Query file '$query_file' does not exist'\n"); } } elsif (-e $query and -s $query) { # query seqs is a single file; input id will correspond to a chunk number
$query_file = $query; my $iid_regexp = $self->IIDREGEXP; throw("When your input ids are not filenames, you must define ". "IIDREGEXP in config to enable inference of chunk number and total") if not defined $iid_regexp; ($chunk_number, $chunk_total) = $self->input_id =~ /$iid_regexp/; } else { throw("'$query' refers to something that could not be made sense of\n"); } ##########################################
# Annotation file with CDS positions
##########################################
##########################################
# setup the runnables
##########################################
my %parameters = %{$self->parameters_hash}; if (not exists($parameters{-options}) and defined $self->OPTIONS) { $parameters{-options} = $self->OPTIONS } if (not exists($parameters{-coverage_by_aligned}) and defined $self->COVERAGE_BY_ALIGNED) { $parameters{-coverage_by_aligned} = $self->COVERAGE_BY_ALIGNED; } if (defined $self->PROGRAM && defined $self->analysis->program_file) { if ($self->PROGRAM ne $self->analysis->program_file) { throw("CONFLICT: You have defined -program in your config file and ". "-program_file in your analysis table."); } } foreach my $database ( @db_files ){ my $runnable = Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript ->new( -program => $self->PROGRAM ? $self->PROGRAM : $self->analysis->program_file, -analysis => $self->analysis, -target_file => $database, -query_type => $self->QUERYTYPE, -query_file => $query_file, -annotation_file => $self->QUERYANNOTATION ? $self->QUERYANNOTATION : undef, -query_chunk_number => $chunk_number ? $chunk_number : undef, -query_chunk_total => $chunk_total ? $chunk_total : undef, %parameters, ); $self->runnable($runnable); } } ############################################################
}
filterdescriptionprevnextTop
sub filter {
  my ($self, $val) = @_;
  if ($val) {
    $self->{_transcript_filter} = $val;
  }
  return $self->{_transcript_filter};
}


#############################################################
# Declare and set up config variables
#############################################################
}
genomicdescriptionprevnextTop
sub genomic {
  my ($self, $seq) = @_;
  if ($seq){
    unless ($seq->isa("Bio::PrimarySeqI") || $seq->isa("Bio::SeqI")){
      throw("query seq must be a Bio::SeqI or Bio::PrimarySeqI");
    }
    $self->{_genomic} = $seq ;
  }
  return $self->{_genomic};
}


############################################################
}
get_chr_namesdescriptionprevnextTop
sub get_chr_names {
  my ($self) = @_;
  my @chr_names;
  my @chromosomes;

  my $chr_adaptor = $self->db->get_SliceAdaptor;
  #also fetching non-reference regions like DR52 for human by default.
#specify in Exonerate2Genes config-file.
if(defined($self->NONREF_REGIONS)){ @chromosomes = @{$chr_adaptor->fetch_all('toplevel', undef, 1)}; } else{ @chromosomes = @{$chr_adaptor->fetch_all('toplevel')}; } foreach my $chromosome ( @chromosomes ){ push( @chr_names, $chromosome->seq_region_name ); } return @chr_names; } ############################################################
}
get_output_dbdescriptionprevnextTop
sub get_output_db {
  my ($self) = @_;

  my $outdb;

  if ($self->OUTDB) {
    if ( ref($self->OUTDB)=~m/HASH/) {

      $outdb = new Bio::EnsEMBL::DBSQL::DBAdaptor(%{$self->OUTDB},
                                                -dnadb => $self->db);
    }else{
      $outdb = $self->get_dbadaptor($self->OUTDB);
    }
  } else {
    $outdb = $self->db;
  }

  return $outdb;
}


############################################################
#
# get/set methods
#
############################################################
}
make_genesdescriptionprevnextTop
sub make_genes {
  my ($self,@transcripts) = @_;
  
  my (@genes);

  my $slice_adaptor = $self->db->get_SliceAdaptor;
  
  my %genome_slices;

  foreach my $tran ( @transcripts ){
    my $gene = Bio::EnsEMBL::Gene->new();
    $gene->analysis($self->analysis);
    $gene->biotype($self->analysis->logic_name);

    ############################################################
# put a slice on the transcript
my $slice_id = $tran->start_Exon->seqname; if (not exists $genome_slices{$slice_id}) { # assumes genome seqs were named in the Ensembl API Slice naming
# convention, i.e. coord_syst:version:seq_reg_id:start:end:strand
$genome_slices{$slice_id} = $slice_adaptor->fetch_by_name($slice_id); } my $slice = $genome_slices{$slice_id}; foreach my $exon (@{$tran->get_all_Exons}){ $exon->slice($slice); foreach my $evi (@{$exon->get_all_supporting_features}){ $evi->slice($slice); $evi->analysis($self->analysis); } } foreach my $evi (@{$tran->get_all_supporting_features}) { $evi->slice($slice); $evi->analysis($self->analysis); } if (!$slice){ my ($sf); if (@{$tran->get_all_supporting_features}) { ($sf) = @{$tran->get_all_supporting_features}; } else { my @exons = @{$tran->get_all_Exons}; ($sf) = @{$exons[0]->get_all_supporting_features}; } print $sf->hseqname."\t$slice_id\n"; } throw("Have no slice") if(!$slice); $tran->slice($slice); $gene->add_Transcript($tran); push( @genes, $gene); } return @genes; } ############################################################
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);

  $self->read_and_check_config($EXONERATE_CONFIG_BY_LOGIC);

  return $self;
}
query_seqsdescriptionprevnextTop
sub query_seqs {
  my ($self, @seqs) = @_;
  if( @seqs ) {
    unless ($seqs[0]->isa("Bio::PrimarySeqI") || $seqs[0]->isa("Bio::SeqI")){
      throw("query seq must be a Bio::SeqI or Bio::PrimarySeqI");
    }
    push( @{$self->{_query_seqs}}, @seqs);
  }
  return @{$self->{_query_seqs}};
}

############################################################
}
read_and_check_configdescriptionprevnextTop
sub read_and_check_config {
  my $self = shift;

  $self->SUPER::read_and_check_config($EXONERATE_CONFIG_BY_LOGIC);

  ##########
# CHECKS
##########
my $logic = $self->analysis->logic_name; # check that compulsory options have values
foreach my $config_var (qw(QUERYSEQS QUERYTYPE GENOMICSEQS)) { throw("You must define $config_var in config for logic '$logic'") if not defined $self->$config_var; } throw("QUERYANNOTATION '" . $self->QUERYANNOTATION . "' in config must be readable") if $self->QUERYANNOTATION and not -e $self->QUERYANNOTATION; # filter does not have to be defined, but if it is, it should
# give details of an object and its parameters
if ($self->FILTER) { if (not ref($self->FILTER) eq "HASH" or not exists($self->FILTER->{OBJECT}) or not exists($self->FILTER->{PARAMETERS})) { throw("FILTER in config fo '$logic' must be a hash ref with elements:\n" . " OBJECT : qualified name of the filter module;\n" . " PARAMETERS : anonymous hash of parameters to pass to the filter"); } else { my $module = $self->FILTER->{OBJECT}; my $pars = $self->FILTER->{PARAMETERS}; (my $class = $module) =~ s/::/\//g; eval{ require "$class.pm"; }; throw("Couldn't require ".$class." Exonerate2Genes:require_module $@") if($@); $self->filter($module->new(%{$pars})); } }
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;
  my @results;
  
  throw("Can't run - no runnable objects") unless ($self->runnable);
  
  foreach my $runnable (@{$self->runnable}){
    
    $runnable->run;     
    push ( @results, @{$runnable->output} );
  }

  if ($self->filter) {
    my $filtered_transcripts = $self->filter->filter_results(\@results);
    @results = @$filtered_transcripts;
  }

  my @genes = $self->make_genes(@results);
  
  $self->output(\@genes);
}


############################################################
}
write_outputdescriptionprevnextTop
sub write_output {
  my ($self,@output) = @_;

  my $outdb = $self->get_output_db;
  my $gene_adaptor = $outdb->get_GeneAdaptor;  

  unless (@output){
    @output = @{$self->output};
  }
  
  my $fails = 0;
  my $total = 0;
  foreach my $gene (@output){

    eval {
      $gene_adaptor->store($gene);
    };    
    if ($@){
      warning("Unable to store gene!!\n$@");
      $fails++;
    }
    $total++;
  }
  if ($fails > 0) {
    throw("Not all genes could be written successfully " .
          "($fails fails out of $total)");
  }
}

############################################################
}
General documentation
CONTACTTop
Post general queries to ensembl-dev@ebi.ac.uk
APPENDIXTop
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a '_'