Bio::EnsEMBL::Analysis::RunnableDB GeneBuilder
Included librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::GeneBuild::GeneBuilder qw ( GENEBUILDER_CONFIG_BY_LOGIC )
Bio::EnsEMBL::Analysis::Runnable::GeneBuilder
Bio::EnsEMBL::Analysis::RunnableDB::BaseGeneBuild
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils qw ( id coord_string lies_inside_of_slice )
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::ExonUtils qw ( exon_length_less_than_maximum )
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::GeneUtils qw ( Gene_info attach_Analysis_to_Gene_no_support )
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranscriptUtils qw ( are_strands_consistent are_phases_consistent is_not_folded all_exons_are_valid intron_lengths_all_less_than_maximum )
Bio::EnsEMBL::Analysis::Tools::GeneBuildUtils::TranslationUtils qw ( validate_Translation_coords contains_internal_stops print_Translation print_peptide )
Bio::EnsEMBL::Analysis::Tools::Logger
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Unavailable
Synopsis
No synopsis!
Description
No description!
Methods
BLESSED_BIOTYPES
No description
Code
INPUT_GENES
No description
Code
MAX_EXON_LENGTH
No description
Code
MAX_SHORT_INTRON_LEN
No description
Code
MAX_TRANSCRIPTS_PER_CLUSTER
No description
Code
MIN_SHORT_INTRON_LEN
No description
Code
OUTPUT_BIOTYPE
No description
Code
OUTPUT_DB
No description
Code
fetch_input
No description
Code
filter_genes
No description
Code
get_Genes
No description
Code
get_adaptor
No description
Code
input_genes
No description
Code
newDescriptionCode
output_db
No description
Code
read_and_check_configDescriptionCode
validate_Transcript
No description
Code
write_output
No description
Code
Methods description
newcode    nextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
Function : instatiates a GeneBuilder object and reads and checks the config
file
Returntype: Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
Exceptions:
Example :
read_and_check_configcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
Arg [2] : hashref from config file
Function : call the superclass method to set all the varibles and carry
out some sanity checking
Returntype: N/A
Exceptions: throws if certain variables arent set properlu
Example :
Methods code
BLESSED_BIOTYPESdescriptionprevnextTop
sub BLESSED_BIOTYPES {
  my ($self, $arg) = @_;
  if($arg){
    $self->{'BLESSED_BIOTYPES'} = $arg;
  }
  return $self->{'BLESSED_BIOTYPES'};
}
INPUT_GENESdescriptionprevnextTop
sub INPUT_GENES {
  my ($self, $arg) = @_;
  if($arg){
    $self->{'INPUT_GENES'} = $arg;
  }
  return $self->{'INPUT_GENES'};
}
MAX_EXON_LENGTHdescriptionprevnextTop
sub MAX_EXON_LENGTH {
  my ($self, $arg) = @_;
  if($arg){
    $self->{'MAX_EXON_LENGTH'} = $arg;
  }
  return $self->{'MAX_EXON_LENGTH'};
}


1;
}
MAX_SHORT_INTRON_LENdescriptionprevnextTop
sub MAX_SHORT_INTRON_LEN {
  my ($self, $arg) = @_;
  if($arg){
    $self->{'MAX_SHORT_INTRON_LEN'} = $arg;
  }
  return $self->{'MAX_SHORT_INTRON_LEN'};
}
MAX_TRANSCRIPTS_PER_CLUSTERdescriptionprevnextTop
sub MAX_TRANSCRIPTS_PER_CLUSTER {
  my ($self, $arg) = @_;
  if($arg){
    $self->{'MAX_TRANSCRIPTS_PER_CLUSTER'} = $arg;
  }
  return $self->{'MAX_TRANSCRIPTS_PER_CLUSTER'};
}
MIN_SHORT_INTRON_LENdescriptionprevnextTop
sub MIN_SHORT_INTRON_LEN {
  my ($self, $arg) = @_;
  if($arg){
    $self->{'MIN_SHORT_INTRON_LEN'} = $arg;
  }
  return $self->{'MIN_SHORT_INTRON_LEN'};
}
OUTPUT_BIOTYPEdescriptionprevnextTop
sub OUTPUT_BIOTYPE {
  my ($self, $arg) = @_;
  if($arg){
    $self->{'OUTPUT_BIOTYPE'} = $arg;
  }
  return $self->{'OUTPUT_BIOTYPE'};
}
OUTPUT_DBdescriptionprevnextTop
sub OUTPUT_DB {
  my ($self, $arg) = @_;
  if($arg){
    $self->{'OUTPUT_DB'} = $arg;
  }
  return $self->{'OUTPUT_DB'};
}
fetch_inputdescriptionprevnextTop
sub fetch_input {
  my ($self) = @_;
  #fetch sequence
$self->query($self->fetch_sequence); #fetch genes
$self->get_Genes; #print "Have ".@{$self->input_genes}." genes to cluster\n";
#filter genes
my @filtered_genes = @{$self->filter_genes($self->input_genes)}; #print "Have ".@filtered_genes." filtered genes\n";
#create genebuilder runnable
my $runnable = Bio::EnsEMBL::Analysis::Runnable::GeneBuilder ->new( -query => $self->query, -analysis => $self->analysis, -genes =>\@ filtered_genes, -output_biotype => $self->OUTPUT_BIOTYPE, -max_transcripts_per_cluster => $self->MAX_TRANSCRIPTS_PER_CLUSTER, -min_short_intron_len => $self->MIN_SHORT_INTRON_LEN, -max_short_intron_len => $self->MAX_SHORT_INTRON_LEN, -blessed_biotypes => $self->BLESSED_BIOTYPES, ); $self->runnable($runnable); };
}
filter_genesdescriptionprevnextTop
sub filter_genes {
  my ($self, $genes) = @_;
  $genes = $self->input_genes if(!$genes);
  print "Have ".@$genes." to filter\n";
  my @filtered;
 GENE:foreach my $gene(@$genes){
    #throw("Genebuilder only works with one gene one transcript structures")
# if(@{$gene->get_all_Transcripts} >= 2);
foreach my $transcript(@{$gene->get_all_Transcripts}){ if($self->validate_Transcript($transcript)){ push(@filtered, $gene); next GENE; }else{ print Gene_info($gene)." is invalid skipping\n"; next GENE; } } } return\@ filtered;
}
get_GenesdescriptionprevnextTop
sub get_Genes {
  my ($self) = @_;
  my @genes;
  foreach my $db_name(keys(%{$self->INPUT_GENES})){
    my $gene_db = $self->get_dbadaptor($db_name);
    my $slice = $self->fetch_sequence($self->input_id, $gene_db);
    my $biotypes = $self->INPUT_GENES->{$db_name};
    foreach my $biotype(@$biotypes){
      my $genes = $slice->get_all_Genes_by_type($biotype);
      print "Retrieved ".@$genes." of type ".$biotype."\n";
      push(@genes, @$genes);
    }
  }
  $self->input_genes(\@genes);
}
get_adaptordescriptionprevnextTop
sub get_adaptor {
  my ($self) = @_;
  return $self->output_db->get_GeneAdaptor;
}
input_genesdescriptionprevnextTop
sub input_genes {
  my ($self, $arg) = @_;
  if($arg){
    throw("Need to pass input genes an arrayref not ".$arg) 
      if(!ref($arg) || ref($arg) ne 'ARRAY');
    push(@{$self->{input_genes}}, @$arg);
  }
  return $self->{input_genes};
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  $self->read_and_check_config($GENEBUILDER_CONFIG_BY_LOGIC);
  return $self;
}
output_dbdescriptionprevnextTop
sub output_db {
  my ($self, $db) = @_;
  if($db){
    $self->{output_db} = $db;
  }
  if(!$self->{output_db}){
    my $db = $self->get_dbadaptor($self->OUTPUT_DB);
    $self->{output_db} = $db;
  }
  return $self->{output_db};
}
read_and_check_configdescriptionprevnextTop
sub read_and_check_config {
  my ($self, $hash) = @_;
  $self->SUPER::read_and_check_config($hash);

  #######
#CHECKS
#######
foreach my $var(qw(INPUT_GENES OUTPUT_DB)){ throw("RunnableDB::GeneBuilder $var config variable is not defined") unless($self->$var); } my @keys = keys(%{$self->INPUT_GENES}); throw("RunnableDB::GeneBuilder INPUT_GENES has needs to contain values") if(!@keys); my %unique; foreach my $key(@keys){ my $biotypes = $self->INPUT_GENES->{$key}; foreach my $biotype(@$biotypes){ if(!$unique{$biotype}){ $unique{$biotype} = $key; }else{ if($self->BLESSED_BIOTYPES->{$biotype}){ throw($biotype." is defined for both ".$key." and ".$unique{$biotype}. " and is found in the blessed biotype hash\n". "This is likely to cause problems for the filtering done in ". "the genebuilder code"); }else{ warning($biotype." appears twice in your listing, make sure this ". "isn't for the same database otherwise it will cause issue"); } } } } $self->OUTPUT_BIOTYPE($self->analysis->logic_name) if(!$self->OUTPUT_BIOTYPE);
}
validate_TranscriptdescriptionprevnextTop
sub validate_Transcript {
  my ($self, $transcript) = @_;
  my $slice = $self->query;
  $slice = $transcript->slice if(!$slice);
  my $is_valid = 0;
  #basic transcript validation
unless(are_strands_consistent($transcript)){ $is_valid++; } unless(are_phases_consistent($transcript)){ $is_valid++; } unless(is_not_folded($transcript)){ $is_valid++; } EXON:foreach my $exon(@{$transcript->get_all_Exons}){ if(exon_length_less_than_maximum($exon, $self->MAX_EXON_LENGTH)){ next EXON; }else{ $is_valid++; last EXON; } } if(contains_internal_stops($transcript)){ $is_valid++; } unless(validate_Translation_coords($transcript)){ $is_valid++; } return 0 if($is_valid >= 1); return 1; } #CONFIG METHODS
}
write_outputdescriptionprevnextTop
sub write_output {
  my ($self) = @_;
  my $ga = $self->get_adaptor;
  my $sucessful_count = 0;
  logger_info("WRITE OUTPUT have ".@{$self->output}." genes to write");
  foreach my $gene(@{$self->output}){
    my $attach = 0;
    if(!$gene->analysis){
      my $attach = 1;
      attach_Analysis_to_Gene_no_support($gene, $self->analysis);
    }
    if($attach == 0){
    TRANSCRIPT:foreach my $transcript(@{$gene->get_all_Transcripts}){
        if(!$transcript->analysis){
          attach_Analysis_to_Gene_no_support($gene, $self->analysis);
          last TRANSCRIPT;
        }
      }
    }
    eval{
      $ga->store($gene);
    };
    if($@){
      warning("Failed to write gene ".id($gene)." ".coord_string($gene)." $@");
    }else{
      $sucessful_count++;
      logger_info("STORED GENE ".$gene->dbID);
    }
  }
  if($sucessful_count != @{$self->output}){
    throw("Failed to write some genes");
  }
}
General documentation
CONFIG_ACCESSOR_METHODSTop
  Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
Arg [2] : Varies, tends to be boolean, a string, a arrayref or a hashref
Function : Getter/Setter for config variables
Returntype: again varies
Exceptions:
Example :