Bio::EnsEMBL::Analysis::RunnableDB
GeneBuilder
Toolbar
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
No synopsis!
Description
No description!
Methods
BLESSED_BIOTYPES | No description | Code |
INPUT_GENES | No description | Code |
MAX_EXON_LENGTH | No description | Code |
MAX_SHORT_INTRON_LEN | No description | Code |
MAX_TRANSCRIPTS_PER_CLUSTER | No description | Code |
MIN_SHORT_INTRON_LEN | No description | Code |
OUTPUT_BIOTYPE | No description | Code |
OUTPUT_DB | No description | Code |
fetch_input | No description | Code |
filter_genes | No description | Code |
get_Genes | No description | Code |
get_adaptor | No description | Code |
input_genes | No description | Code |
new | Description | Code |
output_db | No description | Code |
read_and_check_config | Description | Code |
validate_Transcript | No description | Code |
write_output | No description | Code |
Methods description
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder Function : instantiates a GeneBuilder object and reads and checks the config file Returntype: Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder Exceptions: Example : |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder Arg [2] : hashref from config file Function : call the superclass method to set all the variables and carry out some sanity checking Returntype: N/A Exceptions: throws if certain variables aren't set properly Example : |
Methods code
BLESSED_BIOTYPES | description | prev | next | Top |
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) new value for the BLESSED_BIOTYPES config variable;
#             read_and_check_config looks biotypes up in it as hash keys
# Function  : getter/setter. A true Arg [2] replaces the stored value; a
#             missing or false Arg [2] leaves the stored value untouched.
# Returntype: the value currently stored (undef if never set)
sub BLESSED_BIOTYPES {
  my ($self, $arg) = @_;
  $self->{'BLESSED_BIOTYPES'} = $arg if $arg;
  return $self->{'BLESSED_BIOTYPES'};
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) new value for the INPUT_GENES config variable;
#             get_Genes reads it as a hashref of database name => arrayref
#             of biotypes
# Function  : getter/setter. A true Arg [2] replaces the stored value; a
#             missing or false Arg [2] leaves the stored value untouched.
# Returntype: the value currently stored (undef if never set)
sub INPUT_GENES {
  my ($self, $arg) = @_;
  $self->{'INPUT_GENES'} = $arg if $arg;
  return $self->{'INPUT_GENES'};
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) new value for the MAX_EXON_LENGTH config variable;
#             validate_Transcript hands it to exon_length_less_than_maximum
# Function  : getter/setter. A true Arg [2] replaces the stored value; a
#             missing or false Arg [2] leaves the stored value untouched.
# Returntype: the value currently stored (undef if never set)
sub MAX_EXON_LENGTH {
  my ($self, $arg) = @_;
  $self->{'MAX_EXON_LENGTH'} = $arg if $arg;
  return $self->{'MAX_EXON_LENGTH'};
}

# True value that was trailing this sub in the listing; the unmatched closing
# brace that followed it has been removed.
1;
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) new value for the MAX_SHORT_INTRON_LEN config
#             variable; fetch_input passes it to the runnable as
#             -max_short_intron_len
# Function  : getter/setter. A true Arg [2] replaces the stored value; a
#             missing or false Arg [2] leaves the stored value untouched.
# Returntype: the value currently stored (undef if never set)
sub MAX_SHORT_INTRON_LEN {
  my ($self, $arg) = @_;
  $self->{'MAX_SHORT_INTRON_LEN'} = $arg if $arg;
  return $self->{'MAX_SHORT_INTRON_LEN'};
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) new value for the MAX_TRANSCRIPTS_PER_CLUSTER config
#             variable; fetch_input passes it to the runnable as
#             -max_transcripts_per_cluster
# Function  : getter/setter. A true Arg [2] replaces the stored value; a
#             missing or false Arg [2] leaves the stored value untouched.
# Returntype: the value currently stored (undef if never set)
sub MAX_TRANSCRIPTS_PER_CLUSTER {
  my ($self, $arg) = @_;
  $self->{'MAX_TRANSCRIPTS_PER_CLUSTER'} = $arg if $arg;
  return $self->{'MAX_TRANSCRIPTS_PER_CLUSTER'};
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) new value for the MIN_SHORT_INTRON_LEN config
#             variable; fetch_input passes it to the runnable as
#             -min_short_intron_len
# Function  : getter/setter. A true Arg [2] replaces the stored value; a
#             missing or false Arg [2] leaves the stored value untouched.
# Returntype: the value currently stored (undef if never set)
sub MIN_SHORT_INTRON_LEN {
  my ($self, $arg) = @_;
  $self->{'MIN_SHORT_INTRON_LEN'} = $arg if $arg;
  return $self->{'MIN_SHORT_INTRON_LEN'};
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) new value for the OUTPUT_BIOTYPE config variable;
#             fetch_input passes it to the runnable as -output_biotype
# Function  : getter/setter. A true Arg [2] replaces the stored value; a
#             missing or false Arg [2] leaves the stored value untouched.
# Returntype: the value currently stored (undef if never set)
sub OUTPUT_BIOTYPE {
  my ($self, $arg) = @_;
  $self->{'OUTPUT_BIOTYPE'} = $arg if $arg;
  return $self->{'OUTPUT_BIOTYPE'};
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) new value for the OUTPUT_DB config variable; output_db
#             hands it to get_dbadaptor to obtain the output database
# Function  : getter/setter. A true Arg [2] replaces the stored value; a
#             missing or false Arg [2] leaves the stored value untouched.
# Returntype: the value currently stored (undef if never set)
sub OUTPUT_DB {
  my ($self, $arg) = @_;
  $self->{'OUTPUT_DB'} = $arg if $arg;
  return $self->{'OUTPUT_DB'};
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Function  : sets the query from fetch_sequence, loads the input genes with
#             get_Genes, filters them with filter_genes, then builds a
#             Bio::EnsEMBL::Analysis::Runnable::GeneBuilder from the filtered
#             genes plus the config accessors and registers it via runnable()
# Returntype: whatever $self->runnable($runnable) returns
# Exceptions: none raised here directly
sub fetch_input {
  my ($self) = @_;

  # Query sequence first, then the candidate genes.
  $self->query($self->fetch_sequence);
  $self->get_Genes;

  my @filtered_genes = @{$self->filter_genes($self->input_genes)};

  my $runnable = Bio::EnsEMBL::Analysis::Runnable::GeneBuilder->new(
    -query                       => $self->query,
    -analysis                    => $self->analysis,
    -genes                       => \@filtered_genes,
    -output_biotype              => $self->OUTPUT_BIOTYPE,
    -max_transcripts_per_cluster => $self->MAX_TRANSCRIPTS_PER_CLUSTER,
    -min_short_intron_len        => $self->MIN_SHORT_INTRON_LEN,
    -max_short_intron_len        => $self->MAX_SHORT_INTRON_LEN,
    -blessed_biotypes            => $self->BLESSED_BIOTYPES,
  );

  return $self->runnable($runnable);
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) arrayref of genes; falls back to $self->input_genes
#             when missing or false
# Function  : keeps each gene whose FIRST transcript passes
#             validate_Transcript. Later transcripts are never inspected, and
#             a gene with no transcripts is dropped without a message.
#             NOTE(review): this looks like it assumes one transcript per
#             gene -- confirm before feeding multi-transcript genes.
# Returntype: arrayref of the genes that were kept
sub filter_genes {
  my ($self, $genes) = @_;
  $genes ||= $self->input_genes;
  print "Have ".scalar(@$genes)." to filter\n";

  my @kept;
  for my $candidate (@$genes) {
    my $transcripts = $candidate->get_all_Transcripts;
    next if !@$transcripts;

    # Only the leading transcript decides the fate of the whole gene.
    my $leading = $transcripts->[0];
    if ($self->validate_Transcript($leading)) {
      push(@kept, $candidate);
    }
    else {
      print Gene_info($candidate)." is invalid skipping\n";
    }
  }
  return \@kept;
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Function  : for every database name in INPUT_GENES, fetches a slice for the
#             input id from that database and collects the genes of each
#             listed biotype; the combined list is handed to input_genes
# Returntype: whatever $self->input_genes returns (the stored gene arrayref)
sub get_Genes {
  my ($self) = @_;
  my $sources = $self->INPUT_GENES;
  my @collected;

  for my $db_name (keys(%$sources)) {
    my $gene_db = $self->get_dbadaptor($db_name);
    my $slice   = $self->fetch_sequence($self->input_id, $gene_db);
    my $wanted  = $sources->{$db_name};

    for my $biotype (@$wanted) {
      my $found = $slice->get_all_Genes_by_type($biotype);
      print "Retrieved ".scalar(@$found)." of type ".$biotype."\n";
      push(@collected, @$found);
    }
  }

  return $self->input_genes(\@collected);
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Function  : fetches the gene adaptor of the output database
# Returntype: whatever get_GeneAdaptor returns on $self->output_db
sub get_adaptor {
  my ($self) = @_;
  my $target_db = $self->output_db;
  return $target_db->get_GeneAdaptor;
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) arrayref of genes to APPEND to the stored list
# Function  : getter/appender for the input gene list; repeated calls
#             accumulate rather than replace
# Returntype: the stored arrayref (undef if nothing was ever added)
# Exceptions: throws if a true Arg [2] is not an ARRAY reference
sub input_genes {
  my ($self, $arg) = @_;
  return $self->{input_genes} unless $arg;

  if (ref($arg) ne 'ARRAY') {
    throw("Need to pass input genes an arrayref not ".$arg);
  }
  push(@{$self->{input_genes}}, @$arg);
  return $self->{input_genes};
}
# Arg [1]   : class name
# Arg [...] : constructor arguments, forwarded untouched to SUPER::new
# Function  : builds the object through the superclass constructor and then
#             runs read_and_check_config on $GENEBUILDER_CONFIG_BY_LOGIC
# Returntype: the newly constructed object
# Exceptions: whatever read_and_check_config throws
sub new {
  my ($class, @ctor_args) = @_;
  my $self = $class->SUPER::new(@ctor_args);
  $self->read_and_check_config($GENEBUILDER_CONFIG_BY_LOGIC);
  return $self;
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : (optional) database adaptor to use as the output database
# Function  : getter/setter with lazy loading: when nothing is stored, the
#             adaptor is obtained from get_dbadaptor using OUTPUT_DB and cached
# Returntype: the stored output database adaptor
sub output_db {
  my ($self, $db) = @_;
  $self->{output_db} = $db if $db;

  # Resolve from config only on first use; afterwards the cached value wins.
  $self->{output_db} ||= $self->get_dbadaptor($self->OUTPUT_DB);
  return $self->{output_db};
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : hashref from config file
# Function  : calls the superclass method to load the config, then checks that
#             INPUT_GENES and OUTPUT_DB are set, that INPUT_GENES has at least
#             one database, and that no biotype is listed twice. Finally
#             defaults OUTPUT_BIOTYPE to the analysis logic_name when unset.
# Returntype: N/A
# Exceptions: throws if INPUT_GENES or OUTPUT_DB is unset, if INPUT_GENES has
#             no keys, or if a duplicated biotype is also a blessed biotype;
#             only warns for a duplicated biotype that is not blessed
sub read_and_check_config {
  my ($self, $hash) = @_;
  $self->SUPER::read_and_check_config($hash);

  foreach my $var (qw(INPUT_GENES OUTPUT_DB)) {
    throw("RunnableDB::GeneBuilder $var config variable is not defined")
      unless($self->$var);
  }

  my @keys = keys(%{$self->INPUT_GENES});
  throw("RunnableDB::GeneBuilder INPUT_GENES has needs to contain values") if(!@keys);

  # BLESSED_BIOTYPES may be unset; dereferencing the getter's undef return
  # directly would die instead of reaching the warning branch below.
  my $blessed = $self->BLESSED_BIOTYPES || {};

  # Biotype => first database name it was seen under.
  my %first_db_for;
  foreach my $key (@keys) {
    my $biotypes = $self->INPUT_GENES->{$key};
    foreach my $biotype (@$biotypes) {
      # exists rather than truthiness, so a false database name still counts.
      if (!exists $first_db_for{$biotype}) {
        $first_db_for{$biotype} = $key;
        next;
      }
      if ($blessed->{$biotype}) {
        throw($biotype." is defined for both ".$key." and ".$first_db_for{$biotype}.
              " and is found in the blessed biotype hash\n".
              "This is likely to cause problems for the filtering done in ".
              "the genebuilder code");
      }
      else {
        warning($biotype." appears twice in your listing, make sure this ".
                "isn't for the same database otherwise it will cause issue");
      }
    }
  }

  $self->OUTPUT_BIOTYPE($self->analysis->logic_name) if(!$self->OUTPUT_BIOTYPE);
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Arg [2]   : transcript object to check
# Function  : runs the transcript through are_strands_consistent,
#             are_phases_consistent, is_not_folded,
#             exon_length_less_than_maximum (per exon, against
#             MAX_EXON_LENGTH), contains_internal_stops and
#             validate_Translation_coords
# Returntype: 1 if every check passed, 0 if at least one failed
sub validate_Transcript {
  my ($self, $transcript) = @_;

  # Counts failed checks. Every check is still run after an earlier failure,
  # so each helper is called exactly as before.
  my $failed_checks = 0;

  $failed_checks++ unless are_strands_consistent($transcript);
  $failed_checks++ unless are_phases_consistent($transcript);
  $failed_checks++ unless is_not_folded($transcript);

  # One over-long exon is enough; stop scanning at the first offender.
  EXON: foreach my $exon (@{$transcript->get_all_Exons}) {
    next EXON if exon_length_less_than_maximum($exon, $self->MAX_EXON_LENGTH);
    $failed_checks++;
    last EXON;
  }

  $failed_checks++ if contains_internal_stops($transcript);
  $failed_checks++ unless validate_Translation_coords($transcript);

  return $failed_checks ? 0 : 1;
}
# Arg [1]   : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
# Function  : stores every gene in $self->output through the gene adaptor
#             from get_adaptor. Before storing, attaches $self->analysis to
#             any gene that lacks an analysis, or to a gene that has one but
#             owns a transcript without one.
# Returntype: N/A
# Exceptions: warns for each gene whose store fails, then throws once at the
#             end if any store failed
sub write_output {
  my ($self) = @_;
  my $ga = $self->get_adaptor;
  my $successful_count = 0;
  logger_info("WRITE OUTPUT have ".@{$self->output}." genes to write");

  foreach my $gene (@{$self->output}) {
    my $attached = 0;
    if (!$gene->analysis) {
      # Assign to the outer flag. Re-declaring it with "my" here created a
      # shadow, so the transcript scan below ran even after attaching.
      $attached = 1;
      attach_Analysis_to_Gene_no_support($gene, $self->analysis);
    }
    if (!$attached) {
      TRANSCRIPT: foreach my $transcript (@{$gene->get_all_Transcripts}) {
        if (!$transcript->analysis) {
          attach_Analysis_to_Gene_no_support($gene, $self->analysis);
          last TRANSCRIPT;
        }
      }
    }

    # A failed store is reported and counted, not fatal for the other genes.
    eval {
      $ga->store($gene);
    };
    if ($@) {
      warning("Failed to write gene ".id($gene)." ".coord_string($gene)." $@");
    }
    else {
      $successful_count++;
      logger_info("STORED GENE ".$gene->dbID);
    }
  }

  if ($successful_count != @{$self->output}) {
    throw("Failed to write some genes");
  }
}
General documentation
CONFIG_ACCESSOR_METHODS | Top |
Arg [1] : Bio::EnsEMBL::Analysis::RunnableDB::GeneBuilder
Arg [2] : Varies, tends to be boolean, a string, an arrayref or a hashref
Function : Getter/Setter for config variables
Returntype: again varies
Exceptions:
Example :