Bio::EnsEMBL::Analysis::RunnableDB
Exonerate2Genes
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::Exonerate2Genes;
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::Exonerate2Genes
Inherit
Synopsis
my $exonerate2genes = Bio::EnsEMBL::Analysis::RunnableDB::Exonerate2Genes->new(
-db => $refdb,
-analysis => $analysis_obj,
-input_id => $chunk_file_name
);
$exonerate2genes->fetch_input();
$exonerate2genes->run();
$exonerate2genes->output();
$exonerate2genes->write_output(); #writes to DB
Description
This object wraps Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript
It is meant to provide the interface for mapping ESTs to the genome
sequence and writing the results as genes. Because of the way Exonerate is run,
we do not cluster transcripts into genes and only write one transcript per gene.
We then create a dbadaptor for the target database.
Methods
COVERAGE_BY_ALIGNED | No description | Code |
FILTER | No description | Code |
GENOMICSEQS | No description | Code |
IIDREGEXP | No description | Code |
NONREF_REGIONS | No description | Code |
OPTIONS | No description | Code |
OUTDB | No description | Code |
PROGRAM | No description | Code |
QUERYANNOTATION | No description | Code |
QUERYSEQS | No description | Code |
QUERYTYPE | No description | Code |
database | No description | Code |
fetch_input | No description | Code |
filter | No description | Code |
genomic | No description | Code |
get_chr_names | No description | Code |
get_output_db | No description | Code |
make_genes | No description | Code |
new | No description | Code |
query_seqs | No description | Code |
read_and_check_config | No description | Code |
run | No description | Code |
write_output | No description | Code |
Methods description
None available.
Methods code
COVERAGE_BY_ALIGNED | description | prev | next | Top |
# Get/set the COVERAGE_BY_ALIGNED configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# The value lives under the hash key '_CONFIG_COVERAGE' (note: not
# '_CONFIG_COVERAGE_BY_ALIGNED').
sub COVERAGE_BY_ALIGNED {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_COVERAGE';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the FILTER configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# read_and_check_config requires this, when set, to be a hash ref with the
# keys OBJECT and PARAMETERS.
sub FILTER {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_FILTER';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the GENOMICSEQS configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# fetch_input accepts either a single file/directory path or an array ref of
# alternative paths here.
sub GENOMICSEQS {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_GENOMICSEQS';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: fetch_input calls this in
    # list context and must always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the IIDREGEXP configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# fetch_input matches the input id against this pattern to capture the
# query chunk number and chunk total.
sub IIDREGEXP {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_IIDREGEXP';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the NONREF_REGIONS configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# get_chr_names only tests whether this value is defined, not whether it
# is true.
sub NONREF_REGIONS {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_NONREF_REGIONS';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the OPTIONS configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# fetch_input passes this to the runnable as -options unless the analysis
# parameters already supply -options.
sub OPTIONS {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_OPTIONS';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the OUTDB configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# get_output_db accepts either a hash ref of DBAdaptor constructor
# arguments or a value that is handed to get_dbadaptor.
sub OUTDB {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_OUTDB';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the PROGRAM configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# fetch_input prefers this over the analysis' program_file and throws if
# both are defined but differ.
sub PROGRAM {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_PROGRAM';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
1; } |
# Get/set the QUERYANNOTATION configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# fetch_input passes a true value on to the runnable as -annotation_file.
sub QUERYANNOTATION {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_QUERYANNOTATION';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the QUERYSEQS configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# fetch_input treats this as either a directory holding one query file per
# input id, or a single non-empty query file.
sub QUERYSEQS {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_QUERYSEQS';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the QUERYTYPE configuration value.
#
#   Arg [1]    : (optional) new value; stored only when it is defined
#   Returntype : the stored value, or undef if none has ever been stored
#
# fetch_input passes this to the runnable as -query_type.
sub QUERYTYPE {
    my ($self, $value) = @_;

    my $slot = '_CONFIG_QUERYTYPE';
    $self->{$slot} = $value if defined $value;

    # Explicit undef rather than a bare return: callers in list context
    # always receive exactly one element.
    return exists $self->{$slot} ? $self->{$slot} : undef;
}
# Get/set the database held by this object.
#
#   Arg [1]    : (optional) database; stored only when it is a true value
#   Returntype : whatever is currently stored (undef if nothing is)
sub database {
    my ($self, $database) = @_;

    # A false argument (undef, 0, '') never overwrites the stored value.
    $self->{_database} = $database if $database;

    return $self->{_database};
}
} |
# Resolves the target (genome) files and the query file from the
# configuration, then registers one ExonerateTranscript runnable per target
# file via $self->runnable.
#
# Target resolution (GENOMICSEQS):
#   * array ref containing at least one directory: for every name returned
#     by get_chr_names, the first "<dir>/<name>.fa" that is non-empty is
#     used; a warning is issued for names with no such file.
#   * array ref with no directories: every non-empty file in it is used.
#   * directory: "<dir>/<name>.fa" for every name from get_chr_names; if
#     that accounts for every entry in the directory, the directory itself
#     becomes the single target.
#   * non-empty file: used as the single target.
#
# Query resolution (QUERYSEQS):
#   * directory: the query file is "<dir>/<input_id>".
#   * non-empty file: chunk number and chunk total are captured from the
#     input id with IIDREGEXP.
#
# Throws if a target or the query cannot be interpreted, if a target
# directory cannot be opened, if the query file is missing, if IIDREGEXP is
# needed but undefined, or if PROGRAM and the analysis' program_file are
# both defined and differ.
sub fetch_input {
    my ($self) = @_;

    my @db_files;
    my @target_list = $self->GENOMICSEQS;

    foreach my $target (@target_list) {
        if (ref $target eq 'ARRAY') {
            # Are any of the alternatives directories?
            my $dir = 0;
            foreach my $alt_target (@$target) {
                if (-d $alt_target) {
                    $dir = 1;
                    last;
                }
            }

            if ($dir) {
                # Take each sequence from the first directory that has it.
                foreach my $chr_name ($self->get_chr_names) {
                    my $found = 0;
                  DIRCHECK:
                    foreach my $alt_target (@$target) {
                        if (-s "$alt_target/$chr_name.fa") {
                            push @db_files, "$alt_target/$chr_name.fa";
                            $found = 1;
                            last DIRCHECK;
                        }
                    }
                    if (!$found) {
                        warning( "Could not find fasta file for '$chr_name' in directories:\n".
                                 join("\n\t", @$target)."\n");
                    }
                }
            }
            else {
                foreach my $alt_target (@$target) {
                    if (-s $alt_target) {
                        push @db_files, $alt_target;
                    }
                }
            }
        }
        else {
            $target =~ s/^\s+//;

            if (-e $target and -d $target) {
                # Record every directory entry with a use count of zero.
                my %dir_contents;
                opendir(my $dir_handle, $target)
                    or throw("Could not open directory '$target': $!");
                while (defined(my $entry = readdir $dir_handle)) {
                    if ($entry ne '.' and $entry ne '..') {
                        $dir_contents{$entry} = 0;
                    }
                }
                closedir($dir_handle);

                foreach my $chr ($self->get_chr_names) {
                    my $seq_fname = "$chr.fa";
                    if (-s "$target/$seq_fname") {
                        $dir_contents{$seq_fname}++;
                        push @db_files, "$target/$seq_fname";
                    }
                    else {
                        warning( "Could not find fasta file for '$chr' in '$target'\n");
                    }
                }

                # If no entry was left unused, the whole directory is the
                # target, so pass the directory rather than each file.
                if (not grep { $dir_contents{$_} == 0 } keys %dir_contents) {
                    @db_files = ($target);
                }
            }
            elsif (-e $target and -s $target) {
                @db_files = ($target);
            }
            else {
                throw("'$target' refers to something that could not be made sense of");
            }
        }
    }

    my ($query_file, $chunk_number, $chunk_total);
    my $query = $self->QUERYSEQS;

    if (-e $query and -d $query) {
        # The input id names a file inside the query directory.
        $query_file = "$query/" . $self->input_id;
        if (not -e $query_file) {
            throw( "Query file '$query_file' does not exist'\n");
        }
    }
    elsif (-e $query and -s $query) {
        # A single query file: the input id encodes which chunk to process.
        $query_file = $query;
        my $iid_regexp = $self->IIDREGEXP;

        throw("When your input ids are not filenames, you must define ".
              "IIDREGEXP in config to enable inference of chunk number and total")
            if not defined $iid_regexp;

        ($chunk_number, $chunk_total) = $self->input_id =~ /$iid_regexp/;
    }
    else {
        throw("'$query' refers to something that could not be made sense of\n");
    }

    # Analysis parameters take precedence over the config-file values.
    my %parameters = %{$self->parameters_hash};
    if (not exists($parameters{-options}) and
        defined $self->OPTIONS) {
        $parameters{-options} = $self->OPTIONS;
    }
    if (not exists($parameters{-coverage_by_aligned}) and
        defined $self->COVERAGE_BY_ALIGNED) {
        $parameters{-coverage_by_aligned} = $self->COVERAGE_BY_ALIGNED;
    }

    if (defined $self->PROGRAM && defined $self->analysis->program_file) {
        if ($self->PROGRAM ne $self->analysis->program_file) {
            throw("CONFLICT: You have defined -program in your config file and ".
                  "-program_file in your analysis table.");
        }
    }

    foreach my $database (@db_files) {
        my $runnable = Bio::EnsEMBL::Analysis::Runnable::ExonerateTranscript
            ->new(
                  -program            => $self->PROGRAM ? $self->PROGRAM : $self->analysis->program_file,
                  -analysis           => $self->analysis,
                  -target_file        => $database,
                  -query_type         => $self->QUERYTYPE,
                  -query_file         => $query_file,
                  -annotation_file    => $self->QUERYANNOTATION ? $self->QUERYANNOTATION : undef,
                  -query_chunk_number => $chunk_number ? $chunk_number : undef,
                  -query_chunk_total  => $chunk_total ? $chunk_total : undef,
                  %parameters,
                 );
        $self->runnable($runnable);
    }
}
} |
# Get/set the transcript filter object applied to the results in run().
#
#   Arg [1]    : (optional) filter; stored only when it is a true value
#   Returntype : whatever is currently stored (undef if nothing is)
sub filter {
    my ($self, $val) = @_;

    # A false argument never overwrites the stored filter.
    $self->{_transcript_filter} = $val if $val;

    return $self->{_transcript_filter};
}
} |
# Get/set the genomic sequence object.
#
#   Arg [1]    : (optional) sequence object; must be a Bio::PrimarySeqI or
#                a Bio::SeqI
#   Returntype : the stored sequence (undef if nothing is stored)
#   Exceptions : throws if a true argument is neither of the accepted types
sub genomic {
    my ($self, $seq) = @_;

    # Pure getter when no (or a false) argument is supplied.
    return $self->{_genomic} unless $seq;

    my $is_sequence = $seq->isa("Bio::PrimarySeqI") || $seq->isa("Bio::SeqI");
    throw("query seq must be a Bio::SeqI or Bio::PrimarySeqI") if !$is_sequence;

    $self->{_genomic} = $seq;
    return $self->{_genomic};
}
} |
# Returns the seq_region_name of every 'toplevel' slice fetched from the
# slice adaptor of $self->db.
#
#   Returntype : list of names
#
# When NONREF_REGIONS is defined (any defined value, including 0), the
# slice adaptor is called with the extra arguments (undef, 1).
# NOTE(review): presumably the third argument asks for non-reference
# regions to be included - confirm against SliceAdaptor::fetch_all.
sub get_chr_names {
    my ($self) = @_;

    my $slice_adaptor = $self->db->get_SliceAdaptor;

    my $toplevel_slices = defined($self->NONREF_REGIONS)
        ? $slice_adaptor->fetch_all('toplevel', undef, 1)
        : $slice_adaptor->fetch_all('toplevel');

    my @chr_names = map { $_->seq_region_name } @{$toplevel_slices};

    return @chr_names;
}
} |
# Returns the database adaptor that results are written to.
#
#   Returntype : database adaptor
#
# Resolution order:
#   * OUTDB unset/false : the adaptor returned by $self->db
#   * OUTDB is a hash   : a new Bio::EnsEMBL::DBSQL::DBAdaptor built from
#                         those arguments, with $self->db as its -dnadb
#   * anything else     : OUTDB is handed to $self->get_dbadaptor
sub get_output_db {
    my ($self) = @_;

    my $outdb_config = $self->OUTDB;
    my $outdb;

    if (!$outdb_config) {
        $outdb = $self->db;
    }
    elsif (ref($outdb_config) eq 'HASH') {
        # Direct method call instead of indirect-object "new Class(...)",
        # which Perl can misparse.
        $outdb = Bio::EnsEMBL::DBSQL::DBAdaptor->new(
            %{$outdb_config},
            -dnadb => $self->db,
        );
    }
    else {
        $outdb = $self->get_dbadaptor($outdb_config);
    }

    return $outdb;
}
} |
# Wraps each transcript in its own Bio::EnsEMBL::Gene.
#
#   Arg [1..n] : transcripts
#   Returntype : list of genes, one per transcript, in input order
#   Exceptions : throws "Have no slice" when the slice named by the
#                transcript's start exon cannot be fetched
#
# For every transcript the slice is looked up by the seqname of its start
# exon (lookups are cached per name) and assigned to the transcript, its
# exons and all supporting features. Supporting features also get this
# object's analysis. The gene's analysis is this object's analysis and its
# biotype is the analysis' logic_name.
sub make_genes {
    my ($self, @transcripts) = @_;

    my @genes;
    my $slice_adaptor = $self->db->get_SliceAdaptor;
    my %genome_slices;    # slice name => fetched slice (undef if not found)

    foreach my $tran (@transcripts) {
        my $slice_id = $tran->start_Exon->seqname;
        if (not exists $genome_slices{$slice_id}) {
            $genome_slices{$slice_id} = $slice_adaptor->fetch_by_name($slice_id);
        }
        my $slice = $genome_slices{$slice_id};

        # Fail before touching the transcript, and never let a missing
        # supporting feature hide the real error.
        if (!$slice) {
            my ($sf) = @{$tran->get_all_supporting_features};
            if (!$sf) {
                my ($first_exon) = @{$tran->get_all_Exons};
                ($sf) = @{$first_exon->get_all_supporting_features} if $first_exon;
            }
            my $hit_name = $sf ? $sf->hseqname : '';
            print $hit_name."\t$slice_id\n";
            throw("Have no slice");
        }

        my $gene = Bio::EnsEMBL::Gene->new();
        $gene->analysis($self->analysis);
        $gene->biotype($self->analysis->logic_name);

        foreach my $exon (@{$tran->get_all_Exons}) {
            $exon->slice($slice);
            foreach my $evi (@{$exon->get_all_supporting_features}) {
                $evi->slice($slice);
                $evi->analysis($self->analysis);
            }
        }
        foreach my $evi (@{$tran->get_all_supporting_features}) {
            $evi->slice($slice);
            $evi->analysis($self->analysis);
        }

        $tran->slice($slice);
        $gene->add_Transcript($tran);
        push @genes, $gene;
    }

    return @genes;
}
} |
# Constructor.
#
#   Args       : passed unchanged to the parent class constructor
#   Returntype : the new object
#
# After construction, read_and_check_config is called with
# $EXONERATE_CONFIG_BY_LOGIC.
sub new {
    my ($class, @ctor_args) = @_;

    my $self = $class->SUPER::new(@ctor_args);

    $self->read_and_check_config($EXONERATE_CONFIG_BY_LOGIC);

    return $self;
}
# Get/append query sequences.
#
#   Arg [1..n] : (optional) sequence objects to append; the first one must
#                be a Bio::PrimarySeqI or a Bio::SeqI
#   Returntype : list of all sequences added so far (empty if none)
#   Exceptions : throws if the first argument is neither accepted type
sub query_seqs {
    my ($self, @seqs) = @_;

    if (@seqs) {
        unless ($seqs[0]->isa("Bio::PrimarySeqI") || $seqs[0]->isa("Bio::SeqI")) {
            throw("query seq must be a Bio::SeqI or Bio::PrimarySeqI");
        }
        push @{ $self->{_query_seqs} }, @seqs;
    }

    # Nothing may have been added yet; dereferencing the missing slot
    # directly dies under "use strict".
    return @{ $self->{_query_seqs} || [] };
}
} |
# Loads the configuration for this analysis and validates it.
#
# Arguments other than $self are ignored; the parent implementation is
# always called with $EXONERATE_CONFIG_BY_LOGIC.
#
#   Exceptions : throws if QUERYSEQS, QUERYTYPE or GENOMICSEQS is
#                undefined; if QUERYANNOTATION is set but not readable; if
#                FILTER is set but is not a hash ref holding OBJECT and
#                PARAMETERS; or if the filter module cannot be loaded
#
# When FILTER is valid, the module named by OBJECT is loaded, instantiated
# with PARAMETERS and stored through $self->filter.
sub read_and_check_config {
    my $self = shift;

    $self->SUPER::read_and_check_config($EXONERATE_CONFIG_BY_LOGIC);

    my $logic = $self->analysis->logic_name;

    # Mandatory settings.
    foreach my $config_var (qw(QUERYSEQS
                               QUERYTYPE
                               GENOMICSEQS)) {
        throw("You must define $config_var in config for logic '$logic'")
            if not defined $self->$config_var;
    }

    # -r rather than -e: the file has to be readable, not merely present.
    throw("QUERYANNOTATION '" . $self->QUERYANNOTATION . "' in config must be readable")
        if $self->QUERYANNOTATION and not -r $self->QUERYANNOTATION;

    if ($self->FILTER) {
        if (not ref($self->FILTER) eq "HASH" or
            not exists($self->FILTER->{OBJECT}) or
            not exists($self->FILTER->{PARAMETERS})) {
            throw("FILTER in config for '$logic' must be a hash ref with elements:\n" .
                  "  OBJECT : qualified name of the filter module;\n" .
                  "  PARAMETERS : anonymous hash of parameters to pass to the filter");
        }
        else {
            my $module = $self->FILTER->{OBJECT};
            my $pars   = $self->FILTER->{PARAMETERS};

            # Turn Some::Module into Some/Module for require.
            (my $class = $module) =~ s/::/\//g;
            eval {
                require "$class.pm";
            };
            throw("Couldn't require ".$class." Exonerate2Genes:require_module $@") if ($@);

            $self->filter($module->new(%{$pars}));
        }
    }
}
# Runs every registered runnable, optionally filters the pooled results,
# converts them to genes with make_genes and stores the genes via
# $self->output.
#
#   Exceptions : throws if no runnable has been registered
sub run {
    my ($self) = @_;

    my @transcripts;
    throw("Can't run - no runnable objects") unless ($self->runnable);

    # Pool the output of all runnables, in registration order.
    for my $runnable (@{ $self->runnable }) {
        $runnable->run;
        push @transcripts, @{ $runnable->output };
    }

    # The filter, when configured, sees the pooled results in one go.
    if ($self->filter) {
        my $kept = $self->filter->filter_results(\@transcripts);
        @transcripts = @{$kept};
    }

    my @genes = $self->make_genes(@transcripts);
    $self->output(\@genes);
}
} |
# Stores genes through the GeneAdaptor of the output database.
#
#   Arg [1..n] : (optional) genes to store; defaults to @{ $self->output }
#   Exceptions : throws, after attempting every gene, if any store failed
#
# A failed store only produces a warning at the time, so one bad gene does
# not stop the remaining genes from being written.
sub write_output {
    my ($self, @output) = @_;

    my $outdb        = $self->get_output_db;
    my $gene_adaptor = $outdb->get_GeneAdaptor;

    @output = @{ $self->output } unless @output;

    my ($fails, $total) = (0, 0);
    for my $gene (@output) {
        eval {
            $gene_adaptor->store($gene);
        };
        if ($@) {
            warning("Unable to store gene!!\n$@");
            $fails++;
        }
        $total++;
    }

    if ($fails > 0) {
        throw("Not all genes could be written successfully " .
              "($fails fails out of $total)");
    }
}
} |
General documentation
Post general queries to ensembl-dev@ebi.ac.uk
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a '_'