Bio::EnsEMBL::Analysis::RunnableDB
WGA2GenesDirect
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::WGA2GenesDirect
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Analysis::Config::WGA2GenesDirect
Bio::EnsEMBL::Analysis::Tools::WGA2Genes::GeneScaffoldDirect
Inherit
Synopsis
my $db = Bio::EnsEMBL::DBAdaptor->new($locator);
my $genscan = Bio::EnsEMBL::Analysis::RunnableDB::WGA2GenesDirect
->new (-db => $pipelinedb,
-input_id => $input_id
-analysis => $analysis );
$genscan->fetch_input();
$genscan->run();
$genscan->write_output(); #writes to stdout
Description
Methods
COMPARA_DB | No description | Code |
INPUT_METHOD_LINK_TYPE | No description | Code |
MAX_EXON_READTHROUGH_DIST | No description | Code |
MIN_COVERAGE | No description | Code |
QUERY_CORE_DB | No description | Code |
TARGET_CORE_DB | No description | Code |
TRANSCRIPT_FILTER | No description | Code |
_check_gene | No description | Code |
fetch_input | Description | Code |
filter | No description | Code |
gene | No description | Code |
genomic_align_block_chains | No description | Code |
good_transcripts | No description | Code |
new | Description | Code |
process_transcript | No description | Code |
read_and_check_config | No description | Code |
run | Description | Code |
target_slices | No description | Code |
write_output | Description | Code |
Methods description
Methods code
sub COMPARA_DB
{ my ($self, $db) = @_;
if (defined $db) {
$self->{_compara_db} = $db;
}
return $self->{_compara_db}; } |
sub INPUT_METHOD_LINK_TYPE
{ my ($self, $type) = @_;
if (defined $type) {
$self->{_input_method_link_type} = $type;
}
return $self->{_input_method_link_type}; } |
sub MAX_EXON_READTHROUGH_DIST
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_max_ex_rt_dist} = $val;
}
return $self->{_max_ex_rt_dist}; } |
sub MIN_COVERAGE
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_min_coverage} = $val;
}
return $self->{_min_coverage};
}
1; } |
sub QUERY_CORE_DB
{ my ($self, $db) = @_;
if (defined $db) {
$self->{_query_core_db} = $db;
}
return $self->{_query_core_db}; } |
sub TARGET_CORE_DB
{ my ($self, $db) = @_;
if (defined $db) {
$self->{_target_core_db} = $db;
}
return $self->{_target_core_db};
}
} |
sub TRANSCRIPT_FILTER
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_transcript_filter} = $val;
}
return $self->{_transcript_filter}; } |
sub _check_gene
{ my ($self, $gene) = @_;
my @good_transcripts;
foreach my $t (@{$gene->get_all_Transcripts}){
if ((length($t->translateable_seq) % 3) == 0){
push @good_transcripts, $t;
}
else{
warn ("Gene ", $gene->display_id(), " contains no valid transcripts");
}
}
warn ("Gene ", $gene->display_id(), " contains no valid transcripts") if (scalar(@good_transcripts) == 0);
$self->good_transcripts(\@good_transcripts);
}
} |
sub fetch_input
{ my( $self) = @_;
my $input_id = $self->input_id;
throw("No input id") unless defined($input_id);
print "YOUR INPUT ID:",$input_id,"\n";
my $q_dbh = Bio::EnsEMBL::DBSQL::DBAdaptor->
new(%{$self->QUERY_CORE_DB});
my $t_dbh = Bio::EnsEMBL::DBSQL::DBAdaptor->
new(%{$self->TARGET_CORE_DB});
my $compara_dbh = Bio::EnsEMBL::Compara::DBSQL::DBAdaptor->
new(%{$self->COMPARA_DB});
my $query_species =
$q_dbh->get_MetaContainerAdaptor->get_Species->binomial;
my $target_species =
$t_dbh->get_MetaContainerAdaptor->get_Species->binomial;
my $gdb_adap = $compara_dbh->get_GenomeDBAdaptor;
my $q_gdb = $gdb_adap->fetch_by_name_assembly($query_species);
my $t_gdb = $gdb_adap->fetch_by_name_assembly($target_species);
my ($q_assembly_version, $t_assembly_version);
eval {
$q_assembly_version = $q_dbh->get_CoordSystemAdaptor->
fetch_by_name('toplevel',
$q_gdb->assembly);
$t_assembly_version = $t_dbh->get_CoordSystemAdaptor->
fetch_by_name('toplevel',
$t_gdb->assembly);
};
$@ and do {
throw("Had trouble fetching coord systems for ".
$q_gdb->assembly . " and " .
$t_gdb->assembly . " from core dbs: $@");
};
my $gene = $q_dbh->get_GeneAdaptor->fetch_by_stable_id($input_id);
$self->_check_gene($gene);
$self->gene($gene);
my $mlss = $compara_dbh->get_MethodLinkSpeciesSetAdaptor
->fetch_by_method_link_type_GenomeDBs($self->INPUT_METHOD_LINK_TYPE,
[$q_gdb, $t_gdb]);
throw("No MethodLinkSpeciesSet for :\n" .
$self->INPUT_METHOD_LINK_TYPE . "\n" .
$query_species . "\n" .
$target_species)
if not $mlss;
my $dnafrag = $compara_dbh->get_DnaFragAdaptor->
fetch_by_GenomeDB_and_name($q_gdb,
$self->gene->slice->seq_region_name);
my $gaba = $compara_dbh->get_GenomicAlignBlockAdaptor;
my $gen_al_blocks =
$gaba->fetch_all_by_MethodLinkSpeciesSet_DnaFrag($mlss,
$dnafrag,
$gene->start,
$gene->end);
my (%chains, @chains);
foreach my $block (@$gen_al_blocks) {
my $qga = $block->reference_genomic_align;
my ($tga) = @{$block->get_all_non_reference_genomic_aligns};
if (not exists $self->target_slices->{$tga->dnafrag->name}) {
$self->target_slices->{$tga->dnafrag->name} =
$t_dbh->get_SliceAdaptor->fetch_by_region('toplevel',
$tga->dnafrag->name);
}
if ($block->reference_genomic_align->dnafrag_strand < 0) {
$block->reverse_complement;
}
push @{$chains{$block->group_id}}, $block;
}
foreach my $chain_id (keys %chains) {
push @chains, [
sort {
$a->reference_genomic_align->dnafrag_start <=>
$b->reference_genomic_align->dnafrag_start;
} @{$chains{$chain_id}}
];
}
$self->genomic_align_block_chains(\@chains);
}
} |
sub filter
{ my ($self, $val) = @_;
if ($val) {
$self->{_filter} = $val;
}
return $self->{_filter}; } |
sub gene
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_gene_recs} = $val;
}
return $self->{_gene_recs}; } |
sub genomic_align_block_chains
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_gen_al_chains} = $val;
}
return $self->{_gen_al_chains}; } |
sub good_transcripts
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_good_transcripts} = $val;
}
return $self->{_good_transcripts}; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->read_and_check_config($WGA2GENES_CONFIG_BY_LOGIC);
return $self;
}
} |
sub process_transcript
{ my ($self,
$tran,
$source_id
) = @_;
return 0 if not $tran;
my (@processed_transcripts);
my @exons = @{$tran->get_all_Exons};
my ($tsf) = @{$tran->get_all_supporting_features};
my $pep = $tran->translate->seq;
if (CORE::length($pep) == 0) {
logger_info("Rejecting proj of $source_id because was just a stop codon");
return 0;
}
$tran = replace_stops_with_introns($tran);
return $tran;
}
} |
sub read_and_check_config
{ my ($self, $hash) = @_;
$self->SUPER::read_and_check_config($hash);
my $logic = $self->analysis->logic_name;
foreach my $var (qw(INPUT_METHOD_LINK_TYPE
QUERY_CORE_DB
TARGET_CORE_DB
COMPARA_DB)) {
throw("You must define $var in config for logic '$logic'" .
" or in the DEFAULT entry")
if not $self->$var;
}
if ($self->TRANSCRIPT_FILTER) {
if (not ref($self->TRANSCRIPT_FILTER) eq "HASH" or
not exists($self->TRANSCRIPT_FILTER->{OBJECT}) or
not exists($self->TRANSCRIPT_FILTER->{PARAMETERS})) {
throw("FILTER in config foR '$logic' must be a hash ref with elements:\n" .
" OBJECT : qualified name of the filter module;\n" .
" PARAMETERS : anonymous hash of parameters to pass to the filter");
} else {
my $module = $self->TRANSCRIPT_FILTER->{OBJECT};
my $pars = $self->TRANSCRIPT_FILTER->{PARAMETERS};
(my $class = $module) =~ s/::/\//g;
eval{
require "$class.pm";
};
throw("Couldn't require ".$class." Exonerate2Genes:require_module $@") if($@);
$self->filter($module->new(%{$pars}));
}
}
}
} |
sub run
{ my ($self) = @_;
my @res_tran;
my $tran_stable_id;
my @final_tran;
print scalar(@{$self->genomic_align_block_chains}),"\n";
foreach my $chain (@{$self->genomic_align_block_chains}) {
my $gene_scaffold = Bio::EnsEMBL::Analysis::Tools::WGA2Genes::GeneScaffold->new(
-genomic_align_blocks => $chain,
-from_slice => $self->gene->slice,
-to_slices => $self->target_slices,
-transcripts => $self->good_transcripts,
-max_readthrough_dist => $self->MAX_EXON_READTHROUGH_DIST,
-direct_target_coords => 1,
);
foreach my $tran (@{$self->good_transcripts}) {
$tran_stable_id = $tran->stable_id;
my $proj_trans =
$gene_scaffold->place_transcript($tran);
if ($proj_trans) {
push @res_tran, $proj_trans;
}
}
}
if ($self->TRANSCRIPT_FILTER){
@res_tran = @{$self->filter->filter_results(\@res_tran)};
}
foreach my $res_tran (@res_tran){
$res_tran = $self->process_transcript($res_tran,
$tran_stable_id);
push @final_tran, $res_tran;
}
print "At the end of RUN, we had ", scalar(@final_tran), " transcripts\n";
$self->output(\@final_tran);
}
} |
sub target_slices
{ my ($self, $val) = @_;
if (defined $val) {
$self->{_target_slices} = $val;
}
if (not exists $self->{_target_slices}) {
$self->{_target_slices} = {};
}
return $self->{_target_slices};
}
} |
sub write_output
{ my ($self) = @_;
my $trans_count = 0;
my $t_dbh = Bio::EnsEMBL::DBSQL::DBAdaptor->
new(%{$self->TARGET_CORE_DB});
my $t_gene_adaptor = $t_dbh->get_GeneAdaptor();
foreach my $t (@{$self->output}) {
$t->analysis($self->analysis);
my $gene = Bio::EnsEMBL::Gene->new( -analysis => $self->analysis,
-biotype => 'protein_coding',);
foreach my $tsf ( @{ $t->get_all_supporting_features }){
$tsf->analysis($self->analysis);
}
foreach my $exon (@{$t->get_all_Exons()}){
$exon->analysis($self->analysis);
foreach my $esf (@{$exon->get_all_supporting_features()}){
$esf->analysis($self->analysis);
}
}
$gene->add_Transcript($t);
$t_gene_adaptor->store($gene);
$trans_count++;
print "TRANSCRIPT:\n";
foreach my $e (@{$t->get_all_Exons}) {
printf("%s\tEnsembl\tExon\t%d\t%d\t%d\t%d\t%d\n", $e->slice->seq_region_name, $e->start, $e->end, $e->strand, $e->phase, $e->end_phase);
}
my $seqio = Bio::SeqIO->new(-format => 'fasta',
-fh =>\* STDOUT);
$seqio->write_seq($t->translate);
}
print "For gene " . $self->input_id . " you stored ", $trans_count, " transcripts\n";
}
} |
General documentation
No general documentation available.