Bio::EnsEMBL::Analysis::RunnableDB
FindPartialGenes
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::FindPartialGenes ;
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $orthologueanalysis = Bio::EnsEMBL::Analysis::RunnableDB::FindPartialGenes->new (
-analysis => $analysis_obj,
);
$orthologueanalysis->fetch_input();
$orthologueanalysis->run();
$orthologueanalysis->output();
$orthologueanalysis->write_output();
Description
This object uses information from a Ensembl Compara database to find partial
gene predictions in an organism. It's
used to fetch the input from different databases as well as writing results
the results to the database.a
Methods
fetch_input | No description | Code |
new | No description | Code |
partials | No description | Code |
read_and_check_config | No description | Code |
run | No description | Code |
write_output | No description | Code |
Methods description
None available.
Methods code
sub fetch_input
{ my( $self ) = @_;
$self->get_initial_geneset(
$$MAIN_CONFIG{QUERY_SPECIES},
$$FIND_PARTIAL_GENES{DEFAULT_GENE_BIOTYPES}
);
print "SUMMARY : " . scalar( @{$self->genes}) . " genes fetched\n" ; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->{_partials}={};
return $self;
}
} |
sub partials
{ my ($self, $tsi, $tg1_homol, $longest_tg1_trans, $query_gene ) = @_ ;
if ( $tsi ) {
my $already_stored = 0 ;
if (!exists($self->{_partials}{$tsi})) {
$self->{_partials}{$tsi}{gene} = $tg1_homol;
$self->{_partials}{$tsi}{matches} = [];
$self->{_partials}{$tsi}{transcript} = [];
} else {
for my $longest_tr ( @{$self->{_partials}{$tsi}{transcript}} ) {
if ( $longest_tr eq $longest_tg1_trans ) {
$already_stored = 1 ;
}
}
}
unless ( $already_stored ) {
push @{$self->{_partials}{$tsi}{matches}}, $query_gene;
push @{$self->{_partials}{$tsi}{transcript}}, $longest_tg1_trans;
}
}
return $self->{_partials}; } |
sub read_and_check_config
{ (my $self) = @_ ;
$self->SUPER::read_and_check_config();
my %analysis_sets = %{$$FIND_PARTIAL_GENES{ANALYSIS_SETS}} ;
foreach my $logic_name ( keys %analysis_sets ) {
unless ( exists $$EXONERATE_CONFIG_BY_LOGIC{$logic_name} ) {
throw("You have defined a logic_name ".
"\"$logic_name\"\n in our OrthologueEvaluator.pm".
" configuration file but there is no configuration for such an analysis in the\n".
" Exonerate2Genes-config, so i don't know where to write the genes to.\n".
" I suggest to add a configuration for $logic_name to your ".
"Exoneate2Genes.pm config\n") ;
}
my %subsets = %{$analysis_sets{$logic_name}};
for my $species ( qw ( MAIN_ORTH TEST_SPEC_1 TEST_SPEC_2) ) {
print "analysing $subsets{$species}\n" ;
my $sa = Bio::EnsEMBL::Registry->get_adaptor($subsets{$species},'core',"Slice" ) ;
unless ( $sa ) {
throw ( " can't get adaptor for species $subsets{$species} - maybe typo in config ?");
}
}
my $analysis = $self->db->get_adaptor("Analysis")->fetch_by_logic_name($logic_name) ;
unless ( $analysis) {
throw("There's no analysis $logic_name in your database - this analysis is normally added by".
" the setup script\n" ) ;
}
}
}
1; } |
sub run
{ my ( $self ) = @_;
my $qy_species = $$MAIN_CONFIG{QUERY_SPECIES};
my $tg1_species = $$FIND_PARTIAL_GENES{ANALYSIS_SETS}{$self->post_logic_name}{MAIN_ORTH};
my $tg2_species = $$FIND_PARTIAL_GENES{ANALYSIS_SETS}{$self->post_logic_name}{TEST_SPEC_1};
my $tg3_species = $$FIND_PARTIAL_GENES{ANALYSIS_SETS}{$self->post_logic_name}{TEST_SPEC_2};
QUERY_GENES : for my $query_gene (@{$self->genes} ) {
my $tg1_homol = get_one2one_orth_for_gene_in_other_species($query_gene,$tg1_species);
if ($tg1_homol) {
my $tg2_homol = get_one2one_orth_for_gene_in_other_species($tg1_homol,$tg2_species);
my $tg3_homol = get_one2one_orth_for_gene_in_other_species($tg1_homol,$tg3_species);
if ($tg2_homol || $tg3_homol) {
my ($longest_qy,$len_longest_qy,$nexon_longest_qy) = get_transcript_with_longest_CDS($query_gene);
my ($longest_tg1,$len_longest_tg1,$nexon_longest_tg1) = get_transcript_with_longest_CDS($tg1_homol);
my ($longest_tg2,$len_longest_tg2,$nexon_longest_tg2) = get_transcript_with_longest_CDS($tg2_homol) if ($tg2_homol);
my ($longest_tg3,$len_longest_tg3,$nexon_longest_tg3) = get_transcript_with_longest_CDS($tg3_homol) if ($tg3_homol);
" " . $tg2_species ." " . (defined($tg2_homol) ? $len_longest_tg2 : "N/A") .
" " . $tg3_species ." " . (defined($tg3_homol) ? $len_longest_tg3 : "N/A") . "\n";
my $ratio_tg1_tg2 = 10;
my $ratio_tg1_tg3 = 10;
$ratio_tg1_tg2 = $len_longest_tg2/$len_longest_tg1 if ($tg2_homol); $ratio_tg1_tg3 = $len_longest_tg3/$len_longest_tg1 if ($tg3_homol);
my $ratio_tg1_qy = $len_longest_qy/$len_longest_tg1;
my $ratio_tg1_tg2_low = $$FIND_PARTIAL_GENES{ANALYSIS_SETS}{$self->post_logic_name}{RATIO_MAIN_vs_SPEC_1_LOW};
my $ratio_tg1_tg2_high = $$FIND_PARTIAL_GENES{ANALYSIS_SETS}{$self->post_logic_name}{RATIO_MAIN_vs_SPEC_1_HIGH};
my $ratio_tg1_tg3_low = $$FIND_PARTIAL_GENES{ANALYSIS_SETS}{$self->post_logic_name}{RATIO_MAIN_vs_SPEC_2_LOW};
my $ratio_tg1_tg3_high = $$FIND_PARTIAL_GENES{ANALYSIS_SETS}{$self->post_logic_name}{RATIO_MAIN_vs_SPEC_2_HIGH};
my $ratio_tg1_qy_75 = $$FIND_PARTIAL_GENES{ANALYSIS_SETS}{$self->post_logic_name}{RATIO_MAIN_vs_QUERY};
if ($ratio_tg1_tg2 > $ratio_tg1_tg2_low && $ratio_tg1_tg2 < $ratio_tg1_tg2_high ||
$ratio_tg1_tg3 > $ratio_tg1_tg3_low && $ratio_tg1_tg3 < $ratio_tg1_tg3_high ) {
if ($ratio_tg1_qy < $ratio_tg1_qy_75 ) {
print "Significant length difference for :" . $qy_species . "\t" . $longest_qy->stable_id .
"\t" . $query_gene->stable_id . " (" . ($query_gene->external_name ? $query_gene->external_name : '') . ")" .
" with " . $tg1_species . "\t" . $longest_tg1->stable_id .
"\t" . $tg1_homol->stable_id . " (" . $tg1_homol->external_name . ")\n";
my $transcript_stable_id = $longest_tg1->stable_id;
if ( $$FIND_PARTIAL_GENES{ANALYSIS_SETS}{$self->post_logic_name}{IGNORE_ORTHOLOGUES_ON_MT_REGIONS} ) {
if ( $tg1_homol->slice->seq_region_name =~m/MT/){
print "ignore MT regions is switched on\n " ;
print "\nIgnoring homolog : $transcript_stable_id 'cause it's on an MT region\n " ;
} else {
$self->partials($transcript_stable_id, $tg1_homol, $longest_tg1, $query_gene ) ;
}
} else {
$self->partials($transcript_stable_id, $tg1_homol, $longest_tg1, $query_gene ) ;
}
}
}
}
}
} } |
sub write_output
{ my ($self) = @_;
my %identified_partials = %{ $self->partials } ;
foreach my $id (keys %identified_partials) {
my $gene = $identified_partials{$id}{gene};
print "#desc stable_id chr start end strand cov".
" stable_id chr start end strand cov\n";
foreach my $match_gene (@{$identified_partials{$id}{matches}}) {
printf " %-9s %18s %3s %9d %9d %2d 0 %18s %3s %8d %8d %2d 0\n",
"NA",$gene->stable_id, $gene->slice->seq_region_name, $gene->start,
$gene->end, $gene->strand,
$match_gene->stable_id, $match_gene->slice->seq_region_name, $match_gene->start,
$match_gene->end, $match_gene->strand;
}
print "#----\n";
}
my @sequences_to_write ;
foreach my $id (keys %identified_partials) {
foreach my $longest_missing_trans (@{$identified_partials{$id}{transcript}}) {
push @sequences_to_write, $longest_missing_trans;
}
}
my @output ;
TRANSLATIONS: for my $trans ( @sequences_to_write ) {
if ( $trans->translation ) {
my $translate = $trans->translate;
my $transstr = $translate->seq() ;
$transstr =~ s/(.{1,60})/$1\n/g;
if ($transstr =~m/\*/ ) {
print "SKIPPING " . $trans->translation->stable_id . " : translation contain stop codon\n$transstr\n\n";
next TRANSLATIONS ;
}
$translate->display_id($trans->translation->stable_id) ;
push @output, $translate ;
}
}
my $written_files = $self->chunk_and_write_fasta_sequences( shuffle(\@output) ) ;
$self->upload_input_ids( $written_files ) ; } |
General documentation
Post general queries to ensembl-dev@ebi.ac.uk
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a '_'