Bio::EnsEMBL::Analysis::RunnableDB
FindFalseIntrons
Toolbar
Summary
Bio::EnsEMBL::Analysis::RunnableDB::FindFalseIntrons
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $fs = Bio::EnsEMBL::Analysis::RunnableDB::FindFalseIntrons->new(
-analysis => $analysis_obj,
# );
$fs->fetch_input();
$fs->run();
$fs->output();
$fs->write_output();
Description
This object wraps Bio::EnsEMBL::Analysis::Runnable::OrthologueEvaluator and is
used to fetch the input from different databases as well as writing results
the results to the database.a
Configuration used to run this module is contained in the following files :
Bio::EnsEMBL::Analysis::Config::GeneBuild::ExamineGeneSets;
Bio::EnsEMBL::Analysis::Config::GeneBuild::OrthologueEvaluator;
The analysis FindFalseIntrons uses input-ids of the following format :
Coordsystem:Assembly:Seq_region_name:Start:End:Strand
chromosome:BROADD2:12:1333:890890:1
chromosome:BROADD2:3:30434100:30529400:1
Methods
AUTOLOAD | No description | Code |
exonerate_intron_vs_exons | No description | Code |
exonerate_intron_vs_homologs | No description | Code |
exonerate_sequences | No description | Code |
fetch_input | No description | Code |
get_coords | No description | Code |
get_homologues_genes | No description | Code |
get_recover_ids | No description | Code |
get_unique_introns_of_transcripts | No description | Code |
intron_aligns_alternative_transcript | No description | Code |
new | No description | Code |
read_and_check_config | No description | Code |
reformat_input_ids | No description | Code |
run | No description | Code |
Methods description
None available.
Methods code
sub AUTOLOAD
{ my ($self,$val) = @_;
(my $routine_name=$AUTOLOAD)=~s/.*:://; #trim package name $self->{$routine_name}=$val if $val ; return $self->{$routine_name} ; } |
sub exonerate_intron_vs_exons
{ my ($intron_to_check , $homology_trans, $intron_id, $transcript_of_intron ) = @_ ;
print "INTRON_EXONERATING to identify CODING exons in homolog.....................\n";
unless ( $intron_id) {
$intron_id = $intron_to_check->display_id ;
}
my $out_dir = "/tmp";
my $f1= $out_dir."/".$intron_id.".query" ;
my @alignment ;
open (FH,">$f1") || die "Cant write file $f1\n" ;
my $seq = $intron_to_check->seq;
$seq=~ s/(.{1,60})/$1\n/g;
print FH ">Intron $intron_id\n" . $seq;
close(FH) ;
my %aligning_hom_exons ;
my @homology_exons = @{$homology_trans->get_all_translateable_Exons};
for my $hom_exon ( @homology_exons ) {
my $f2= $out_dir."/".$hom_exon->stable_id .".homology_trans"; my $f3= $out_dir."/".$hom_exon->stable_id .".exonerate";
open (FH,">$f2") || die "Cant write file $f2\n" ;
($seq=$hom_exon->seq->seq )=~ s/(.{1,60})/$1\n/g;
print FH ">".$hom_exon->stable_id . "\n$seq";
close(FH) ;
my $exonerate_version ="/usr/local/ensembl/bin/exonerate-1.0.0" ;
my $cmd = "$exonerate_version -q $f1 -t $f2 -m affine:local --bestn 1 ";
$cmd .=" > $f3";
system("$cmd") ;
my $hit = 0 ;
my $query_length =0 ;
my $raw_score ;
open (F,"$f3") || die " can't read $f3\n" ;
foreach my $l (<F>){
if ($l=~/C4 Alignment:/) {
$hit = 1;
$l = "INTRON vs CDS-EXON-HOMOLOG " . $l ;
print "\nINTRON vs CDS-EXON-HOMOLOG\n";
}
push @alignment , $l if $hit ;
if ( $l=~m/Query range:/){
my @it = split/\s+/,$l ;
if ( $it[5]>$it[3]) {
$query_length = ($it[5] - $it[3])+1 ;
} else {
$query_length = ($it[3] - $it[5])+1 ;
}
}
}
close(F);
if ( $hit ) {
print "\ntest-intron aligns vs coding exon : " . $transcript_of_intron->stable_id.
" [ ". $intron_to_check->prev_Exon->stable_id." <intron> " .$intron_to_check->next_Exon->stable_id . "] ALIGNS "
. $hom_exon->stable_id ." ] QLEN $query_length iLEN " . $intron_to_check->length."\n\n" ;
$aligning_hom_exons{$hom_exon->stable_id} = [ $hom_exon , $query_length] ;
}else {
print "intron does not match exon " . $hom_exon->stable_id . "\n" ;
}
system("rm $f2"); system("rm $f3");
}
system("rm $f1");
return\% aligning_hom_exons ; } |
sub exonerate_intron_vs_homologs
{ my ($self,$intron_to_check, $gene, $i2t, $transcript_of_intron ) = @_ ;
my %intron_aligns_in_x_homologues ;
my %i2t = %{$i2t};
my @aligned_homolog_transcripts ;
my @homo_genes = @{ $self->get_homologues_genes($gene) } ;
if ( scalar(@homo_genes) == 0 ) {
print "no homologs found\n" ;
}
HOMOLOG: for my $homolog_gene ( @homo_genes ) {
print $homolog_gene . "\t" . $homolog_gene->stable_id . "\n" ;
my @introns_in_homolog_gene ;
TRANS: for my $ht ( @{$homolog_gene->get_all_Transcripts } ) {
push @introns_in_homolog_gene, @{$ht->get_all_Introns};
}
TRANS: for my $homology_trans ( @{$homolog_gene->get_all_Transcripts } ) {
my $transcript_of_intron = $i2t{$intron_to_check};
my $intron_id = $transcript_of_intron->stable_id."_".$intron_to_check->prev_Exon->stable_id."_".$intron_to_check->next_Exon->stable_id;
my $query_length = exonerate_sequences($intron_to_check, $homology_trans , $intron_id, $transcript_of_intron);
if ( $query_length > 0 ) {
push @aligned_homolog_transcripts, $homology_trans ;
TRANSCRIPTS: for my $tr ( @{$gene->get_all_Transcripts} ) {
my $tr2 = $transcript_of_intron->stable_id ;
if ( $tr->stable_id =~m/$tr2/) {
next TRANSCRIPTS ;
}
my @exons = @{$tr->get_all_Exons} ;
for my $e ( @exons ) {
if ( $e->overlaps($intron_to_check)){
print "\nExon in alternative transcript overlaps FalseIntron - Intron will not be recovered.\n";
print "Exon : " . $e->stable_id ." ( " . $tr->stable_id . ") overlaps_intron in $intron_id\n" ;
print "Positive: " . $tr->stable_id . " vs " . $transcript_of_intron->stable_id . "\n" ;
$self->intron_aligns_alternative_transcript($intron_to_check, $transcript_of_intron, $e, $tr, $homology_trans ) ;
}
}
}
my %hom_coding_alinging_exons = %{ exonerate_intron_vs_exons($intron_to_check, $homology_trans, $intron_id, $transcript_of_intron) } ;
print "exonerating done\n " ;
for my $exon_stable_id ( keys %hom_coding_alinging_exons ) {
my ( $exon, $query_length ) = @{$hom_coding_alinging_exons{$exon_stable_id}} ;
for my $hom_trans_intron ( @introns_in_homolog_gene ) {
if ( $hom_trans_intron->overlaps($exon) ) {
print "\nXXXXXXXX\nIntron overlaps " . $exon->stable_id . " - better not recover this as it's\n XXXXXXXXX\n" ;
print $hom_trans_intron->prev_Exon->stable_id . " " .
$hom_trans_intron->next_Exon->stable_id . " " .
$exon->stable_id . "\n" ;
print "\nXXXXXXXXXXXXXX\n" ;
}
}
}
}
} } return (\@aligned_homolog_transcripts) ;
}
} |
sub exonerate_sequences
{ my ($intron_to_check , $target, $intron_id, $transcript_of_intron,$version) = @_ ;
unless ( $intron_id) {
$intron_id = $intron_to_check->display_id ;
}
my $out_dir = "/tmp" ;
my $f1= $out_dir."/".$intron_id.".query" ; my $f2= $out_dir."/".$target->stable_id .".target"; my $f3= $out_dir."/".$target->stable_id .".exonerate";
open (FH,">$f1") || die "Cant write file $f1\n" ;
my $seq = $intron_to_check->seq;
$seq=~ s/(.{1,60})/$1\n/g;
print FH ">Intron $intron_id\n" . $seq;
close(FH) ;
open (FH,">$f2") || die "Cant write file $f2\n" ;
($seq=$target->translateable_seq )=~ s/(.{1,60})/$1\n/g;
print FH ">".$target->stable_id . "\n$seq";
close(FH) ;
my @versions = qw ( 1.0.0 ) ;
my @options = ( " " , " --exhaustive " ) ;
my $query_length =0 ;
my $cmd ;
for my $version ( @versions ) {
for my $extra_opt ( @options ) {
my $exonerate_version ;
if ( $version =~m/0.7.1/ || $version=~m/1.0.0/) {
$exonerate_version = "/usr/local/ensembl/bin/exonerate-$version" ;
} else {
$exonerate_version = "/software/ensembl/bin/exonerate-1.4.0 " ;
}
my @alignment ;
$cmd = $exonerate_version . " -q $f1 -t $f2 -m affine:local --bestn 1 ";
$cmd .= $extra_opt ;
$cmd.=" > $f3";
system("$cmd") ;
my $hit = 0 ;
my $raw_score ;
open (F,"$f3") || die " can't read $f3\n" ;
foreach my $l (<F>){
$hit = 1 if ($l=~/C4 Alignment:/) ;
push @alignment , $l if $hit ;
if ( $l=~m/Query range:/){
my @it = split/\s+/,$l ;
if ( $it[5]>$it[3]) {
$query_length = ($it[5] - $it[3])+1 ;
} else {
$query_length = ($it[3] - $it[5])+1 ;
}
}
}
close(F);
if ( $hit ) {
print "hit_with $version $extra_opt\n $cmd" ;
print "\nIntron_exonerate_alignment_OK : " . $transcript_of_intron->stable_id." [ ". $intron_to_check->prev_Exon->stable_id." <intron> "
.$intron_to_check->next_Exon->stable_id . "] ALIGNS " . $target->stable_id ." ] QLEN $query_length iLEN " . $intron_to_check->length."\n\n" ;
print $intron_id . "aligns perfect\n " if ( $intron_to_check->length == $query_length );
system("rm $f1"); system("rm $f2"); system("rm $f3");
return $query_length ;
} else {
}
} } return $query_length ; } |
sub fetch_input
{ my( $self ) = @_;
print "\n\nChecking introns for the species configured in ".
" Config/GeneBuild/OrthologEvaluator.pm : $$MAIN_CONFIG{QUERY_SPECIES}\n" ;
$self->cmp_species( $$MAIN_CONFIG{QUERY_SPECIES} );
print "getting adaptor from registry ... " . $self->cmp_species . "\n" ;
my $ga = Bio::EnsEMBL::Registry->get_adaptor($self->cmp_species,"core","gene");
unless ( $ga ) {
print " I can't get a gene-adaptor for " . $self->cmp_species . "\n" .
" Maybe you forgot to add the alias for this to your compara registry file\n" ;
exit(0);
}
print "==> Fetching gene from db : " . $ga->db->dbname . "\@ " . $ga->db->host .
" : ". $ga->db->port ."\n" ;
my $sa = Bio::EnsEMBL::Registry->get_adaptor($self->cmp_species,"core","slice");
throw("Can't get GeneAdaptor from Registry") unless $ga ;
my @array = split(/:/,$self->input_id) ;
my $slice ;
my @genes_for_stats;
my @limit_to_biotypes = @{$$FIND_FALSE_INTRONS{DEFAULT_GENE_BIOTYPES}};
if(scalar(@array) < 3 || scalar(@array) > 6) {
print "Fetching all genes in genome - you can also supply a slice name to limit to a certain slice ...\n" ;
if ( @limit_to_biotypes ) {
print "Limiting analysis to biotypes : ";
for ( @limit_to_biotypes ){
print "$_\t" ;
push @genes_for_stats, @{$ga->fetch_all_by_biotype($_)} ;
}
print "\n";
}else{
@genes_for_stats = @{$ga->fetch_all};
}
}else {
my $slice = $sa->fetch_by_name ( $self->input_id) ;
print "==> Getting genes on slice ".$self->input_id . "\n" ;
if ( @limit_to_biotypes ){
print "==> Limiting analysis to biotypes :\n";
for my $bt ( @limit_to_biotypes){
print "\t- $bt\n" ;
push @genes_for_stats, @{$slice->get_all_Genes_by_type($bt)} ;
}
print "\n\n";
}else{
print "==> using all biotypes\n " ;
push @genes_for_stats, @{$slice->get_all_Genes};
}
}
my $short = 0 ;
if ( $short ) {
print "SHORT==1 ONLY RUNNING ON SUBSET !!!!!!!!!!!!!\n";
@genes_for_stats = splice(@genes_for_stats,0,10);
}
print "==> have ".scalar(@genes_for_stats)." genes fetched for ".$self->cmp_species()."\n";
$self->genes(\@genes_for_stats) ;
}
use vars '$AUTOLOAD'; } |
sub get_coords
{ my ($gene) = @_ ;
my ($start, $end, $chr ) ;
if ( ($gene->seq_region_start - 50000 ) < 0 ) {
$start = $gene->slice->start ;
} else {
$start = $gene->seq_region_start - 50000 ;
}
if (($gene->seq_region_start + 50000 ) > $gene->slice->end) {
$end = $gene->slice->length ;
} else {
$end = $gene->seq_region_end + 50000 ;
}
$chr = $gene->slice->seq_region_name ;
return ( $start, $end, $chr ) ; } |
sub get_homologues_genes
{ my ($self, $gene) = @_ ;
my @specs_to_check = @{$$FIND_FALSE_INTRONS{ANALYSIS_SETS}{$self->analysis->logic_name}} ;
my @homologs;
my @hg_2 ;
SPECIES: foreach my $other_species ( @specs_to_check ) {
print "getting homolog for $other_species\n" ;
my $hg = get_one2one_orth_for_gene_in_other_species($gene, $other_species) ;
if ( $hg ) {
if ( $hg->biotype=~m/protein_coding/ ) {
push @hg_2 , $hg;
}
}
}
return\@ hg_2; } |
sub get_recover_ids
{ my ($self, $gene, $t_long, $aref ) = @_ ;
my $g_sid = $gene->stable_id ? $gene->stable_id : "NO_ID_".$gene->dbID ;
my %recover_ids ;
my ($start, $end , $chr ) = get_coords($gene) ;
my $cs = $gene->slice->coord_system->name();
my $csv = $gene->slice->coord_system->version();
unless ( $csv ) {
warning("Error: Could not get coord_system_version for coord_system with name : ".$cs ."\n");
}
for my $homolog ( @$aref ) {
$recover_ids{"$cs:$csv:$chr:$start:$end:1:".$t_long->stable_id.":".$homolog->stable_id}=1;
}
return [keys %recover_ids] ; } |
get_unique_introns_of_transcripts | description | prev | next | Top |
sub get_unique_introns_of_transcripts
{ my ( $g ) = @_;
my (%unique_introns, %i2t) ;
for my $tr ( @{$g->get_all_Transcripts}){
my @tr_exons = @{ $tr->get_all_translateable_Exons};
if ( $tr->seq_region_strand == 1) {
@tr_exons = sort { $a->seq_region_start <=> $b->seq_region_start } @tr_exons ;
} else {
@tr_exons = sort { $b->seq_region_start <=> $a->seq_region_start } @tr_exons ;
}
my $intron_count = 0 ;
foreach ( my $i=0; $i < scalar(@tr_exons) ; $i++) {
if ( $i+1 < scalar(@tr_exons) ){
$intron_count++;
my $intron = new Bio::EnsEMBL::Intron( $tr_exons[$i], $tr_exons[$i+1] ) ;
$i2t{$intron} = $tr;
my $intron_hkey = $intron->slice->name."-".$intron->seq_region_start."-".
$intron->seq_region_end."-".$intron->seq_region_strand;
unless ( exists $unique_introns{$intron_hkey}) {
$unique_introns{$intron_hkey}=$intron ;
}
} } } return (\% unique_introns,\% i2t) ; } |
intron_aligns_alternative_transcript | description | prev | next | Top |
sub intron_aligns_alternative_transcript
{ my ($self, $false_intron, $false_transcript, $overlapping_exon , $alt_transcript, $homology_trans ) = @_ ;
${$self->{_coding_ex_in_alt_tr_which_aligns_false_intron}}{$false_intron}{false_intron}= $false_intron;
${$self->{_coding_ex_in_alt_tr_which_aligns_false_intron}}{$false_intron}{overlapping_coding_exon}= $overlapping_exon;
push @{${ $self->{_false_introns_alt_tr}}{$false_intron}}, $alt_transcript;
push @{${ $self->{_intron_coding_in_hom}}{$false_intron}}, $homology_trans;
push @{ ${$self->{_ftr_false_introns}} {$false_transcript->stable_id} }, $false_intron ; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
$self->read_and_check_config();
$self->orthologues_species( $$FIND_FALSE_INTRONS{ANALYSIS_SETS}{$self->analysis->logic_name} ) ;
return $self;
}
} |
sub read_and_check_config
{ (my $self) = @_ ;
throw("Your compara-registry-file LOCATION_OF_COMPARA_REGISTRY_FILE does not exist !!".
"\nCheck your config !")
unless ( -e $$MAIN_CONFIG{LOCATION_OF_COMPARA_REGISTRY_FILE} ) ;
print STDERR "reading compara-config file : $$MAIN_CONFIG{LOCATION_OF_COMPARA_REGISTRY_FILE}\n";
Bio::EnsEMBL::Registry->load_all($$MAIN_CONFIG{LOCATION_OF_COMPARA_REGISTRY_FILE},1,1);
my @dba = @{Bio::EnsEMBL::Registry->get_all_DBAdaptors()};
my %tmp;
for my $a ( @dba ) {
push @{$tmp{ $a->get_MetaContainer->get_schema_version }} , $a->dbname ;
}
if ( keys %tmp > 1 ) {
warning("You're using databases with different schemas - this can cause problems ...\n" ) ;
for ( keys %tmp ) {
print "schema $_:\n". join("\n" , @{$tmp{$_}} ) . "\n\n" ;
}
}
for my $a ( @dba ) {
$a->disconnect_when_inactive(1);
}
}
} |
sub reformat_input_ids
{ my ($input_id_aref ) = @_ ;
my @new_ids ;
my %tmp;
for my $id ( @{$input_id_aref} ) {
my @components = split /\:/,$id ;
my $sf_id = pop @components ;
my $hom_stable_id = pop @components ;
my $key = join ( ":", @components ) ;
push @{$tmp{$key}{$hom_stable_id}}, $sf_id ;
}
for my $region_key ( keys %tmp ) {
my %sf_id_index ;
print "processing region and transcript : ";
print $region_key . "\n" ;
my %hom_to_sf_id = %{$tmp{$region_key}} ;
for my $stable_id ( keys %hom_to_sf_id ) {
print $stable_id . "\t" ;
my @sf_ids = @{$hom_to_sf_id{ $stable_id}};
for my $sf_id ( @sf_ids ) {
print "$sf_id\t" ;
$sf_id_index{$sf_id} = 1;
}
print "\n" ;
}
my $sf_db_id_string = "";
my $counter = 0 ;
for my $id ( sort keys %sf_id_index ) {
$sf_db_id_string .= $id .",";
$sf_id_index{$id}=$counter ;
print $id . " --> " . $counter . "\n" ;
$counter++;
}
my $hom_string ="";
my $sf_string ="";
for my $stable_id ( keys %hom_to_sf_id ) {
$hom_string .=$stable_id."," ;
for my $sf_id ( sort @{$hom_to_sf_id{ $stable_id}} ) {
print "\t$sf_id\t" ;
$sf_string .= $sf_id_index{$sf_id}. ",";
}
print "\n";
$sf_string =~s/,$//; $sf_string .= "_";
}
$sf_string=~s/_$//; $sf_string=~s/,$//; $sf_db_id_string=~s/,$//; $hom_string =~s/,$//; push @new_ids , $region_key . ":" . $hom_string .":" . $sf_string . ":" . $sf_db_id_string ;
}
return\@ new_ids ; } |
sub run
{ my ($self) = @_;
my @genes = @{$self->genes} ;
my $cnt =0 ;
my %input_ids_to_upload ;
my %transcripts_marked_for_deletion;
my %intron_attributes;
GENES: for my $g ( @genes ) {
$cnt++;
print "\n\nprocessing gene-nr $cnt / ".scalar(@genes) . " : " . $g->stable_id . "\n" ;
print "="x80; print "\n";
my ($t_long,$cdsl, $n_exon_max) = get_transcript_with_longest_CDS($g);
unless ( $t_long) {
print " longest transcript does not exist\n" ;
next GENES;
}
my ( $unique_introns , $i2t ) = get_unique_introns_of_transcripts ( $g ) ;
my %unique_introns = %{$unique_introns} ; ;
my %i2t = %{$i2t};
$self->{_false_introns_alt_tr} = {} ;
$self->{_ftr_false_introns} = {} ;
$self->{_coding_ex_in_alt_tr_which_aligns_false_intron} = {};
my %intron_is_coding_in_homologues ;
for my $intron ( values %unique_introns ) {
if ( $intron->length < 150 ) {
print "\nINTRON : " . $intron->length . "< 150\n========================================\n";
my $aligned_homolog_trans = $self->exonerate_intron_vs_homologs( $intron, $g, $i2t);
if ( scalar( @$aligned_homolog_trans) > 0 ) {
print "INTRON_IS_CODING_IN_HOMOLOGUES : " . join ("," , map { $_->stable_id } @$aligned_homolog_trans ) . "\n\n" ;
$intron_is_coding_in_homologues{$intron} = $aligned_homolog_trans ;
my @exonerateVSGenwise_ids = @{$self->get_recover_ids( $g,$t_long, $aligned_homolog_trans)} ;
my @iid = split /\:/, $self->input_id ;
my $my_slice = $self->db->get_SliceAdaptor->fetch_by_region(undef,$iid[2]) ;
my $feature = Bio::EnsEMBL::SimpleFeature->new(
-seq_region_start => $intron->seq_region_start ,
-seq_region_end => $intron->seq_region_end,
-start => $intron->seq_region_start ,
-end => $intron->seq_region_end,
-strand=> $intron->seq_region_strand,
-analysis => $self->analysis() ,
-score => 0,
-display_label => $intron->prev_Exon->stable_id."_".$intron->next_Exon->stable_id ,
-slice => $my_slice,
) ;
$intron_attributes{$intron}{feature} = $feature ;
$intron_attributes{$intron}{input_ids} = $self->get_recover_ids( $g,$t_long, $aligned_homolog_trans) ;
} else {
print "no hit - intron did not align\n" ;
}
print "\n\n" ;
} else {
}
}
print "Summary:\n=====================\n" ;
my %alt_trans_ex_coding;
my %false_introns = %{ $self->{_false_introns_alt_tr} } ;
for my $k ( keys %false_introns ) {
for my $tr ( @{$false_introns{$k}} ) {
$alt_trans_ex_coding{$tr->stable_id}++ ;
}
}
print "\n" ;
foreach my $tr_coding_stable_id ( keys %alt_trans_ex_coding ) {
print "ALT_CODING_TR : $tr_coding_stable_id\t$alt_trans_ex_coding{$tr_coding_stable_id} introns recovered\n" ;
}
print "\n" ;
my %false_transcripts= %{ $self->{_ftr_false_introns} } ;
for my $f_tsi ( keys %false_transcripts) {
print "FalseTranscript " . $f_tsi ."\t: ".scalar(@{$false_transcripts{$f_tsi}})." possible_false_introns.\n" ;
my $nr_false_introns_recovered = 0 ;
for my $false_intron ( @{$false_transcripts{$f_tsi }} ) {
print "\tFI coding in HOMOLOGUES: ".join ("," , map { $_->stable_id } @{$intron_is_coding_in_homologues{$false_intron}}) . "\n";
print "\tFI coding in alt.Trans : ".join (" ,", map { $_->stable_id } @{${$self->{_false_introns_alt_tr}}{$false_intron}} ) ;
my %fi = %{ $self->{_coding_ex_in_alt_tr_which_aligns_false_intron}} ;
print "\tFI-start : " . $fi{$false_intron}{false_intron}->seq_region_start . "\t" ;
print " FI-end : " . $fi{$false_intron}{false_intron}->seq_region_end . "\t" ;
print "\toverlapping_coding_ex_in_alt_tr: ". $fi{$false_intron}{overlapping_coding_exon}->stable_id . "\n\n" ;
$nr_false_introns_recovered++;
}
for my $false_intron ( @{$false_transcripts{$f_tsi }} ) {
my $delete_recovery_id = 0 ;
if ( (scalar( @{$false_transcripts{$f_tsi }}) == $nr_false_introns_recovered)
&& (scalar( @{$intron_is_coding_in_homologues{$false_intron}}) == scalar(@{$self->orthologues_species}) ) ) {
print "marked_for_deletion_alt_coding_and_coding_in_homologues : $f_tsi\n" ;
} elsif ( scalar ( @{$false_transcripts{$f_tsi }} ) == $nr_false_introns_recovered ) {
print "transcript could be marked for deletion as it's coding in alt. trans : $f_tsi\n" ;
} else {
foreach my $intron_key ( keys %intron_attributes ) {
print "intron_key $intron_key\n" ;
my $simple_feature = $intron_attributes{$intron_key}{feature};
print "simple_feature $simple_feature\n" ;
my $feature = $intron_attributes{$intron_key}{feature};
my @recover_ids = @{ $intron_attributes{$intron_key}{input_ids} } ;
for my $rci ( @recover_ids ) {
print " recover_id $rci\n" ;
}
}
}
if ( $delete_recovery_id ) {
print "deleteing recovery_ids as FalseIntron is coding:\n" ;
delete $intron_attributes{$false_intron}{feature};
delete $intron_attributes{$false_intron}{input_ids} ;
}
}
}
}
my $sfa = $self->db->get_SimpleFeatureAdaptor() ;
for my $false_intron ( keys %intron_attributes ) {
my $feature = $intron_attributes{$false_intron}{feature};
if ( $feature ) {
$sfa->store($feature) ;
for my $exVSgen_id ( @{ $intron_attributes{$false_intron}{input_ids} }){
$input_ids_to_upload{$exVSgen_id.":".$feature->dbID}=1;
}
}
}
for my $set ( keys %transcripts_marked_for_deletion ) {
for my $tsi ( keys %{$transcripts_marked_for_deletion{$set}}) {
my $rm_transcript = $self->db->get_TranscriptAdaptor->fetch_by_stable_id($tsi) ;
my $feature = Bio::EnsEMBL::SimpleFeature->new(
-seq_region_start => $rm_transcript->seq_region_start ,
-seq_region_end => $rm_transcript->seq_region_end,
-start => $rm_transcript->seq_region_start ,
-end => $rm_transcript->seq_region_end,
-strand=> $rm_transcript->seq_region_strand,
-analysis => $self->analysis() ,
-score => 0,
-display_label => $rm_transcript->stable_id."_$set",
-slice => $rm_transcript->slice,
) ;
$sfa->store($feature) ;
print $feature->display_id() . " stored for further investigation in SimpleFeature table\n " ;
}
}
my $analysis_obj = new Bio::EnsEMBL::Pipeline::Analysis(
-logic_name => "RecoverFalseIntrons",
-program=> "exonerate",
-module => "RecoverFalseIntrons",
-input_id_type => 'prot_slice_form',
);
my $submit_ana = new Bio::EnsEMBL::Pipeline::Analysis(
-logic_name => "Submit_RecoverFalseIntrons",
-module => "Dummy",
-input_id_type => 'prot_slice_form',
);
my $pa = $self->pipeline_adaptor($self->db) ;
my $aa = $pa->get_AnalysisAdaptor;
$aa->store($submit_ana);
$aa->store($analysis_obj);
print "input_ids:\n" ;
print join("\n", keys %input_ids_to_upload)."\n\n" ;
print "reformatting input_ids as we use the new format:\n" ;
my @old_input_ids = keys %input_ids_to_upload;
my @new_input_ids = @{reformat_input_ids(\@old_input_ids)} ;
print "new_input_ids:\n" ;
for my $nid ( @new_input_ids ) {
print "new_input_id " . $nid . "\n" ;
print "new_input_id_length " . length($nid) . "\n" ;
}
$self->upload_input_ids(\@ new_input_ids , "Submit_RecoverFalseIntrons" , "prot_slice_form") ;
my $goal_analysis = $aa->fetch_by_logic_name("RecoverFalseIntrons") ;
my $rule = Bio::EnsEMBL::Pipeline::Rule->new(-goalanalysis => $goal_analysis);
my $ra = $self->pipeline_adaptor($self->db)->get_RuleAdaptor();
my $store_rule = 1 ;
RULE: foreach my $old_rule( $ra->fetch_all ) {
if ( $goal_analysis->logic_name eq $old_rule->goalAnalysis->logic_name ) {
$store_rule = 0 ;
}
}
if ( $store_rule ) {
$ra->store($rule) ;
}
Bio::EnsEMBL::Registry->load_all($$MAIN_CONFIG{LOCATION_OF_COMPARA_REGISTRY_FILE},1,1);
my @dba = @{Bio::EnsEMBL::Registry->get_all_DBAdaptors()};
for ( @dba ) {
$_->disconnect_when_inactive(1);
} } |
General documentation
Post general queries to ensembl-dev@ebi.ac.uk
The rest of the documentation details each of the object methods.
Internal methods are usually preceded with a '_'