Raw content of Bio::EnsEMBL::Analysis::Runnable::Fgenesh package Bio::EnsEMBL::Analysis::Runnable::Fgenesh; # Ensembl module for Bio::EnsEMBL::Analysis::Runnable::Fgenes # # Copyright (c) 2004 Ensembl # =head1 NAME Bio::EnsEMBL::Analysis::Runnable::Fgenes =head1 SYNOPSIS my $runnable = Bio::EnsEMBL::Analysis::Runnable::Fgenes->new ( -query => $slice, -program => 'genscan', -matrix => 'HumanIso.smat', -analysis => $analysis, ); $runnable->run; my @predictions = @{$runnable->output}; =head1 DESCRIPTION Wrapper to run the fgenesh gene predictor and then parse the results into prediction transcripts this is a bare bones module which inherits most of its functionality from Bio::EnsEMBL::Analysis::Runnable::Genscan =head1 CONTACT Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk =cut use vars qw(@ISA); use strict; use warnings; use Bio::EnsEMBL::Analysis::Runnable::BaseAbInitio; use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::Utils::Argument qw( rearrange ); @ISA = qw(Bio::EnsEMBL::Analysis::Runnable::BaseAbInitio); =head2 new Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Fgenesh Function : create a new Bio::EnsEMBL::Analysis::Runnable::Fgenesh object and setting up default values Returntype: Bio::EnsEMBL::Analysis::Runnable::Fgenesh Exceptions: Example : =cut sub new { my ($class,@args) = @_; my $self = $class->SUPER::new(@args); ###################### #SETTING THE DEFAULTS# ###################### $self->program('fgenesh') if(!$self->program); $self->matrix('hum.dat') if(!$self->matrix); ###################### return $self; } #FGENESH-1.0 Prediction of potential genes in genomic DNA #Time: Thu Sep 2 10:57:44 2004. #Seq name: HS120G22 AL031847.17 Human DNA sequence from clone RP1-120G22 on chromosome 1p36.21-36.33 #length of sequence 166518bp G+C content: 56 Isochore: 3 #number of predicted genes 18 in +chain 13 in -chain 5 #number of predicted exons 97 in +chain 72 in -chain 25 # Gn S Type Start End Score ORF Len # -- - ---- ----- --- ----- --- --- # 1 + CDSf 827 - 849 6.70 827 - 847 21 # 1 + CDSi 4024 - 4166 -1.97 4025 - 4165 141 # 1 + CDSi 5726 - 5809 0.50 5728 - 5808 81 # 1 + CDSi 7096 - 7154 7.92 7098 - 7154 57 # 1 + CDSl 7429 - 7740 7.18 7429 - 7740 312 # 1 + PolA 7762 2.02 # 2 + TSS 9875 -2.59 # 2 + CDSf 10848 - 10925 11.04 10848 - 10925 78 # 2 + CDSi 14545 - 14719 5.83 14545 - 14718 174 # 2 + CDSi 14883 - 14999 20.17 14885 - 14998 114 # 2 + CDSl 15087 - 15169 -2.37 15089 - 15169 81 # 2 + PolA 15355 -2.78 =head2 parse_results Arg [1] : Bio::EnsEMBL::Analysis::Runnable::Fgenesh Arg [2] : string, filename Function : parse the output from Fgenesh into prediction transcripts Returntype: none Exceptions: throws if cannot open or close results file Example : =cut sub parse_results{ my ($self, $results) = @_; if(!$results){ $results = $self->resultsfile; } open(OUT, "<".$results) or throw("FAILED to open ".$results. "Genscan:parse_results"); if (<OUT> =~ m|no reliable predictions|i ){ print STDERR "No genes predicted\n"; return; } while (<OUT>) { chomp; my $flag = 0; if (/^\s*-[-\s]+$/) { GENES : while (<OUT>) { my @lines; until (/^$/) { if (/CDSl|CDSi|CDSf|CDSo/i) { my @element = split; push @lines, \@element; throw("Unable to parse fgenesh ouput (".@element. ") Line: $_\n") unless (scalar(@element) == 11); }elsif (/Predicted protein/i) { $flag = 1; last GENES ; } #print "Getting next line\n"; $_ = <OUT>; } if ($lines[0]->[1] eq '+') { @lines = sort {$a->[3] <=> $b->[3]} @lines; } elsif ($lines[0]->[1] eq '-') { @lines = reverse sort {$a->[3] <=> $b->[3]} @lines; } my $exon_num=1; foreach my $line (@lines) { my ($start, $end, $strand, $score, $pvalue, $phase, $seqname); $seqname = $line->[0]+($exon_num/100); if ($line->[1] eq '+') { $start = $line->[3]; $end = $line->[5]; $strand = 1; $phase = (3-($line->[7]-$line->[3]))% 3; } elsif ($line->[1] eq '-') { $start = $line->[3]; $end = $line->[5]; $strand = -1; $phase = (3-($line->[5]-$line->[9]))% 3; } $score = $line->[6]; my $exon = $self->feature_factory->create_prediction_exon ($start, $end, $strand, $score, '0', $phase, $seqname, $self->query, $self->analysis); $self->exon_groups($line->[0], $exon); $exon_num++; } if ( $flag==1 ) { last; } } } } $self->create_transcripts(); close(OUT) or throw("FAILED to close ".$results. "Fgenesh:parse_results"); } 1;