Raw content of Bio::EnsEMBL::Analysis::Runnable::RNAFold # Ensembl module for Bio::EnsEMBL::Analysis::Runnable::RNAfold # # Copyright (c) 2005 Ensembl # =head1 NAME Bio::EnsEMBL::Analysis::Runnable::RNAfold =head1 SYNOPSIS my $runnable = Bio::EnsEMBL::Analysis::Runnable::RNAfold->new ( -analysis => $analysis, -sequence => $seq, -structure => $str, ); $self->runnable($runnable); =head1 DESCRIPTION Runnble to wrap RNAfold, part of the Vienna RNA package. Takes a Bio::Seq object and an optional structure string as parameters. If a structure is provided it uses the structure to constrain the folding prediction (RNAFold -C). The resulting structure string is run-length encoded. =head1 CONTACT Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk =cut package Bio::EnsEMBL::Analysis::Runnable::RNAFold; use strict; use warnings; use Bio::EnsEMBL::Analysis::Runnable; use Bio::EnsEMBL::Utils::Exception qw(throw warning); use Bio::EnsEMBL::Utils::Argument qw( rearrange ); use vars qw(@ISA); @ISA = qw(Bio::EnsEMBL::Analysis::Runnable); my $verbose = ""; =head2 new Title : new Usage : my $runnable = Bio::EnsEMBL::Analysis::Runnable::RNAfold->new : ( : -sequence => $seq, : -structure => $structure, : -analysis => $self->analysis, : ); Function : Instantiates new RNAfold runnable Returns : Bio::EnsEMBL::Analysis::Runnable::RNAFold Exceptions : Thows if no sequence is provided Args : sequence (Bio::PrimarySeq) : analysis (Bio::EnsEMBL::Analysis) : structure (String) =cut sub new { my ($class,@args) = @_; my $self = $class->SUPER::new(@args); my ($sequence,$structure) = rearrange(['SEQUENCE','STRUCTURE'], @args); $self->throw("RNAFold: Cannot work without a sequence object\n") unless $sequence; $self->sequence($sequence); $self->structure_constraint($structure) if $structure; return $self; } =head2 new Title : run Usage : my $runnable->run; Function : Run method. Returns : None Exceptions : None Args : None =cut sub run{ my ($self) = @_; print STDERR "Run analysis\n" if $verbose; my $filename = $self->write_seq($self->sequence); $self->RNAfold($self->sequence,$filename); my $structure = $self->parse; $self->structure($structure); $self->encoded_str($self->encode_str($structure)); # print STDERR "delete temp files\n" if $verbose; $self->delete_files; } =head2 RNAfold Title : RNAfold Usage : $self->RNAfold($seq,$filename); Function : Wrapper for RNAfold Returns : none Exceptions : Throws if RNAfold fails to run Args : sequence (Bio::PrimarySeq) : filename (String) =cut sub RNAfold{ my ($self,$seq,$filename)=@_; my $command = "RNAfold "; my $options = ""; if ($self->structure_constraint){ $options = " -C "; } my $results_file = $self->create_filename("RNAfold","txt"); $self->files_to_delete($results_file); # delete the postcript file that RNAfold generates $self->files_to_delete("/tmp/".$seq->display_id."_ss.ps"); $self->resultsfile($results_file); $command .= "$options < $filename 2>&1 > ".$results_file; print STDERR "Running RNAfold ".$command."\n"; open(my $fh, "$command |") || $self->throw("Error opening RNAfold cmd <$command>"); # this loop reads the STDERR from the blast command while(<$fh>){ if(/FATAL:(.+)/){ my $match = $1; $self->throw("miRNA: RNAfold failed to run: $match$@\n"); } } close $fh; return 1; } =head2 parse Title : parse Usage : my $structure = $self->parse; Function : Parses the results of RNAfold Returns : structure (String) Exceptions : Throws if results file cannot be opened or closed Args : None =cut sub parse{ my ($self)=@_; my $results = $self->resultsfile; my $structure; my $score; open(RNAFOLD, $results) or $self->throw("FAILED to open ".$results. " miRNA:parse_results\n$@\n"); LINE: while(<RNAFOLD>){ chomp; if ($_ =~ /([().]+)\s\(\s*(-*\d+.\d+)\)$/){ $structure = $1; $score = $2; } } close(RNAFOLD) or $self->throw("FAILED to close ".$results. " miRNA:parse_results\n$@\n"); $self->score($score); if ($structure){ return $structure; } else { return 0; } } =head2 write_seq Title : write_seq Usage : my $filename = $self->write_seq($daf); Function : Writes the dna sequence file Returns : filename (String) Exceptions : Throws if it cannot write to the file Args : Bio::PrimarySeq =cut sub write_seq{ my ($self,$seq)=@_; my $filename = $self->create_filename("miRNA","seq"); # have to write file so the sequence is all on a single line # cos thats the way RNAfold likes it $self->files_to_delete("/tmp/$filename"); eval{ open (FILE,">$filename"); print FILE ">".$seq->display_id."\n"; print FILE $seq->seq."\n"; print FILE $self->structure_constraint if ($self->structure_constraint); close FILE; }; if ($@){ $self->throw ("RNAFold: Error writing to seq file $@\n"); }; $self->files_to_delete($filename); return $filename; } =head2 encode_str Title : encode_str Usage : my $encoded_str = $runnable->encode_string($string) Function : Does string length encoding to reduce size of structure string : splits strings if they are longer then 200 characters so they : will fit in the transcript attribute table, gives a range value : at the start of the string indicating the start and stop positions of the : structure on the transcript Returns : String Exceptions : None Args : String =cut sub encode_str{ my ($self,$string)= @_; my @codes; my $start = 1; my $count=0; my $code; my @elements = split //,$string; my $last_chr = ""; my @array =[]; foreach my $chr (@elements){ $count++; if ($chr eq $last_chr){ push @array,$chr; } else { if ($code && length($code) > 200 && scalar(@array) == 1){ push @codes,"$start:$count\t$code"; $code = undef; $start = $count+1; } # Character has changed print STDERR it and the associated array length if (scalar(@array) > 1){ $code .= scalar(@array); @array = []; } $code .= "$chr"; $last_chr = $chr; } } # last element if (scalar(@array) > 1){ $code .= scalar(@array); } push @codes,"$start:$count\t$code"; return \@codes; } ################################################################################## # Containers =head2 sequence Title : sequence Usage : my $seq = $runnable->sequence Function : Get/ set for the sequence object Returns : Bio::PrimarySeq Exceptions : Throws if not passed a Bio::PrimarySeq object Args : BioPrimarySeq =cut sub sequence{ my ($self, $sequence) = @_; if ($sequence){ $self->throw("RNAFold: Sequence needs to be a Bio::PrimarySeq object not a $sequence\n") unless ($sequence->isa("Bio::PrimarySeq")); $self->{'_sequence'} = $sequence; } return $self->{'_sequence'}; } =head2 structure_constraint Title : structure_constraint Usage : my $str = $runnable->structure_constraint Function : Get/ set for the structure string used by RNAfold to constrain : the folding prediction Returns : String Exceptions : None Args : String =cut sub structure_constraint{ my ($self, $structure_constraint) = @_; if ($structure_constraint){ $self->{'_structure_constraint'} = $structure_constraint; } return $self->{'_structure_constraint'}; } =head2 structure Title : structure Usage : my $str = $runnable->structure Function : Get/ set for the structure string predicted by RNAfold Returns : String Exceptions : None Args : String =cut sub structure{ my ($self, $structure) = @_; if ($structure){ $self->{'_structure'} = $structure; } return $self->{'_structure'}; } =head2 encoded_str Title : encoded_str Usage : my $str = $runnable->encoded_str Function : Get/ set for the run-length encoded structure string predicted by RNAfold Returns : String Exceptions : None Args : String =cut sub encoded_str{ my ($self, $encoded_str) = @_; if ($encoded_str){ $self->{'_encoded_str'} = $encoded_str; } return $self->{'_encoded_str'}; } =head2 score Title : score Usage : my $score = $runnable->score Function : Get/ set for the score predicted by RNAfold Returns : String Exceptions : None Args : String =cut sub score{ my ($self, $score) = @_; if ($score){ $self->{'_score'} = $score; } return $self->{'_score'}; } 1;