Bio::EnsEMBL::Analysis::Runnable miRNA
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
  Bio::EnsEMBL::Analysis::Runnable:miRNA
Package variables
Privates (from "my" definitions)
$verbose = "yes"
Included modules
Bio::EnsEMBL::Analysis::Runnable
Bio::EnsEMBL::Analysis::Runnable::RNAFold
Bio::EnsEMBL::DBEntry
Bio::EnsEMBL::Exon
Bio::EnsEMBL::Gene
Bio::EnsEMBL::Transcript
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw warning )
Inherit
Bio::EnsEMBL::Analysis::Runnable
Synopsis
  my $runnable = Bio::EnsEMBL::Analysis::Runnable::miRNA->new
(
-queries => \%families,
-analysis => $self->analysis,
);
$self->runnable($runnable);
Description
Takes blast alignments in the form of a hash ref of Bio::EnsEMBL::DnaDnaAlignFeatures
grouped by miRNA family.
Opens an EMBL format file of miRNA sequences used to create the blast database and
parses out the positions of the mature RNA for each dna align feature.
Determines if the mature sequence is wholly contained within the dna align feature.
Applies a percent identity filter to the blast alignments.
If the alignment meet these criteria it then runs RNAfold and parses the results.
If the results of RNAfold indicate that the sequence can form a hairpin structure with
a score < -20 it is classed as a miRNA.
Single exon gene objects are made and the DnaDnaAlignFeature is added as supporting evidence.
The predicted structure and coordinates of the mature sequence are added to the transcript as
transcript attributes
Methods
display_stuff
No description
Code
get_matureDescriptionCode
get_miRNAsDescriptionCode
make_geneDescriptionCode
miRNAsDescriptionCode
newDescriptionCode
outputDescriptionCode
queriesDescriptionCode
run
No description
Code
run_analysisDescriptionCode
Methods description
get_maturecode    nextTop
  Title      : get_mature
Usage : my @mature = $self->get_mature($dna_align_feature);
Function : Returns the coordinates of the mature miRNA on the target sequence
Returns : Array containing 2 elements, the start and stop position (inclusive)
Exceptions : Throws if coordinates cannot be parsed or the miRNA cannot be found
Args : Bio::EnsEMBL::DnaDnaAlignFeature
get_miRNAscodeprevnextTop
  Title      : get_miRNAs
Usage : $self->get_miRNAs;
Function : Opens and reads EMBL formatted file of miRNA sequences used to make
: the miRNA blast database
Returns : None
Exceptions : Throws if sequence file is not found / cannot be opened
Args : None
make_genecodeprevnextTop
  Title      : make_gene
Usage : my $gene = $runnable->make_gene($dna_align_feature,$structure,$simple_alignment)
Function : Creates the non coding gene object from the parsed result file.
Returns : Hashref of Bio::EnsEMBL::Gene, Bio::EnsEMBL::Attribute and Bio::EnsEMBL::DBEntry
Exceptions : None
Args : dna_align_feature (Bio::EnsEMBL::DnaDnaAlignFeature)
: structure (String)
: alignment (Bio::SimpleAlign)
miRNAscodeprevnextTop
  Title      : miRNAs
Usage : my %miRNAs = %{$self->miRNAs};
Function : Container for storing miRNA BioSeq objects
Returns : Hash reference to Bio::Seq object
Exceptions : None
Args : Hash reference to Bio::Seq object
new(2)codeprevnextTop
  Title      : run
Usage : my $runnable->run;
Function : Run method.
Returns : None
Exceptions : Throws if no query sequences found
Args : None
outputcodeprevnextTop
  Arg [1]   : Bio::EnsEMBL::Analysis::Runnable
Arg [2] : hashref of output
Function : overrides runnable output method to allow storing of hash refs
Exceptions: throws if not passed an hashref
Example :
queriescodeprevnextTop
  Title      : queries
Usage : my %queries = %$runnable->queries
Function :
Returns : Hash reference
Exceptions : None
Args :
run_analysiscodeprevnextTop
  Title      : run_analysis
Usage : my $align = $self->run_analysis($dna_align_feature);
Function : makes an alignment that represents the mature sequence of the miRNA
: tests to check that it contains the entire mature sequence and is of
: high enough percent identity
Returns : Bio::SimpleAlign object
Exceptions : Throws if sequence file is not found / cannot be opened
Args : Bio::EnsEMBL::DnaDnaAlignFeature
Methods code
display_stuffdescriptionprevnextTop
sub display_stuff {
  my ($self,$daf,$structure,$aligns,$score)=@_;
  my $all_mature;
  my $status;
  my %miRNAs = %{$self->miRNAs};
  my $description = $miRNAs{$daf->hseqname}->display_id;
  ($all_mature,$status) = $self->get_mature($daf);
  print STDERR "DAF ".$daf->dbID." chr ".$daf->seq_region_name." ".$daf->seq_region_start." ".$daf->seq_region_end."\n";
  foreach my $mature (@$all_mature){
    print STDERR $daf->hseqname." $description miRNA at ".$mature->{'start'}." ".$mature->{'end'}."\n ";
  }
  print STDERR "target strand ".$daf->strand."\n";
  print STDERR "Query start end ".$daf->hstart.":".$daf->hend.
    " strand ".$daf->hstrand."\n";
  print STDERR "Start ".$daf->start." end ".$daf->end.
    " Hit end ".$daf->hstart." hit end ".$daf->hend."\n";
  print STDERR "Score $score\n";
  foreach my $align(@{$aligns}){
    my $simple = $align->get_SimpleAlign;
    foreach my $seq ( $simple->each_seq() ) {
      print STDERR $seq->seq."\n";
    }
    print STDERR $simple->match_line."\n";
    print STDERR "\n";
    $simple = $daf->get_SimpleAlign;
    foreach my $seq ( $simple->each_seq() ) {
      print STDERR $seq->seq."\n";
    }
    print STDERR $simple->match_line."\n";
    print STDERR "\n";
    print STDERR "$structure\n";

    for (my $i=1 ; $i< $align->start;$i++) {
      print STDERR ".";
    }
    print STDERR substr($daf->seq,$align->start-1,$align->end-$align->start+1);
    print STDERR "\nMirna position = ".$align->start." ". $align->end."\n";
  }
}

##################################################################################
# Containers
}
get_maturedescriptionprevnextTop
sub get_mature {
  my ($self,$query)=@_;
  my %miRNAs = %{$self->miRNAs};
  my $miRNA = $miRNAs{$query->hseqname};
  my @mature;
  my $status = "PUTATIVE";
  unless ($miRNA){
  print STDERR "Unable to locate miRNA fom embl file that corresponds to ".
	       $query->hseqname."$@\n ";
    return 0;
    }       
  my @feats = $miRNA->can('top_SeqFeatures') ? $miRNA->top_SeqFeatures : ();
  foreach my $sf ( @feats ) {
    my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf);
    foreach my $fth ( @fth ) {
      if( ! $fth->isa('Bio::SeqIO::FTHelper') ) {
	$self->throw("Cannot process FTHelper... $fth $@\n");
      }
      my $location =  $fth->loc;
      $status = "KNOWN" 
	if ($fth->field->{"evidence"}[0] && $fth->field->{"evidence"}[0] eq "experimental");
      if ($location =~ /(\d+)\.+(\d+)/){
	push @mature,{ 'start' => $1,
		       'end'   => $2
		     };
      } else {
	return 0;
      }
    }
  }
  return\@ mature,$status;
}
get_miRNAsdescriptionprevnextTop
sub get_miRNAs {
  my ($self)=@_;
  my %miRNA;
  my $seqIO = Bio::SeqIO->new(
			      -file   => $self->analysis->db_file,
			      -format => "embl"
			     );
  $self->throw("unable to open file ".$self->analysis->db_file."\n")
    unless $seqIO;
  while (my $seq = $seqIO->next_seq){
    $miRNA{$seq->accession} = $seq;
  }
  $self->miRNAs(\%miRNA);
}
make_genedescriptionprevnextTop
sub make_gene {
  my ($self,$daf,$structure,$aligns) = @_;
  my %miRNAs = %{$self->miRNAs};
  my @mature;
  my %gene_hash;
  my $description = $miRNAs{$daf->hseqname}->display_id;
  my @attributes;
  # exons
my $slice = $daf->slice; my $exon = Bio::EnsEMBL::Exon->new ( -start => $daf->start, -end => $daf->end, -strand => $daf->strand, -slice => $slice, -phase => -1, -end_phase => -1 ); # dont let it fall of the slice
return if ($exon->start < 1 or $exon->end > $slice->length); $exon->add_supporting_features($daf); # transcripts
my $transcript = Bio::EnsEMBL::Transcript->new; $transcript->add_Exon($exon); $transcript->start_Exon($exon); $transcript->end_Exon($exon); $transcript->biotype("miRNA"); # add the transcript attributes for the position of the mature miRNA as well as
# the predicted structure
foreach my $code(@$structure){ my $str_attrib = Bio::EnsEMBL::Attribute->new (-CODE => 'ncRNA', -NAME => 'Structure', -DESCRIPTION => 'RNA secondary structure line', -VALUE => $code ); push @attributes,$str_attrib; } foreach my $align(@$aligns){ my $miRNA_attrib = Bio::EnsEMBL::Attribute->new (-CODE => 'miRNA', -NAME => 'Micro RNA', -DESCRIPTION => 'Coordinates of the mature miRNA', -VALUE => $align->start."-".$align->end, ); push @attributes,$miRNA_attrib; } # gene
my $gene = Bio::EnsEMBL::Gene->new; $gene->biotype('miRNA'); $gene->analysis($self->analysis); $gene->add_Transcript($transcript); $gene->status('NOVEL'); $gene_hash{'gene'} = $gene; $gene_hash{'attrib'} =\@ attributes; $self->output(\%gene_hash);
}
miRNAsdescriptionprevnextTop
sub miRNAs {
  my ($self, $miRNAs) = @_;
  if ($miRNAs){
    $self->{'_miRNAs'} = $miRNAs;
  }
  return $self->{'_miRNAs'};
}
newdescriptionprevnextTop
sub new {
  my ($class,@args) = @_;
  my $self = $class->SUPER::new(@args);
  my ($queries,$thresholds) = rearrange(['QUERIES'], @args);
  $self->queries($queries); 
  $self->throw("miRNA: dying because cannot find database".$self->analysis->db_file."\n")
    unless (-e $self->analysis->db_file);
  return $self;
}
outputdescriptionprevnextTop
sub output {
  my ($self, $output) = @_;
  if(!$self->{'output'}){
    $self->{'output'} = [];
  }
  if($output){
    throw("Must pass Runnable:output an hashref not a ".$output)
      unless(ref($output) eq 'HASH');
    push(@{$self->{'output'}}, $output);
  }
  return $self->{'output'};
}
1;
}
queriesdescriptionprevnextTop
sub queries {
  my ($self, $queries) = @_;
  if ($queries){
    $self->{'_queries'} = $queries;
  }
  return $self->{'_queries'};
}
rundescriptionprevnextTop
sub run {
  my ($self) = @_;
  print STDERR "get miRNAs\n"  if $verbose;
  # fetch the coordinates of the mature miRNAs
$self->get_miRNAs; my %queries = %{$self->queries}; $self->throw("Cannot find query sequences $@\n") unless %queries; print STDERR "Run analysis\n" if $verbose; FAM: foreach my $family (keys %queries){ DAF:foreach my $daf (@{$queries{$family}}){ # Does the alignment contain the mature sequence?
my ($align,$status) = $self->run_analysis($daf); next DAF unless ($align && scalar @$align > 0); # does the sequence fold into a hairpin ?
my $seq = Bio::PrimarySeq->new ( -display_id => $daf->hseqname, -seq => $daf->seq, ); my $RNAfold = Bio::EnsEMBL::Analysis::Runnable::RNAFold->new ( -analysis => $self->analysis, -sequence => $seq ); $RNAfold->run; # get the final structure encoded by run length
my $structure = $RNAfold->encoded_str; next DAF unless ($RNAfold->score < -20); next DAF unless ($RNAfold->structure); $self->display_stuff($daf,$RNAfold->structure,$align,$RNAfold->score) if $verbose; # create the gene
$self->make_gene($daf,$structure,$align); } } print STDERR "delete temp files\n" if $verbose; $self->delete_files;
}
run_analysisdescriptionprevnextTop
sub run_analysis {
  my ($self,$daf)=@_;
  my %miRNAs = %{$self->miRNAs};
  my ($all_mature, $status);
  my @mature_aligns;
  if ($self->get_mature($daf)){
    ($all_mature,$status) = $self->get_mature($daf)
  } else {
    # ignore if you dont have a mature form identified.
$self->warning("No mature sequence identified for sequence ".$daf->hseqname." $@"); return 0; } # can have more than 1 mature sequence per hairpin
foreach my $mature(@$all_mature){ my $miRNA = $miRNAs{$daf->hseqname}; # length is 1 longer than end - start because it includes both the start and end base
my $miRNA_length = $mature->{'end'} - $mature->{'start'} +1; # change the U to T so as not to confuse the BioPerl
my $seq = $miRNA->seq; $seq =~ s/[uU]/T/g; # make a fake slice to use as the hit sequence
my $slice = Bio::EnsEMBL::Slice->new ( -seq_region_name => $daf->hseqname, -start => 1, -end => $miRNA->length, -seq => $seq, -strand => 1, -coord_system => $daf->slice->coord_system, ); $daf->hslice($slice); # make an alignment of just the mature sequence
my $temp_align = $daf->restrict_between_positions($mature->{'start'},$mature->{'end'},"HSEQ"); unless ($temp_align) { # print "rejecting ".$daf->p_value." cos of not getting mature alignment\n";
next; } my $align = $temp_align->transfer($daf->feature_Slice); # is the enitre mature sequence present in the alignment, no indels?
unless ( $align->alignment_length == $miRNA_length && $align->alignment_length == $align->length) { # print "rejecting ".$daf->dbID." coz of not getting aligned length\n";
next; } # is the mature alignment of high enough percent identity?
# also no gaps in the mature alignment
unless ( $align->get_SimpleAlign->overall_percentage_identity >= 90 ) { # print "rejecting ".$daf->p_value." cos of aligned ID\n";
next; } push @mature_aligns,$align; } return\@ mature_aligns,$status;
}
General documentation
CONTACTTop
Post questions to the Ensembl development list: ensembl-dev@ebi.ac.uk