Bio::EnsEMBL::Analysis::Runnable
miRNA
Toolbar
Summary
Bio::EnsEMBL::Analysis::Runnable:miRNA
Package variables
Privates (from "my" definitions)
$verbose = "yes"
Included modules
Inherit
Synopsis
my $runnable = Bio::EnsEMBL::Analysis::Runnable::miRNA->new
(
-queries => \%families,
-analysis => $self->analysis,
);
$self->runnable($runnable);
Description
Takes blast alignments in the form of a hash ref of Bio::EnsEMBL::DnaDnaAlignFeatures
grouped by miRNA family.
Opens an EMBL format file of miRNA sequences used to create the blast database and
parses out the positions of the mature RNA for each dna align feature.
Determines if the mature sequence is wholly contained within the dna align feature.
Applies a percent identity filter to the blast alignments.
If the alignment meet these criteria it then runs RNAfold and parses the results.
If the results of RNAfold indicate that the sequence can form a hairpin structure with
a score < -20 it is classed as a miRNA.
Single exon gene objects are made and the DnaDnaAlignFeature is added as supporting evidence.
The predicted structure and coordinates of the mature sequence are added to the transcript as
transcript attributes
Methods
Methods description
Title : get_mature Usage : my @mature = $self->get_mature($dna_align_feature); Function : Returns the coordinates of the mature miRNA on the target sequence Returns : Array containing 2 elements, the start and stop position (inclusive) Exceptions : Throws if coordinates cannot be parsed or the miRNA cannot be found Args : Bio::EnsEMBL::DnaDnaAlignFeature |
Title : get_miRNAs Usage : $self->get_miRNAs; Function : Opens and reads EMBL formatted file of miRNA sequences used to make : the miRNA blast database Returns : None Exceptions : Throws if sequence file is not found / cannot be opened Args : None |
Title : make_gene Usage : my $gene = $runnable->make_gene($dna_align_feature,$structure,$simple_alignment) Function : Creates the non coding gene object from the parsed result file. Returns : Hashref of Bio::EnsEMBL::Gene, Bio::EnsEMBL::Attribute and Bio::EnsEMBL::DBEntry Exceptions : None Args : dna_align_feature (Bio::EnsEMBL::DnaDnaAlignFeature) : structure (String) : alignment (Bio::SimpleAlign) |
Title : miRNAs Usage : my %miRNAs = %{$self->miRNAs}; Function : Container for storing miRNA BioSeq objects Returns : Hash reference to Bio::Seq object Exceptions : None Args : Hash reference to Bio::Seq object |
Title : run Usage : my $runnable->run; Function : Run method. Returns : None Exceptions : Throws if no query sequences found Args : None |
Arg [1] : Bio::EnsEMBL::Analysis::Runnable Arg [2] : hashref of output Function : overrides runnable output method to allow storing of hash refs Exceptions: throws if not passed an hashref Example : |
Title : queries Usage : my %queries = %$runnable->queries Function : Returns : Hash reference Exceptions : None Args : |
Title : run_analysis Usage : my $align = $self->run_analysis($dna_align_feature); Function : makes an alignment that represents the mature sequence of the miRNA : tests to check that it contains the entire mature sequence and is of : high enough percent identity Returns : Bio::SimpleAlign object Exceptions : Throws if sequence file is not found / cannot be opened Args : Bio::EnsEMBL::DnaDnaAlignFeature |
Methods code
display_stuff | description | prev | next | Top |
sub display_stuff
{ my ($self,$daf,$structure,$aligns,$score)=@_;
my $all_mature;
my $status;
my %miRNAs = %{$self->miRNAs};
my $description = $miRNAs{$daf->hseqname}->display_id;
($all_mature,$status) = $self->get_mature($daf);
print STDERR "DAF ".$daf->dbID." chr ".$daf->seq_region_name." ".$daf->seq_region_start." ".$daf->seq_region_end."\n";
foreach my $mature (@$all_mature){
print STDERR $daf->hseqname." $description miRNA at ".$mature->{'start'}." ".$mature->{'end'}."\n ";
}
print STDERR "target strand ".$daf->strand."\n";
print STDERR "Query start end ".$daf->hstart.":".$daf->hend.
" strand ".$daf->hstrand."\n";
print STDERR "Start ".$daf->start." end ".$daf->end.
" Hit end ".$daf->hstart." hit end ".$daf->hend."\n";
print STDERR "Score $score\n";
foreach my $align(@{$aligns}){
my $simple = $align->get_SimpleAlign;
foreach my $seq ( $simple->each_seq() ) {
print STDERR $seq->seq."\n";
}
print STDERR $simple->match_line."\n";
print STDERR "\n";
$simple = $daf->get_SimpleAlign;
foreach my $seq ( $simple->each_seq() ) {
print STDERR $seq->seq."\n";
}
print STDERR $simple->match_line."\n";
print STDERR "\n";
print STDERR "$structure\n";
for (my $i=1 ; $i< $align->start;$i++) {
print STDERR ".";
}
print STDERR substr($daf->seq,$align->start-1,$align->end-$align->start+1);
print STDERR "\nMirna position = ".$align->start." ". $align->end."\n";
}
}
} |
sub get_mature
{ my ($self,$query)=@_;
my %miRNAs = %{$self->miRNAs};
my $miRNA = $miRNAs{$query->hseqname};
my @mature;
my $status = "PUTATIVE";
unless ($miRNA){
print STDERR "Unable to locate miRNA fom embl file that corresponds to ".
$query->hseqname."$@\n ";
return 0;
}
my @feats = $miRNA->can('top_SeqFeatures') ? $miRNA->top_SeqFeatures : ();
foreach my $sf ( @feats ) {
my @fth = Bio::SeqIO::FTHelper::from_SeqFeature($sf);
foreach my $fth ( @fth ) {
if( ! $fth->isa('Bio::SeqIO::FTHelper') ) {
$self->throw("Cannot process FTHelper... $fth $@\n");
}
my $location = $fth->loc;
$status = "KNOWN"
if ($fth->field->{"evidence"}[0] && $fth->field->{"evidence"}[0] eq "experimental");
if ($location =~ /(\d+)\.+(\d+)/){
push @mature,{ 'start' => $1,
'end' => $2
};
} else {
return 0;
}
}
}
return\@ mature,$status; } |
sub get_miRNAs
{ my ($self)=@_;
my %miRNA;
my $seqIO = Bio::SeqIO->new(
-file => $self->analysis->db_file,
-format => "embl"
);
$self->throw("unable to open file ".$self->analysis->db_file."\n")
unless $seqIO;
while (my $seq = $seqIO->next_seq){
$miRNA{$seq->accession} = $seq;
}
$self->miRNAs(\%miRNA); } |
sub make_gene
{ my ($self,$daf,$structure,$aligns) = @_;
my %miRNAs = %{$self->miRNAs};
my @mature;
my %gene_hash;
my $description = $miRNAs{$daf->hseqname}->display_id;
my @attributes;
my $slice = $daf->slice;
my $exon = Bio::EnsEMBL::Exon->new
(
-start => $daf->start,
-end => $daf->end,
-strand => $daf->strand,
-slice => $slice,
-phase => -1,
-end_phase => -1
);
return if ($exon->start < 1 or $exon->end > $slice->length);
$exon->add_supporting_features($daf);
my $transcript = Bio::EnsEMBL::Transcript->new;
$transcript->add_Exon($exon);
$transcript->start_Exon($exon);
$transcript->end_Exon($exon);
$transcript->biotype("miRNA");
foreach my $code(@$structure){
my $str_attrib = Bio::EnsEMBL::Attribute->new
(-CODE => 'ncRNA',
-NAME => 'Structure',
-DESCRIPTION => 'RNA secondary structure line',
-VALUE => $code
);
push @attributes,$str_attrib;
}
foreach my $align(@$aligns){
my $miRNA_attrib = Bio::EnsEMBL::Attribute->new
(-CODE => 'miRNA',
-NAME => 'Micro RNA',
-DESCRIPTION => 'Coordinates of the mature miRNA',
-VALUE => $align->start."-".$align->end,
);
push @attributes,$miRNA_attrib;
}
my $gene = Bio::EnsEMBL::Gene->new;
$gene->biotype('miRNA');
$gene->analysis($self->analysis);
$gene->add_Transcript($transcript);
$gene->status('NOVEL');
$gene_hash{'gene'} = $gene;
$gene_hash{'attrib'} =\@ attributes;
$self->output(\%gene_hash); } |
sub miRNAs
{ my ($self, $miRNAs) = @_;
if ($miRNAs){
$self->{'_miRNAs'} = $miRNAs;
}
return $self->{'_miRNAs'}; } |
sub new
{ my ($class,@args) = @_;
my $self = $class->SUPER::new(@args);
my ($queries,$thresholds) = rearrange(['QUERIES'], @args);
$self->queries($queries);
$self->throw("miRNA: dying because cannot find database".$self->analysis->db_file."\n")
unless (-e $self->analysis->db_file);
return $self; } |
sub output
{ my ($self, $output) = @_;
if(!$self->{'output'}){
$self->{'output'} = [];
}
if($output){
throw("Must pass Runnable:output an hashref not a ".$output)
unless(ref($output) eq 'HASH');
push(@{$self->{'output'}}, $output);
}
return $self->{'output'};
}
1; } |
sub queries
{ my ($self, $queries) = @_;
if ($queries){
$self->{'_queries'} = $queries;
}
return $self->{'_queries'}; } |
sub run
{ my ($self) = @_;
print STDERR "get miRNAs\n" if $verbose;
$self->get_miRNAs;
my %queries = %{$self->queries};
$self->throw("Cannot find query sequences $@\n") unless %queries;
print STDERR "Run analysis\n" if $verbose;
FAM: foreach my $family (keys %queries){
DAF:foreach my $daf (@{$queries{$family}}){
my ($align,$status) = $self->run_analysis($daf);
next DAF unless ($align && scalar @$align > 0);
my $seq = Bio::PrimarySeq->new
(
-display_id => $daf->hseqname,
-seq => $daf->seq,
);
my $RNAfold = Bio::EnsEMBL::Analysis::Runnable::RNAFold->new
(
-analysis => $self->analysis,
-sequence => $seq
);
$RNAfold->run;
my $structure = $RNAfold->encoded_str;
next DAF unless ($RNAfold->score < -20);
next DAF unless ($RNAfold->structure);
$self->display_stuff($daf,$RNAfold->structure,$align,$RNAfold->score) if $verbose;
$self->make_gene($daf,$structure,$align);
}
}
print STDERR "delete temp files\n" if $verbose;
$self->delete_files; } |
sub run_analysis
{ my ($self,$daf)=@_;
my %miRNAs = %{$self->miRNAs};
my ($all_mature, $status);
my @mature_aligns;
if ($self->get_mature($daf)){
($all_mature,$status) = $self->get_mature($daf)
} else {
$self->warning("No mature sequence identified for sequence ".$daf->hseqname." $@");
return 0;
}
foreach my $mature(@$all_mature){
my $miRNA = $miRNAs{$daf->hseqname};
my $miRNA_length = $mature->{'end'} - $mature->{'start'} +1;
my $seq = $miRNA->seq;
$seq =~ s/[uU]/T/g;
my $slice = Bio::EnsEMBL::Slice->new
(
-seq_region_name => $daf->hseqname,
-start => 1,
-end => $miRNA->length,
-seq => $seq,
-strand => 1,
-coord_system => $daf->slice->coord_system,
);
$daf->hslice($slice);
my $temp_align = $daf->restrict_between_positions($mature->{'start'},$mature->{'end'},"HSEQ");
unless ($temp_align)
{
next;
}
my $align = $temp_align->transfer($daf->feature_Slice);
unless ( $align->alignment_length == $miRNA_length && $align->alignment_length == $align->length)
{
next;
}
unless ( $align->get_SimpleAlign->overall_percentage_identity >= 90 )
{
next;
}
push @mature_aligns,$align;
}
return\@ mature_aligns,$status; } |
General documentation