Bio::EnsEMBL::Funcgen::Parsers miranda
Included libraries | Package variables | General documentation | Methods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::DBEntry
Bio::EnsEMBL::Funcgen::ExternalFeature
Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser
Inherit
Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser
Synopsis
No synopsis!
Description
No description!
Methods
new
No description
Code
parse_and_load
No description
Code
Methods description
None available.
Methods code
new | description | prev | next | Top
# Constructor.
#
# Delegates object creation to the parent class (all arguments are passed
# through untouched), installs the default miRanda feature type and feature
# set configuration on the object, and then calls
# validate_and_store_feature_types() and set_feature_sets() on it.
#
# Returns the new parser object.
sub new {
  my $invocant = shift;
  my $class    = ref($invocant) ? ref($invocant) : $invocant;
  my $self     = $class->SUPER::new(@_);

  # Default feature type configuration.
  $self->{'feature_types'} = {
    'miRanda' => {
      name        => 'miRanda Target',
      class       => 'RNA',
      description => 'miRanda microRNA target',
    },
  };

  # Analysis settings attached to the default feature set.
  my %analysis_args = (
    -logic_name    => 'miRanda',
    # Alternative description, kept for reference:
    #-description  => 'miRanda microRNA target prediction (http://www.microrna.org)',
    -description   => 'miRanda microRNA target prediction (http://microrna.sanger.ac.uk/targets)',
    -display_label => 'miRanda Target',
    -displayable   => 1,
  );

  # Default feature set configuration.
  # feature_type is a reference to the 'miRanda' slot of the feature_types
  # hash above, not a copy of its current value.
  # NOTE(review): presumably so that whatever later replaces that slot is
  # visible through this reference - confirm against the base parser.
  $self->{feature_sets} = {
    'miRanda miRNA targets' => {
      feature_type => \$self->{'feature_types'}{'miRanda'},
      display_name => 'miRanda Targets',
      analysis     => \%analysis_args,
      xrefs        => 1,
    },
  };

  $self->validate_and_store_feature_types;
  $self->set_feature_sets;

  return $self;
}
parse_and_load | description | prev | next | Top
# Parse a miRanda target prediction file and load its records.
#
# Every non-comment, non-blank line is split on whitespace. For each record
# an ExternalFeature is stored and, when a transcript stable ID is present,
# a DBEntry xref to that transcript is stored against the feature.
#
# Arguments:
#   $file         - path of the miRanda file to read
#   $old_assembly - optional; when true it is passed to fetch_by_region as
#                   the assembly version used to fetch chromosome slices
#   $new_assembly - optional; when true each feature is passed through
#                   project_feature() before being stored, and features for
#                   which that returns undef are skipped
#
# Returns: nothing (bare return).
# Throws/dies if the DB has no species set or if $file cannot be opened.
sub parse_and_load {
  my ($self, $file, $old_assembly, $new_assembly) = @_;

  $self->log_header("Parsing miRanda data from:\t$file");

  my $ftype_adaptor   = $self->db->get_FeatureTypeAdaptor();
  my $extf_adaptor    = $self->db->get_ExternalFeatureAdaptor;
  my $dbentry_adaptor = $self->db->get_DBEntryAdaptor;

  my %features_by_name;   # miRNA name -> FeatureType
  my %slice_cache;        # chromosome name -> slice (undef if not fetchable)
  my $skipped      = 0;
  my $cnt          = 0;
  my $skipped_xref = 0;

  my $species = $self->db->species;

  if (! $species) {
    throw('Must define a species to define the external_db');
  }

  # Just to make sure we have homo_sapiens and not Homo Sapiens.
  # /g so that names with more than two words are fully converted.
  ($species = lc($species)) =~ s/ /_/g;

  # The feature set is configured in new() under 'miRanda miRNA targets';
  # the lookup must use that same key or it yields undef.
  my $feature_set = $self->{'feature_sets'}{'miRanda miRNA targets'};

  # 3-arg open with a lexical handle: the file name cannot be interpreted as
  # an open mode, and the handle is released even if we die part-way through.
  open(my $fh, '<', $file) or die "Can't open $file: $!";

  LINE: while (my $line = <$fh>) {
    next LINE if ($line =~ /^\s*\#/ || $line =~ /^\s*$/);

    #Sanger
    ##GROUP SEQ METHOD FEATURE CHR START END STRAND PHASE SCORE PVALUE_OG TRANSCRIPT_ID EXTERNAL_NAME
    #Similarity mmu-miR-707 miRanda miRNA_target 2 120824620 120824640 + . 15.3548 2.796540e-02 ENST00000295228 INHBB
    #MSKCC
    #UCSC ID mRNA Gene ID miRNA acc miRNA miRNA align alignment gene align align score conservation miRNA start miRNA end gene start gene end %ID %Similar energy organism prediction_date
    #uc001abs.1 AK091100 LOC643837 MIMAT0000062 hsa-let-7a uuGAUAUGUUGGAUGAUGGAGu ||: || |:| ||:|||| guCUGCUCACCUUCCUGCCUCa 144 0.615759 2 21 396 417 63 78 0 9606 2008-05-16
    #uc001afh.1 NM_001039577 CCNL2 MIMAT0000062 hsa-let-7a uugauAUGUUGGAUGAUGGAGu |::||||| ||:|||| cucacUGUAACCU-CUGCCUCc 147 0.767611 2 18 2290 2310 75 93 0 9606 2008-05-16

    # Only the Sanger column layout is handled here. GROUP, PHASE, SCORE and
    # PVALUE_OG are not used.
    my (undef, $seq, $method, $feature_name, $chr, $start, $end, $strand,
        undef, undef, undef, $ens_id, $display_name) = split ' ', $line;

    $strand = ($strand =~ /\+/) ? 1 : -1;

    ##my $id = $ens_id =~ s/[\"\']//g; # strip quotes
    my $id = $ens_id.':'.$seq;

    # Fetch each chromosome slice once. A failed fetch is cached as undef so
    # it is neither retried nor warned about again.
    if (! exists $slice_cache{$chr}) {

      if ($old_assembly) {
        $slice_cache{$chr} = $self->slice_adaptor->fetch_by_region('chromosome', $chr, undef, undef, undef, $old_assembly);
      }
      else {
        $slice_cache{$chr} = $self->slice_adaptor->fetch_by_region('chromosome', $chr);
      }

      if (! defined $slice_cache{$chr}) {
        warn "Can't get slice $chr for sequence $id\n";
      }
    }

    # Skip EVERY record on an unavailable slice, not just the first one seen;
    # this test must therefore sit outside the cache-fill block above.
    if (! defined $slice_cache{$chr}) {
      $skipped++;
      next LINE;
    }

    #We can add coding xref to feature type based on the miRbase name
    #.e.g hsa-mir-24-1
    #However, this isn't stored as an xref
    #It is stored in the gene.description
    #e.g. hsa-mir-24-1 [Source:miRBase;Acc:MI0000080]
    #Not easy to fetch as descriptions not indexed!

    # Cache/store the FeatureType for this miRNA name.
    if (! exists $features_by_name{$seq}) {
      $features_by_name{$seq} = $ftype_adaptor->fetch_by_name($seq);

      if (! defined $features_by_name{$seq}) {
        ($features_by_name{$seq}) = @{$ftype_adaptor->store(Bio::EnsEMBL::Funcgen::FeatureType->new
                                                            (
                                                             -name        => $seq,
                                                             -class       => 'RNA',
                                                             -description => $method.' '.$feature_name,
                                                            ))};
      }
    }

    my $feature = Bio::EnsEMBL::Funcgen::ExternalFeature->new
      (
       -display_label => $id,
       -start         => $start,
       -end           => $end,
       -strand        => $strand,
       -feature_type  => $features_by_name{$seq},
       -feature_set   => $feature_set,
       -slice         => $slice_cache{$chr},
      );

    # Project if necessary.
    if ($new_assembly) {
      $feature = $self->project_feature($feature, $new_assembly);

      if (! defined $feature) {
        $skipped++;
        next LINE;
      }
    }

    ($feature) = @{$extf_adaptor->store($feature)};
    $cnt++;

    # Build xref.
    # This should never happen, as the search regions are defined by ens transcript.
    # The feature itself has already been stored; only the xref is skipped.
    if (! $ens_id) {
      warn("No xref available for miRNA $id\n");
      $skipped_xref++;
      next LINE;
    }

    #use external_name first, else try and get it from the core DB
    #should we just get it from the core DB anyway?
    if (! defined $display_name) {
      $display_name = $self->get_core_display_name_by_stable_id($self->db->dnadb, $ens_id, 'transcript');
    }

    #Handle release/version in xref version as stable_id version?
    my $dbentry = Bio::EnsEMBL::DBEntry->new
      (
       -dbname             => $species.'_core_Transcript',
       #-release           => $self->db->dnadb->dbc->dbname,
       -status             => 'KNOWNXREF',
       #-display_label_linkable => 1,
       #-db_display_name   => $self->db->dnadb->dbc->dbname,
       -db_display_name    => 'EnsemblTranscript',
       -type               => 'MISC',
       -primary_id         => $ens_id,
       -display_id         => $display_name,
       -info_type          => 'MISC',
       -info_text          => 'TRANSCRIPT',
       #-linkage_annotation => 'miRanda miRNA negative influence',
       -linkage_annotation => 'miRanda target - negative influence',
       #could have version here if we use the correct dnadb to build the cache
      );

    $dbentry_adaptor->store($dbentry, $feature->dbID, 'ExternalFeature', 1);#1 is ignore release flag
  }

  close $fh;

  $self->log("Stored $cnt miRanda miRNA ExternalFeatures");
  $self->log("Skipped $skipped miRanda miRNA imports");
  $self->log("Skipped an additional $skipped_xref DBEntry imports");

  return;
}

1;
General documentation
No general documentation available.