Raw content of RegulatoryFeatureParser::BaseParser package RegulatoryFeatureParser::BaseParser; use strict; use Bio::EnsEMBL::DBSQL::DBAdaptor; use Bio::EnsEMBL::SimpleFeature; # Base functionality for regualatory feature parsers # -------------------------------------------------------------------------------- # Delete existing regulatory features etc sub delete_existing { my ($db_adaptor, $type) = @_; my $t = lc($type); print "Deleting existing features & related data for type $type\n"; # Delete any regulatory_feature_coding entries first my $sth = $db_adaptor->dbc->prepare("DELETE rft FROM regulatory_feature rfeat, regulatory_factor_coding rft, analysis a WHERE rfeat.regulatory_factor_id=rft.regulatory_factor_id AND a.analysis_id=rfeat.analysis_id AND LOWER(a.logic_name)=?"); $sth->execute($t); # now delete interlinked regulatory_feature, regulatory_factor and regulatory_feature_object entries $sth = $db_adaptor->dbc->prepare("DELETE rfeat, rfact, rfo FROM regulatory_feature rfeat, regulatory_factor rfact, regulatory_feature_object rfo, analysis a WHERE rfeat.regulatory_feature_id=rfo.regulatory_feature_id AND rfeat.regulatory_factor_id=rfact.regulatory_factor_id AND rfeat.analysis_id=a.analysis_id AND LOWER(a.logic_name)=?"); $sth->execute($t); # delete dangling regulatory_factors $sth = $db_adaptor->dbc->prepare("DELETE rfact FROM regulatory_feature rfeat, regulatory_factor rfact, analysis a WHERE rfeat.regulatory_factor_id=rfact.regulatory_factor_id AND a.analysis_id=rfeat.analysis_id AND LOWER(a.logic_name)=?"); $sth->execute($t); # and finally any dangling regulatory features $sth = $db_adaptor->dbc->prepare("DELETE rfeat FROM regulatory_feature rfeat, analysis a WHERE a.analysis_id=rfeat.analysis_id AND LOWER(a.logic_name)=?"); $sth->execute($t); # Delete search regions; they have a different analysis_id if ($type = "cisred") { my $sr_type = $type . "_search"; die "Can't find analysis for $sr_type " unless validate_type($db_adaptor, $sr_type); my $anal_sth = $db_adaptor->dbc->prepare("SELECT analysis_id FROM analysis WHERE LOWER(logic_name)=?"); $anal_sth->execute($sr_type); my $anal = ($anal_sth->fetchrow_array())[0]; $sth = $db_adaptor->dbc->prepare("DELETE FROM regulatory_search_region WHERE analysis_id=?"); $sth->execute($anal); } } # -------------------------------------------------------------------------------- # Check that the type specified corresponds to an analysis sub validate_type { my ($db_adaptor, $type) = @_; # LBL enhancers have both positive and negative varieties if (lc($type) eq 'enhancer') { return (validate_type($db_adaptor, 'enhancer_positive') && validate_type($db_adaptor, 'enhancer_negative')); } my $sth = $db_adaptor->dbc->prepare("SELECT analysis_id FROM analysis WHERE LOWER(logic_name)=?"); $sth->execute(lc($type)); if ($sth->fetchrow_array()) { print "Type $type is valid\n"; } else { print "Type $type is not valid - is there an entry for $type in the analysis table?\n"; return 0; } return 1; } # -------------------------------------------------------------------------------- # Find the maximum existing ID in a table. sub find_max_id { my ($self, $db_adaptor, $table) = @_; my $row = @{$db_adaptor->dbc->db_handle->selectall_arrayref("SELECT MAX(${table}_id) FROM $table")}[0]; my $max_id = @{$row}[0]; if (!defined $max_id) { print "No existing ${table}_ids, will start from 1\n"; $max_id = 0; } else { print "Maximum existing ${table}_id = $max_id\n"; } return $max_id; } # -------------------------------------------------------------------------------- # Build a cache of ensembl stable ID -> internal ID # Return hashref keyed on {$type}{$stable_id} # Type is always all lower case sub build_stable_id_cache { my ($self, $db_adaptor) = @_; my %stable_id_to_internal_id; foreach my $type ('gene', 'transcript', 'translation') { # Add exon here if required print "Caching stable ID -> internal ID links for ${type}s\n"; my $count = 0; my $sth = $db_adaptor->dbc->prepare("SELECT ${type}_id, stable_id FROM ${type}_stable_id"); $sth->execute(); my ($internal_id, $stable_id); $sth->bind_columns(\$internal_id, \$stable_id); while ($sth->fetch) { $stable_id_to_internal_id{$type}{$stable_id} = $internal_id; $count++; } print "Got $count $type stable ID -> internal ID mappings\n"; } return \%stable_id_to_internal_id; } # -------------------------------------------------------------------------------- # Upload features and factors etc to database sub upload_features_and_factors { my ($self, $db_adaptor, $objects) = @_; my $dbc = $db_adaptor->dbc; my $feature_sth = $dbc->prepare("INSERT INTO regulatory_feature (regulatory_feature_id, name, seq_region_id, seq_region_start, seq_region_end, seq_region_strand, analysis_id, regulatory_factor_id) VALUES(?,?,?,?,?,?,?,?)"); my $factor_sth = $dbc->prepare("INSERT INTO regulatory_factor (regulatory_factor_id, name, type) VALUES(?,?,?)"); my $feature_object_sth = $dbc->prepare("INSERT INTO regulatory_feature_object (regulatory_feature_id, ensembl_object_type, ensembl_object_id, influence, evidence) VALUES(?,?,?,?,?)"); my $sr_sth = $dbc->prepare("INSERT INTO regulatory_search_region (name, seq_region_id, seq_region_start, seq_region_end, seq_region_strand, ensembl_object_type, ensembl_object_id, analysis_id ) VALUES(?,?,?,?,?,?,?,?)"); print "Uploading " . scalar(@{$objects->{FEATURES}}) . " features ...\n"; foreach my $feature (@{$objects->{FEATURES}}) { $feature_sth->execute($feature->{INTERNAL_ID}, $feature->{NAME}, $feature->{SEQ_REGION_ID}, $feature->{START}, $feature->{END}, $feature->{STRAND}, $feature->{ANALYSIS_ID}, $feature->{FACTOR_ID}); if ($feature->{ENSEMBL_TYPE} && $feature->{ENSEMBL_ID}) { $feature_object_sth->execute($feature->{INTERNAL_ID}, $feature->{ENSEMBL_TYPE}, $feature->{ENSEMBL_ID}, $feature->{INFLUENCE}, $feature->{EVIDENCE}); } } if ($objects->{FACTORS}) { print "Uploading " . scalar(@{$objects->{FACTORS}}) . " factors ...\n"; foreach my $factor (@{$objects->{FACTORS}}) { $factor_sth->execute($factor->{INTERNAL_ID}, $factor->{NAME}, $factor->{TYPE}); } } if ($objects->{SEARCH_REGIONS}) { print "Uploading " . scalar(@{$objects->{SEARCH_REGIONS}}) . " search regions ...\n"; foreach my $search_region (@{$objects->{SEARCH_REGIONS}}) { $sr_sth->execute($search_region->{NAME}, $search_region->{SEQ_REGION_ID}, $search_region->{START}, $search_region->{END}, $search_region->{STRAND}, $search_region->{ENSEMBL_OBJECT_TYPE}, $search_region->{ENSEMBL_OBJECT_ID}, $search_region->{ANALYSIS_ID}); } } print "Done\n"; } # -------------------------------------------------------------------------------- # Project a feature from one slice to another sub project_feature { my ($self, $start, $end, $strand, $chr, $slice, $analysis, $new_assembly, $slice_adaptor, $label) = @_; # just use a SimpleFeature for convenience my $feat = Bio::EnsEMBL::SimpleFeature->new (-start => $start, -end => $end, -strand => $strand, -slice => $slice, -analysis => $analysis, -display_label => $label, -score => 0); # project feature to new assembly my $feat_slice = $feat->feature_Slice; my @segments = @{ $feat_slice->project('chromosome', $new_assembly) }; next unless (@segments); next if (scalar(@segments) > 1); my $proj_slice = $segments[0]->to_Slice; next unless ($feat_slice->length == $proj_slice->length); # everything looks fine, so adjust the coords of the feature $feat->start($proj_slice->start); $feat->end($proj_slice->end); $feat->strand($proj_slice->strand); my $slice_new_asm = $slice_adaptor->fetch_by_region('chromosome', $chr, undef, undef, undef, $new_assembly); $feat->slice($slice_new_asm); return $feat; } # -------------------------------------------------------------------------------- sub get_blank_factor_id () { my ($self, $db_adaptor) = @_; my $sth = $db_adaptor->dbc->prepare("SELECT regulatory_factor_id FROM regulatory_factor WHERE name=''"); $sth->execute(); my ($factor_id) = $sth->fetchrow_array(); if ($factor_id) { print "Found existing blank factor, id = $factor_id\n"; } else { $db_adaptor->dbc->do("INSERT INTO regulatory_factor (name) VALUES ('')"); $sth->execute(); ($factor_id) = $sth->fetchrow_array(); print "Created new blank factor, id = $factor_id\n"; } return $factor_id; } 1;