Raw content of RegulatoryFeatureParser::enhancer
package RegulatoryFeatureParser::enhancer;
use strict;
# Parse data from LBL enhancers, see http://enhancer.lbl.gov/cgi-bin/imagedb.pl?show=1;search.result=yes;form=search;search.form=no;action=search;search.sequence=1
# e.g.
#
# >chr16:84987588-84988227 | element 1 | positive | neural tube[12/12] | hindbrain (rhombencephalon)[12/12] | limb[3/12] | cranial nerve[8/12]
# AACTGAAGGGACCCCGTTAGCATAtaaacaaaaggtggggggtagccccgagcctcttct
# ctgacagccagtggcggcagtgatgaatttgtgaagttatctaattttccactgttttaa
# ttagagacttgggctctgaggcctcgcagctggcttctttgtgctgtattctgttgcctg
# acagag
use RegulatoryFeatureParser::BaseParser;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use vars qw(@ISA);
@ISA = qw(RegulatoryFeatureParser::BaseParser);
# Parse file and return hashref containing:
#
# - arrayref of features
# - arrayref of factors
sub parse {
my ($self, $db_adaptor, $file, $old_assembly, $new_assembly) = @_;
my %result;
print "Parsing $file with enhancer parser\n";
my $feature_internal_id = ($self->find_max_id($db_adaptor, "regulatory_feature")) + 1;
my $highest_factor_id = ($self->find_max_id($db_adaptor, "regulatory_factor")) + 1;
my $analysis_adaptor = $db_adaptor->get_AnalysisAdaptor();
my $slice_adaptor = $db_adaptor->get_SliceAdaptor();
my @features;
my %feature_objects;
my %anal;
# this object is only used for projection
my $dummy_analysis = new Bio::EnsEMBL::Analysis(-logic_name => 'EnhancerProjection');
# use separate analyses to distinguish positive and negative influence
my $analysis_positive = $analysis_adaptor->fetch_by_logic_name("enhancer_positive")->dbID();
my $analysis_negative = $analysis_adaptor->fetch_by_logic_name("enhancer_negative")->dbID();
if (!$analysis_positive) {
print STDERR "Can't get analysis for enhancer_positive, skipping\n";
exit(1);
}
if (!$analysis_negative) {
print STDERR "Can't get analysis for enhancer_negative, skipping\n";
exit(1);
}
# link to a blank factor so the SQL still works
my $blank_factor_id = $self->get_blank_factor_id($db_adaptor);
# read file
open (FILE, "<$file") || die "Can't open $file";
while () {
next if ($_ !~ /^>/); # only read headers
my %feature;
# >chr16:84987588-84988227 | element 1 | positive | neural tube[12/12] | hindbrain (rhombencephalon)[12/12] | limb[3/12] | cranial nerve[8/12]
my ($coords, $element, $posneg, @stuff) = split /\s+\|\s+/;;
# parse co-ordinates
my ($chr, $start, $end) = $coords =~ /chr([^:]+):(\d+)-(\d+)/;
# parse element name
my ($element_number) = $element =~ /\s*element\s*(\d+)/;
# ----------------------------------------
# Feature name
$feature{NAME} = "LBNL-$element_number";
# ----------------------------------------
# Analysis
$feature{ANALYSIS_ID} = $posneg eq 'positive' ? $analysis_positive : $analysis_negative;
# ----------------------------------------
# Seq_region ID and co-ordinates
my $chr_slice;
if ($old_assembly) {
$chr_slice = $slice_adaptor->fetch_by_region('chromosome', $chr, undef, undef, undef, $old_assembly);
} else {
$chr_slice = $slice_adaptor->fetch_by_region('chromosome', $chr);
}
if (!$chr_slice) {
print STDERR "Can't get slice for chromosme $chr\n";
next;
}
my $seq_region_id = $slice_adaptor->get_seq_region_id($chr_slice);
if (!$seq_region_id) {
print STDERR "Can't get seq_region_id for chromosome $chr\n";
next;
}
$feature{SEQ_REGION_ID} = $seq_region_id;
# Assume these are all on the positive strand
my $strand = 1;
# project if necessary
if ($new_assembly) {
#print join("\t", "OLD: ", $start, $end, $strand, $chr, $feature{NAME}) . "\n";
my $projected_feature = $self->project_feature($start, $end, $strand, $chr, $chr_slice, $dummy_analysis, $new_assembly, $slice_adaptor, $feature{NAME});
$start = $projected_feature->start();
$end = $projected_feature->end();
$strand = $projected_feature->strand();
$feature{SEQ_REGION_ID} = $slice_adaptor->get_seq_region_id($projected_feature->feature_Slice);
#print join("\t", "NEW: ", $start, $end, $strand, $chr, $feature{NAME}) . "\n";
}
$feature{START} = $start;
$feature{END} = $end;
$feature{STRAND} = $strand;
# ----------------------------------------
# Feature internal ID
# note this is not the "id" referred to above
$feature{INTERNAL_ID} = $feature_internal_id++;
# ----------------------------------------
# Evidence
$feature{EVIDENCE} = "";
# ----------------------------------------
# Factor - blank so that the SQL for retrieving still works
$feature{FACTOR_ID} = $blank_factor_id;
# ----------------------------------------
# Add to object to be returned
push @features, \%feature;
#print "Feature: ";
#foreach my $field (keys %feature) {
# print $field . ": " . $feature{$field} . " ";
#}
#print "\n";
}
close FILE;
$result{FEATURES} = \@features;
print "Parsed " . scalar(@{$result{FEATURES}}) . " features\n";
return \%result;
}
sub new {
my $self = {};
bless $self, "RegulatoryFeatureParser::enhancer";
return $self;
}
1;