Bio::EnsEMBL::Funcgen::Parsers vista
Included librariesPackage variablesGeneral documentationMethods
Toolbar
WebCvsRaw content
Package variables
No package variables defined.
Included modules
Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser
Inherit
Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser
Synopsis
No synopsis!
Description
No description!
Methods
new
No description
Code
parse_and_load
No description
Code
Methods description
None available.
Methods code
newdescriptionprevnextTop
sub new {
  my $caller = shift;
  my $class = ref($caller) || $caller;

  my $self = $class->SUPER::new(@_);

  #Set default feature_type and feature_set config
$self->{'feature_types'} = {( 'VISTA Target' => { name => 'VISTA Target', class => 'Region', description => 'VISTA target region', }, 'VISTA Enhancer' => { name => 'VISTA Enhancer', class => 'Enhancer', description => 'Enhancer identified by positive VISTA assay', }, 'VISTA Target - Negative' => { name => 'VISTA Target - Negative', class => 'Region', description => 'Enhancer negative region identified by VISTA assay', }, )}; $self->{feature_sets} = { 'VISTA enhancer set' => { feature_type =>\$ self->{'feature_types'}{'VISTA Target'}, display_label => 'VISTA Enhancers', analysis => { -logic_name => 'VISTA', -description => 'VISTA Enhancer Assay (http://enhancer.lbl.gov/)', -display_label => 'VISTA', -displayable => 1, }, }, }; $self->validate_and_store_feature_types; $self->set_feature_sets; return $self; } # Parse file and return hashref containing:
#
# - arrayref of features
# - arrayref of factors
}
parse_and_loaddescriptionprevnextTop
sub parse_and_load {
  my ($self, $file, $old_assembly, $new_assembly) = @_;

  my %result;


  #we want to set up the new external_feature set here or in the caller?
$self->log_header("Parsing and loading LBNL VISTA enhancer data from:\t$file"); #my $feature_internal_id = ($self->find_max_id("external_feature")) + 1;
#This is now feature_type
#but we don't want to just import a flat file for this, as we'd have to remove all the previous
#entries. Either check each one, either via the API or by dumping to file and checking the sorted file?
#my $highest_factor_id = ($self->find_max_id("feature_type")) + 1;
my $extfeat_adaptor = $self->db->get_ExternalFeatureAdaptor; my @features; my %feature_objects; my %anal; # this object is only used for projection
my $dummy_analysis = new Bio::EnsEMBL::Analysis(-logic_name => 'EnhancerProjection'); my $feature_positive = $self->{'feature_types'}{'VISTA Enhancer'}; my $feature_negative = $self->{'feature_types'}{'VISTA Target - Negative'}; #we should separate this into rna_feature_type for cisRED?
#The adaptor should then conditionally look up the rna_feature_type table instead of the feature_type table
# read file
open (FILE, "<$file") || die "Can't open $file"; my $cnt = 0; my $skipped = 0; while (<FILE>) { next if ($_ !~ /^>/o); # only read headers
my %feature; # >chr16:84987588-84988227 | element 1 | positive | neural tube[12/12] | hindbrain (rhombencephalon)[12/12] | limb[3/12] | cranial nerve[8/12]
my ($coords, $element, $posneg, @stuff) = split /\s+\|\s+/o; # parse co-ordinates
my ($chr, $start, $end) = $coords =~ /chr([^:]+):(\d+)-(\d+)/o; # parse element name
my ($element_number) = $element =~ /\s*element\s*(\d+)/o; # ----------------------------------------
# Feature name
#$feature{NAME} = "LBNL-$element_number";
# ----------------------------------------
# Analysis
#$feature{ANALYSIS_ID} = $posneg eq 'positive' ? $analysis_positive : $analysis_negative;
#Feature type
#$feature{FEATURE_TYPE_ID} = $posneg eq 'positive' ? $feature_positive_id : $feature_negative_id;
# ----------------------------------------
# Seq_region ID and co-ordinates
my $chr_slice; if ($old_assembly) { $chr_slice = $self->slice_adaptor->fetch_by_region('chromosome', $chr, undef, undef, undef, $old_assembly); } else { $chr_slice = $self->slice_adaptor->fetch_by_region('chromosome', $chr); } if (!$chr_slice) { warn "Can't get slice for chromosme $chr\n"; next; } my $seq_region_id = $chr_slice->get_seq_region_id; throw("Can't get seq_region_id for chromosome $chr") if (!$seq_region_id); #$feature{SEQ_REGION_ID} = $seq_region_id;
# Assume these are all on the positive strand? Is this correct?
my $strand = 1; #build feature here, but don't store?
my $feature = Bio::EnsEMBL::Funcgen::ExternalFeature->new ( #-dbID => $feature_internal_id++,
#-analysis => $self->analysis,
-start => $start,#is this in UCSC coords?
-end => $end, #is this in UCSC coords?
-strand => $strand, -feature_type => $posneg eq 'positive' ? $feature_positive : $feature_negative, -slice => $self->slice_adaptor->fetch_by_region('chromosome', $chr, undef, undef, $strand, $old_assembly), -display_label => "LBNL-$element_number", -feature_set => $self->{'feature_sets'}{'VISTA enhancer set'}, ); # project if necessary
if ($new_assembly) { $feature = $self->project_feature($feature, $dummy_analysis, $new_assembly); if(! defined $feature){ $skipped ++; next; } } $cnt ++; $extfeat_adaptor->store($feature); } close FILE; #$result{FEATURES} = \@features;
$self->log('Parsed '.($cnt+$skipped).' features'); $self->log("Loaded $cnt features"); $self->log("Skipped $skipped features"); return; } 1;
}
General documentation
No general documentation available.