Raw content of Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser
package Bio::EnsEMBL::Funcgen::Parsers::BaseExternalParser;
use strict;
use Bio::EnsEMBL::DBSQL::DBAdaptor;
use Bio::EnsEMBL::Utils::Argument qw( rearrange );
use Bio::EnsEMBL::Utils::Exception qw( throw );
use Bio::EnsEMBL::Funcgen::FeatureSet;
use Bio::EnsEMBL::Funcgen::FeatureType;
use Bio::EnsEMBL::Analysis;
use Bio::EnsEMBL::Funcgen::Utils::Helper;
use vars qw(@ISA);
@ISA = ('Bio::EnsEMBL::Funcgen::Utils::Helper');
# Base functionality for external_feature parsers
#Make this inherit from Helper?
#Then change all the prints to logs
sub new {
my $caller = shift;
my $class = ref($caller) || $caller;
my $self = $class->SUPER::new(@_);
#validate and set type, analysis and feature_set here
my ($type, $db, $clobber, $archive, $import_fsets) = rearrange(['TYPE', 'DB', 'CLOBBER', 'ARCHIVE', 'IMPORT_SETS'], @_);
throw('You must define a type of external_feature to import') if(! defined $type);
if (! ($db && ref($db) &&
$db->isa('Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor'))){
throw('You must provide a valid Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor');
}
throw('You can only specify either -clobber or -archive, but not both') if($clobber && $archive);
$self->{'display_name_cache'} = {};
$self->{'db'} = $db;
$self->{type} = $type;
$self->{'clobber'} = $clobber if defined $clobber;
$self->{'archive'} = $archive if defined $archive;
#This is not fully implemented yet and need to be validated against the config feature_set
#pass something like set1,set2 and split and validate each.
#Or do this in the calling script?
throw('-import_sets not fully implemented yet') if defined $import_fsets;
$self->{'import_sets'} = (defined $import_fsets) ? @{$import_fsets} : undef;
$self->log("Parsing and loading $type ExternalFeatures");
return $self;
}
=head2 db
Args : None
Example : my $feature_set_adaptor = $seld->db->get_FeatureSetAdaptor
Description: Getter for the DBAdaptor.
Returntype : Bio::EnsEMBL::Funcgen::DBSQL::DBAdaptor
Exceptions : None
Caller : General
Status : Medium Risk
=cut
sub db{
my $self = shift;
return $self->{'db'};
}
=head2 import_sets
Args : None
Example : foreach my $import_set_name(@{$self->import_sets}){ ... do the import ... }
Description: Getter for the list of import feature set names, defaults to all in parser config.
Returntype : Arrayref of import feature_set names
Exceptions : None
Caller : General
Status : Medium Risk
=cut
sub import_sets{
my $self = shift;
return $self->{'import_sets'} || [keys %{$self->{'feature_sets'}}];
}
=head2 set_feature_sets
Args : None
Example : $self->set_feature_sets;
Description: Imports feature sets defined by import_sets.
Returntype : None
Exceptions : Throws if feature set already present and clobber or archive not set
Caller : General
Status : Medium Risk
=cut
sub set_feature_sets{
my $self = shift;
throw('Must provide a set feature_set config hash') if ! defined $self->{'feature_sets'};
my $fset_adaptor = $self->db->get_FeatureSetAdaptor;
my $analysis_adaptor = $self->db->get_AnalysisAdaptor;
foreach my $fset_name(@{$self->import_sets}){
warn "Setting $fset_name";
#check feature_type is validated?
my $fset = $fset_adaptor->fetch_by_name($fset_name);
#what about data sets?
#we don't need data sets for external_sets!
if(defined $fset){
$self->log("Found previous FeatureSet $fset_name");
if($self->{'clobber'}){
warn "Implement rollback_FeatureSet here, need option to leave feature_set entry?";
#Need to clobber any DBEntries first!!!
if(exists $self->{'feature_sets'}{$fset_name}{'xrefs'} &&
$self->{'feature_sets'}{$fset_name}{'xrefs'}){
my @ext_feat_ids = map @{$_}, @{$self->db->dbc->db_handle->selectall_arrayref('select external_feature_id from external_feature where feature_set_id='.$fset->dbID)};
if(@ext_feat_ids){
#Why only core xrefs?
#my ($core_ext_dbid) = $self->db->dbc->db_handle->selectrow_array('select external_db_id from external_db where db_name="core"');
my $core_ext_dbid;
if($core_ext_dbid){
#double table delete?
throw('This xref delete is wrong');
my $sql = "delete x, ox from object_xref ox, xref x where ox.ensembl_object_type='ExternalFeature' and x.external_db_id=$core_ext_dbid and ox.xref_id=x.xref_id and ensembl_id in(".join(', ', @ext_feat_ids).')';
#should be something like
#delete ox from object_xref ox, external_feature ef where ox.ensembl_object_type='ExternalFeature' and ox.ensembl_id=ef.external_feature_id and ef.feature_set_id in(64,65);
$self->log("Clobbering xrefs for $fset_name");
throw("xref delete is wrong :$sql");
#$self->db->dbc->do($sql);
}
}
}
$self->log("Clobbering old features for external feature_set:\t$fset_name");
my $sql = 'delete from external_feature where feature_set_id='.$fset->dbID;
$self->db->dbc->do($sql);
}
elsif($self->{'archive'}){
my $archive_fset = $fset_adaptor->fetch_by_name($fset_name."_v".$self->{'archive'});
if(defined $archive_fset){
throw("You are trying to create an archive external feature_set which already exists:\t${fset_name}_v".$self->{archive});
}
my $sql = "UPDATE feature_set set name='$fset_name}_v".$self->{archive}."' where name='$fset_name'";
$self->db->dbc->do($sql);
undef $fset;
}else{
throw("You are trying to create an external feature_set which already exists:\t$fset_name\nMaybe to want to clobber or archive?");
}
}
if(!defined $fset){
#don't need to use RNAFeatureType here as this is the setwide generic feature_type
#or do we have separate tables for external_feature and external_rna_feature?
#validate analysis first
my $analysis = $analysis_adaptor->fetch_by_logic_name($self->{'feature_sets'}{$fset_name}{'analysis'}{'-logic_name'});
if(! defined $analysis){
$self->log('Analysis '.$self->{'feature_sets'}{$fset_name}{'analysis'}{'-logic_name'}.
" not found, storing from config hash");
#warn Data::Dumper::Dumper({$self->{'feature_sets'}{$fset_name}{'analysis'}});
#Why does this no work the first time??
$analysis_adaptor->store(Bio::EnsEMBL::Analysis->new
(
%{$self->{'feature_sets'}{$fset_name}{'analysis'}}
#-logic_name => $self->{'feature_sets'}{$fset_name}{'analysis'}{'-logic_name'},
#-description => $self->{'feature_sets'}{$fset_name}{'analysis'}{'-description'},
#-display_label => $self->{'feature_sets'}{$fset_name}{'analysis'}{'-display_label'},
#-diplayable => $self->{'feature_sets'}{$fset_name}{'analysis'}{'-displayable'},
)
);
$analysis = $analysis_adaptor->fetch_by_logic_name($self->{'feature_sets'}{$fset_name}{'analysis'});
}
#warn "analysis is $analysis";
#replace hash config with object
$self->{'feature_sets'}{$fset_name}{'analysis'} = $analysis;
my $display_name = (exists $self->{'feature_sets'}{$fset_name}{'display_label'}) ? $self->{'feature_sets'}{$fset_name}{'display_label'} : $fset_name;
$fset = Bio::EnsEMBL::Funcgen::FeatureSet->new(
-name => $fset_name,
-type => 'external',
-analysis => $self->{'feature_sets'}{$fset_name}{'analysis'},
-feature_type => ${$self->{'feature_sets'}{$fset_name}{'feature_type'}},
-display_label => $display_name,
);
($fset) = @{$self->db->get_FeatureSetAdaptor->store($fset)};
}
#Now replace config hash with object
$self->{feature_sets}{$fset_name} = $fset;
}
return;
}
=head2 validate_and_store_feature_types
Args : None
Example : $self->validate_and_store_feature_types;
Description: Imports feature types defined by import_sets.
Returntype : None
Exceptions : None
Caller : General
Status : Medium Risk
=cut
sub validate_and_store_feature_types{
my $self = shift;
# LBL enhancers have both positive and negative varieties
#feature type class/naming is logical but not intuitive
#+-----------------+-------------------------+----------+----------------------------------------------------+
#| feature_type_id | name | class | description |
#+-----------------+-------------------------+----------+----------------------------------------------------+
#| 398680 | VISTA Enhancer | Enhancer | Enhancer identified by positive VISTA assay |
#| 398681 | VISTA Target - Negative | Region | Enhancer negative region identified by VISTA assay |
#+-----------------+-------------------------+----------+----------------------------------------------------+
#if (lc($type) eq 'VISTA') {
# return (validate_type($db_adaptor, 'VISTA Enhancer') && validate_type($db_adaptor, 'VISTA Target - Negative'));
#}
#my $sth = $self->db->dbc->prepare("SELECT analysis_id FROM analysis WHERE logic_name=?");
#remove lc as mysql doesn't care about case
#$sth->execute($type);
#if ($sth->fetchrow_array()) {
# print "Type $type is valid\n";
#} else {
# print "Type $type is not valid - is there an entry for $type in the analysis table?\n";
# return 0;
#}
my $ftype_adaptor = $self->db->get_FeatureTypeAdaptor;
foreach my $import_set(@{$self->import_sets}){
my $ftype_config = ${$self->{'feature_sets'}{$import_set}{'feature_type'}};
my $ftype = $ftype_adaptor->fetch_by_name($ftype_config->{'name'});
if(! defined $ftype){
$self->log("FeatureType '".$ftype_config->{'name'}."' for external feature_set ".$self->{'type'}." not present");
$self->log("Storing using type hash definitions");
$ftype = Bio::EnsEMBL::Funcgen::FeatureType->new(
-name => $ftype_config->{'name'},
-class => $ftype_config->{'class'},
-description => $ftype_config->{'description'},
);
($ftype) = @{$ftype_adaptor->store($ftype)};
}
#Replace hash config with object
$self->{'feature_types'}{$ftype_config->{'name'}} = $ftype;
}
return;
}
#Moved these to Helper
#
#
#=head2 get_display_name_by_stable_id
#
# Args [0] : stable ID from core DB.
# Args [1] : stable feature type e.g. gene, transcript, translation
# Example : $self->validate_and_store_feature_types;
# Description: Builds a cache of stable ID to display names.
# Returntype : string - display name
# Exceptions : Throws is type is not valid.
# Caller : General
# Status : At risk
#
#=cut
#
## --------------------------------------------------------------------------------
## Build a cache of ensembl stable ID -> display_name
## Return hashref keyed on {$type}{$stable_id}
##Need to update cache if we're doing more than one 'type' at a time
## as it will never get loaded for the new type!
#
#sub get_display_name_by_stable_id{
# my ($self, $stable_id, $type) = @_;
#
# $type = lc($type);
#
# if($type !~ /(gene|transcript|translation)/){
# throw("Cannot get display_name for stable_id $stable_id with type $type");
# }
#
# if(! exists $self->{'display_name_cache'}->{$stable_id}){
# ($self->{'display_name_cache'}->{$stable_id}) = $self->db->dnadb->dbc->db_handle->selectrow_array("SELECT x.display_label FROM ${type}_stable_id s, $type t, xref x where t.display_xref_id=x.xref_id and s.${type}_id=t.gene_id and s.stable_id='${stable_id}'");
# }
#
# return $self->{'display_name_cache'}->{$stable_id};
#}
#
#
#=head2 get_stable_id_by_display_name
#
# Args [0] : display name (e.g. from core DB or GNC name)
# Example :
# Description: Builds a cache of stable ID to display names.
# Returntype : string - gene stable ID
# Exceptions : None
# Caller : General
# Status : At risk
#
#=cut
#
## --------------------------------------------------------------------------------
## Build a cache of ensembl stable ID -> display_name
## Return hashref keyed on {$type}{$stable_id}
##Need to update cache if we're doing more than one 'type' at a time
## as it will never get loaded for the new type!
#
#sub get_stable_id_by_display_name{
# my ($self, $display_name) = @_;
#
# #if($type !~ /(gene|transcript|translation)/){
## throw("Cannot get display_name for stable_id $stable_id with type $type");
## }
#
# if(! exists $self->{'stable_id_cache'}->{$display_name}){
# ($self->{'stable_id_cache'}->{$display_name}) = $self->db->dnadb->dbc->db_handle->selectrow_array("SELECT s.stable_id FROM gene_stable_id s, gene g, xref x where g.display_xref_id=x.xref_id and s.gene_id=g.gene_id and x.display_label='${display_name}'");
# }
#
# return $self->{'stable_id_cache'}->{$display_name};
#}
=head2 project_feature
Args [0] : Bio::EnsEMBL::Feature
Args [1] : string - Assembly e.g. NCBI37
Example : $self->project($feature, $new_assembly);
Description: Projects a feature to a new assembly via the AssemblyMapper
Returntype : Bio::EnsEMBL::Feature
Exceptions : Throws is type is not valid.
Caller : General
Status : At risk - is this in core API? Move to Utils?
=cut
# --------------------------------------------------------------------------------
# Project a feature from one slice to another
sub project_feature {
my ($self, $feat, $new_assembly) = @_;
# project feature to new assembly
my $feat_slice = $feat->feature_Slice;
my @segments = @{ $feat_slice->project('chromosome', $new_assembly) };
if(! @segments){
$self->log("Failed to project feature:\t".$feat->display_label);
}
elsif(scalar(@segments) >1){
$self->log("Failed to project feature to distinct location:\t".$feat->display_label);
return;
}
my $proj_slice = $segments[0]->to_Slice;
if($feat_slice->length != $proj_slice->length){
$self->log("Failed to project feature to comparable length region:\t".$feat->display_label);
return;
}
# everything looks fine, so adjust the coords of the feature
$feat->start($proj_slice->start);
$feat->end($proj_slice->end);
$feat->strand($proj_slice->strand);
my $slice_new_asm = $self->slice_adaptor->fetch_by_region('chromosome', $proj_slice->seq_region_name, undef, undef, undef, $new_assembly);
$feat->slice($slice_new_asm);
return $feat;
}
sub slice_adaptor{
my $self = shift;
if(! defined $self->{'slice_adaptor'}){
$self->{'slice_adaptor'} = $self->db->get_SliceAdaptor;
}
return $self->{'slice_adaptor'};
}
1;