Raw content of BioMart::Initializer
#Id: Initializer.pm,v 1.71 2006/01/25 16:47:24 ds5 Exp $
#
# BioMart module for BioMart::Initializer
#
# You may distribute this module under the same terms as perl
# itself.
# POD documentation - main docs before the code.
=head1 NAME
BioMart::Initializer
=head1 SYNOPSIS
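    use BioMart::Initializer;
    my $initializer = BioMart::Initializer->new('registryFile' => $confFile);
    my $registry    = $initializer->getRegistry();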
=head1 DESCRIPTION
The BioMart::Initializer module reads the MartRegistry.xml
configuration file containing the information about the
databases and datasets that are to be used. This is done
by the constructor. MartRegistry.xml files can contain
MartDBLocation/MartURLLocation pointing to a Mart Database
or web server respectively containing
DatasetConfig.xml and the tables to be queried,
or RegistryDBLocation/RegistryURLLocation elements pointing to a
Database/Web server containing other MartRegistry.xml files.
See the file "config/defaultMartRegistry.xml"
for an example of the format of this file.
The Initializer is only used once, at the
beginning of a BioMart session, to get the
BioMart::Registry object.
=head1 AUTHOR - Arek Kasprzyk, Syed Haider, Andreas Kahari, Darin London, Damian Smedley, Gudmundur Arni Thorisson
=head1 CONTACT
This module is part of the BioMart project
http://www.biomart.org
Questions can be posted to the mart-dev mailing list:
mart-dev@ebi.ac.uk
=head1 METHODS
=cut
#------------------------------------------------------------------------
package BioMart::Initializer;
use strict;
use warnings;
use IO::File;
use BioMart::Configurator;
use BioMart::Registry;
use Data::Dumper;
use BioMart::Configuration::VirtualSchema;
use BioMart::Configuration::RegistryDBPointer;
use BioMart::Configuration::RegistryURLPointer;
use BioMart::Configuration::MartDBLocation;
use BioMart::Configuration::MartURLLocation;
use BioMart::Web::SiteDefs;
use XML::Simple qw(:strict);
use XML::DOM;
use Cwd;
use File::Path;
use Storable qw(store retrieve freeze nfreeze thaw);
local $Storable::Deparse = 1;
$Storable::forgive_me = 1;
# Extends BioMart::Root
use base qw(BioMart::Root);
use vars qw($REGISTRY);
use constant DEFAULTSCHEMANAME => 'default';
use constant INITBATCHSIZE =>200;
use constant MAXBATCHSIZE => 50000;
use constant VERSION => '0.5';
#------------------------------------------------------------------------
=head2 new
Usage : my $initializer = BioMart::Initializer->new(
            'registryFile' => $confFile,
            'action'       => 'clean',      # optional: clean | update | backup
            'mode'         => 'lazyload');  # optional, memory is the default
Description: Builds BioMart configuration from a registry file.
             'clean' and 'update' first back up the existing cache files,
             'backup' restores the last backup, any other (or no) action
             uses the cached registry when one is available.
Return type: BioMart::Initializer
Exceptions : BioMart::Exception::Configuration if no registryFile is given
Caller :
=cut
sub _new {
    my ($self, @params) = @_;
    $self->SUPER::_new(@params);
    my (%params) = @params;

    my $registryFile = $params{'registryFile'};
    if (!defined($registryFile)) {
        BioMart::Exception::Configuration->throw ("Initializer needs a registry file");
    }

    # Split the registry path into its directory (trailing slash kept) and
    # file name.  A bare file name has no directory component, so fall back
    # to the current directory instead of relying on undefined captures.
    my ($confDir, $regFileName);
    if ($registryFile =~ m/(.*\/)([^\/]*)/) {
        ($confDir, $regFileName) = ($1, $2);
    }
    else {
        ($confDir, $regFileName) = ('./', $registryFile);
    }

    $self->attr('confDir', $confDir);
    $self->attr('cachedRegistries', $confDir . 'cachedRegistries/');
    $self->attr('regFileName', $regFileName);

    my $cachedDirectory = $confDir . 'cachedRegistries';
    if (!-e $cachedDirectory) {
        mkdir($cachedDirectory)
            or warn("Unable to create directory '$cachedDirectory': $!\n");
    }

    # Directory holding the per-virtualSchema caches (confTrees, XML, ...).
    $self->attr('dirPath', $confDir . "Cached/");

    # Working copy of the registry XML; _loadConfigFrom parses this file with
    # XML::DOM and it is overwritten when registry pointers are followed.
    my $copyRegistryFile = $confDir . 'registry_DOM_XML';
    $self->attr('registryFileDOM', $copyRegistryFile);
    # List-form system: no shell, so paths containing spaces or shell
    # metacharacters are passed through untouched.
    system('cp', $registryFile, $copyRegistryFile);

    $self->attr('orderedLocations', undef);
    $self->attr('registry', undef);
    # Only used by martview, to guess whether anything was updated.
    $self->attr('configurationUpdate', 'true');

    # From here on the cache files live under cachedRegistries/.
    my $cacheBase    = $self->get('cachedRegistries') . $self->get('regFileName');
    my $xmlCacheDir  = $confDir . 'Cached';
    my $xmlBackupDir = $xmlCacheDir . '_backup';
    my @cacheSuffixes = qw(cached min_cached_mem min_cached_disk);

    # Copy every existing cache file / directory to its *_backup sibling.
    my $backupCaches = sub {
        foreach my $suffix (@cacheSuffixes) {
            my $cacheFile = $cacheBase . '.' . $suffix;
            system('cp', $cacheFile, $cacheFile . '_backup') if (-e $cacheFile);
        }
        if (-e $xmlCacheDir) {
            # "cp -r src dst" copies INTO dst when dst already exists, which
            # would leave the stale backup in place with the fresh copy
            # nested inside it; remove the old backup first.
            rmtree($xmlBackupDir) if (-e $xmlBackupDir);
            system('cp', '-r', $xmlCacheDir, $xmlBackupDir);
        }
    };

    # Move every existing *_backup file / directory back into place.
    my $restoreCaches = sub {
        foreach my $suffix (@cacheSuffixes) {
            my $cacheFile = $cacheBase . '.' . $suffix;
            system('mv', $cacheFile . '_backup', $cacheFile) if (-e $cacheFile . '_backup');
        }
        if (-e $xmlBackupDir) {
            system('rm', '-r', $xmlCacheDir);
            system('mv', $xmlBackupDir, $xmlCacheDir);
        }
    };

    my $action = ($params{'registryFile'} && defined($params{'action'}))
        ? $params{'action'}
        : '';

    my $mart_registry;
    if ($action eq 'clean') {
        $backupCaches->();
        $mart_registry = $self->init_clean(@params);
    }
    elsif ($action eq 'update') {
        $backupCaches->();
        $self->_init(@params);
        $mart_registry = $self->init_update(@params);
    }
    elsif ($action eq 'backup') {
        $restoreCaches->();
        $mart_registry = $self->init_cached(@params);
    }
    else {
        # Default: use the cached registry when one is available.
        $mart_registry = $self->init_cached(@params);
    }

    $self->set('registry', $mart_registry);
    # The DOM working copy is only needed while configuring.
    if (-e $self->get('registryFileDOM')) { unlink $self->get('registryFileDOM'); }
}
#------------------------------------------------------------------------
=head2 _init
Usage : self->_init(@params) where @params as received by _new
Description: parses the registry file, populates the registry object and
             creates the per-virtualSchema cache directories.  When the
             package-level $REGISTRY is already defined it is reused and
             nothing is parsed.
             Honoured parameters: registryFile, init_batchsize, max_batchsize.
Return type: none
Exceptions : BioMart::Exception::Configuration if the registry file cannot
             be opened or no virtual schema could be configured
Caller : $self
=cut
sub _init
{
    my ($self, @params) = @_;
    my (%params) = @params;

    $self->attr('path', '/biomart/martservice');

    # Reuse a registry that was already built (or injected via
    # reloadRegistry) in this process.
    if (defined $REGISTRY) {
        $self->attr('registry', $REGISTRY);
        return $self;
    }

    $self->attr('init_batchsize', $params{'init_batchsize'} || INITBATCHSIZE);
    $self->attr('max_batchsize',  $params{'max_batchsize'}  || MAXBATCHSIZE);

    # Open the registry once; the handle is handed straight to the parser.
    my $registryFile = $params{'registryFile'};
    my $fh = IO::File->new($registryFile, "<") or
        BioMart::Exception::Configuration->throw ("Unable to open configuration file '$registryFile', check file existence and permissions");

    $self->_registryXML('', '');
    $self->set('registry', undef);
    $self->_loadConfigFrom($fh);
    $fh->close();

    $REGISTRY = $self->_populateRegistry;

    #---- setting DirPaths for registry and datasetI via Registry.pm,
    #---- and making directory structure based on available VS
    $REGISTRY->setDirPath($self->get('dirPath'));
    my $v_schemas = $REGISTRY->getAllVirtualSchemas();
    foreach my $schema (@$v_schemas) {
        foreach my $subDir ('/confTrees', '/_portables', '/XML') {
            my $dir = $self->get('dirPath') . $schema->name() . $subDir;
            mkpath($dir, 1, 0711) unless (-d $dir);
        }
    }

    unless (@{$REGISTRY->getAllVirtualSchemas} > 0) {
        BioMart::Exception::Configuration->throw(" Problems with the retrieval of dataset configuration
Please check:
that your mart Registry files contains correct connection params,
that you are using the correct version on XML::Simple,
that BioMart databases contain a populated meta_conf tables and
that you have set martUser correctly if you are running in restricted data
access mode (populated meta_conf__user__dm)\n\n");
    }

    $self->set('registry', $REGISTRY);
}
#------------------------------------------------------------------------
=head2 init_cached
Usage : $mart_registry = self->init_cached(@params) where @params as received by _new
Description: gets the mart_registry object, if possible from disk
             otherwise reconfigure it from scratch.  A cached registry that
             exists but cannot be read is treated as unavailable and rebuilt.
Return type: $mart_registry object
Exceptions :
Caller : $self
=cut
sub init_cached
{
    my ($self, @params) = @_;
    my (%params) = @params;
    my $mart_registry;

    # All cache files live in the cachedRegistries directory.
    my $registryFile = $self->get('cachedRegistries') . $self->get('regFileName');
    my $cachefile = $registryFile . ".cached";

    if (-e $cachefile) {
        print STDERR "\nProcessing Cached Registry: $cachefile\n\n";
        $mart_registry = eval { retrieve($cachefile) };
        if (defined $mart_registry) {
            $self->set('registry', $mart_registry);
            $self->set('configurationUpdate', 'false');
            return $mart_registry;
        }
        # retrieve() either died or returned undef: do not hand an undefined
        # registry back to the caller, rebuild instead.
        my $reason = $@ || "retrieve returned no data\n";
        warn("Cached registry '$cachefile' could not be loaded, reconfiguring from scratch: $reason");
        $mart_registry = $self->init_clean(@params);
        return $mart_registry;
    }

    print "\nCached Registry Unavailable...\n";
    my $cachefile_min_disk = $registryFile . ".min_cached_disk";
    my $cachefile_min_mem  = $registryFile . ".min_cached_mem";

    # A leftover minimal cache only tells us that a previous configuration
    # existed; its contents are not needed because the registry is rebuilt
    # below, so the files are simply removed.
    my $previous_mode = undef;
    if (-e $cachefile_min_disk) {
        $previous_mode = 'LAZYLOAD';
        unlink $cachefile_min_disk;
    }
    if (-e $cachefile_min_mem) {
        $previous_mode = 'MEMORY';
        unlink $cachefile_min_mem;
    }

    if (!$previous_mode) {
        print "\nRunning Complete Clean...\n";
        $mart_registry = $self->init_clean(@params);
        return $mart_registry;
    }

    print "\n[RUNNING UPDATE]";
    $self->_init(@params);
    $mart_registry = $self->get('registry');
    if (defined($params{'mode'}) && ($params{'mode'} eq 'lazyload')) {
        print ".... WITH LAZYLOADING\n";
        $mart_registry->setMode('LAZYLOAD');
        store($mart_registry, $cachefile_min_disk);
    }
    else {
        # default to --memory option
        print " .... WITH MEMORY [default]\n";
        store($mart_registry, $cachefile_min_mem);
    }
    $mart_registry->configure; # need to do this to load all dset links
    store($mart_registry, $cachefile);

    return $mart_registry;
}
#------------------------------------------------------------------------
=head2 init_clean
Usage : $mart_registry = self->init_clean(@params) where @params as received by _new
Description: builds a brand new mart_registry object: runs _init, removes
             every existing cache file and cached XML, then stores the
             minimal cache (lazyload or memory flavour) and, after
             configure, the full cache.
Return type: $mart_registry object
Exceptions :
Caller : $self
=cut
sub init_clean
{
    my ($self, @params) = @_;
    my %options = @params;

    $self->_init(@params);
    my $registry = $self->get('registry');

    # Cache files live in the cachedRegistries directory.
    my $cacheBase = $self->get('cachedRegistries') . $self->get('regFileName');
    my $fullCache = $cacheBase . ".cached";
    my $diskCache = $cacheBase . ".min_cached_disk";
    my $memCache  = $cacheBase . ".min_cached_mem";

    # Drop whatever an earlier run left behind.
    foreach my $stale ($fullCache, $diskCache, $memCache) {
        unlink $stale if (-e $stale);
    }
    $registry->cleanXMLs();

    my $lazyload = defined($options{'mode'}) && ($options{'mode'} eq 'lazyload');
    if (!$lazyload) {
        # default to --memory option
        print "\n[NEW CONFIGURATION] .... WITH MEMORY [default]\n";
        store($registry, $memCache);
    }
    else {
        print "\n[NEW CONFIGURATION] .... WITH LAZYLOADING\n";
        $registry->setMode('LAZYLOAD');
        store($registry, $diskCache);
    }

    # configure loads all dataset links; the full cache is written afterwards.
    $registry->configure;
    store($registry, $fullCache);

    return $registry;
}
#------------------------------------------------------------------------
=head2 init_update
Usage : $mart_registry = self->init_update(@params) where @params as received by _new
Description: runs an update on existing XMLs based on their
             modified DATE/TIME STAMP, and updates any XML
             if needed, followed by reconfiguring registry object.
             The freshly initialised registry is compared with the minimal
             cached registry (virtual schemas, location parameters, dataset
             time stamps).  If nothing changed and the mode is unchanged the
             full cached registry is returned, otherwise the registry is
             reconfigured and all cache files are rewritten.
Return type: $mart_registry object
Exceptions :
Caller : $self
=cut
sub init_update
{
    my ($self, @params) = @_;
    my (%params) = @params;

    my $mart_registry = $self->get('registry');
    # Cache files live in the cachedRegistries directory.
    my $registryFile = $self->get('cachedRegistries') . $self->get('regFileName');
    # Normalise once so the comparisons below never see undef.
    my $mode = defined($params{'mode'}) ? $params{'mode'} : '';

    my $cachefile          = $registryFile . ".cached";
    my $cachefile_min_disk = $registryFile . ".min_cached_disk";
    my $cachefile_min_mem  = $registryFile . ".min_cached_mem";

    my $cachefile_min;
    my $previous_mode = undef; # 'MEMORY', or 'LAZYLOAD'
    if (-e $cachefile_min_disk) {
        $cachefile_min = $cachefile_min_disk;
        $previous_mode = 'LAZYLOAD';
    }
    if (-e $cachefile_min_mem) {
        $cachefile_min = $cachefile_min_mem;
        $previous_mode = 'MEMORY';
    }

    my $reConfigure = 'false';
    my $mart_registry_min; # old mart registry from disk
    if ((!-e $cachefile) || !$previous_mode) {
        print "\n[REGISTRY OBJECT DOESNT EXIST] Reconfiguring using possible cached information !!!\t";
        $reConfigure = 'true';
    }
    else {
        $mart_registry_min = eval { retrieve($cachefile_min) };
        if (!defined $mart_registry_min) {
            # An unreadable minimal cache gives nothing to compare against.
            my $reason = $@ || "retrieve returned no data\n";
            warn("Minimal cached registry '$cachefile_min' could not be loaded, reconfiguring: $reason");
            $reConfigure = 'true';
        }
    }

    if (defined $mart_registry_min) {
        # Location accessors whose values must match between old and new.
        my @locationAttributes = qw(name displayName host port default visible
            includeDatasets martUser schema databaseType database user
            password proxy path serverVirtualSchema);

        foreach my $schemaA (@{ $mart_registry->getAllVirtualSchemas() }) {
            my $schemaB = $mart_registry_min->getVirtualSchemaByName($schemaA->name());
            if (!($schemaB && ($schemaA->name() eq $schemaB->name()))) {
                # reconfigure because the virtualSchema name is different
                $reConfigure = 'true';
                next;
            }

            my $allLocationsA = $schemaA->getAllLocations();
            my $allLocationsB = $schemaB->getAllLocations();
            if (scalar @$allLocationsA != scalar @$allLocationsB) {
                # number of locations under the same virtual schema differ
                $reConfigure = 'true';
                next;
            }

            for (my $i = 0; $i < scalar @$allLocationsA; $i++) {
                foreach my $attribute (@locationAttributes) {
                    my $valueA = $$allLocationsA[$i]->$attribute();
                    my $valueB = $$allLocationsB[$i]->$attribute();
                    # undef and '' are treated as equal, as before, but
                    # without the uninitialized-value warnings.
                    $valueA = '' if (!defined $valueA);
                    $valueB = '' if (!defined $valueB);
                    if ($valueA ne $valueB) {
                        $reConfigure = 'true'; # location parameters differ
                        last;
                    }
                }
            }

            # NOTE(review): dataset time stamps (and the stale-XML clean-up
            # below) are skipped as soon as anything has already requested a
            # reconfigure, including an earlier schema - behaviour kept as is.
            next if ($reConfigure eq 'true');

            # databases are locations as per old API calls
            my $databasesA = $mart_registry->getAllDatabaseNames($schemaA->name());
            foreach my $database_nameA (@$databasesA) {
                my $datasetsA = $mart_registry->getAllDataSetsByDatabaseName($schemaA->name(), $database_nameA);
                foreach my $dataset_nameA (@$datasetsA) {
                    my $datasetA = $mart_registry->getDatasetByName($schemaA->name(), $dataset_nameA);
                    my $datasetB = $mart_registry_min->getDatasetByName($schemaA->name(), $dataset_nameA);

                    if (!$datasetB) {
                        # Dataset is new since the cache was written: there is
                        # no old XML to remove, only a reconfigure to request.
                        # (assumes getDatasetByName returns a false value for
                        # unknown datasets - TODO confirm)
                        $reConfigure = 'true';
                        next;
                    }
                    next if ($datasetA->modified() eq $datasetB->modified());

                    # Time stamp changed: unlink the cached XML of every
                    # interface of this dataset, if it exists.
                    my $cleanFile = $self->get('dirPath') . $schemaA->name() . "/XML/";
                    $cleanFile .= $datasetB->locationName() . "." . $datasetB->name();
                    # interfaces() should return a comma separated list
                    foreach my $interface (split /,/, $datasetB->interfaces()) {
                        my $xmlFile = $cleanFile . "." . $interface;
                        if (-e $xmlFile) { unlink $xmlFile; }
                    }
                    $reConfigure = 'true';
                }
            }
        }
    }

    # A change of storage mode forces the cache files to be rewritten even
    # when the configuration itself is unchanged.  $previous_mode is always
    # defined when $reConfigure is still 'false'.
    my $modeSwitched = 0;
    if ($reConfigure ne 'true') {
        $modeSwitched = 1 if ($previous_mode eq 'MEMORY' && $mode eq 'lazyload');
        $modeSwitched = 1 if ($previous_mode eq 'LAZYLOAD' && $mode ne 'lazyload');
    }

    if ($reConfigure ne 'true' && !$modeSwitched) {
        # Nothing changed: retrieve the old mart registry and return it.
        my $existingRegistry = eval { retrieve($cachefile) };
        if (defined $existingRegistry) {
            $self->set('configurationUpdate', 'false');
            return $existingRegistry;
        }
        # Unreadable full cache: fall through and rebuild it.
        my $reason = $@ || "retrieve returned no data\n";
        warn("Cached registry '$cachefile' could not be loaded, reconfiguring: $reason");
        $reConfigure = 'true';
    }

    ### Reconfigure Registry object again
    if ($reConfigure eq 'false') { $self->set('configurationUpdate', 'false'); }
    if (-e $cachefile) { unlink $cachefile; }
    if (-e $cachefile_min_disk) { unlink $cachefile_min_disk; }
    if (-e $cachefile_min_mem) { unlink $cachefile_min_mem; }

    if ($mode eq 'lazyload') {
        print "\n[UPDATING] .... WITH LAZYLOADING\n";
        $mart_registry->setMode('LAZYLOAD');
        store($mart_registry, $cachefile_min_disk);
    }
    else {
        # default to --memory option
        print "\n[UPDATING] .... WITH MEMORY [default]\n";
        store($mart_registry, $cachefile_min_mem);
    }
    $mart_registry->configure; # need to do this to load all dset links
    store($mart_registry, $cachefile);

    return $mart_registry;
}
=head2 configurationUpdated
Usage : $init->configurationUpdated()
Description: Reports the value of the 'configurationUpdate' attribute, i.e.
             the string 'true' or 'false' recorded while the registry was
             initialised. ONLY for martview.
Return type: true/false
Exceptions :
Caller : configure.pl
=cut
sub configurationUpdated
{
    my $self = shift;
    # Plain accessor: the flag is maintained by the init_* methods.
    return $self->get('configurationUpdate');
}
=head2 getRegistry
Usage : my $registry = $initializer->getRegistry();
Description: Returns the BioMart::Registry object
             containing information for all loaded
             BioMart::Dataset objects, after attaching the settings
             obtained from BioMart::Web::SiteDefs for the registry's
             configuration directory.
Return type: BioMart::Registry
Exceptions :
Caller :
=cut
sub getRegistry {
    my ($self) = @_;

    my $registry = $self->get('registry');

    # Embed the settings.conf parameters into the registry object before
    # handing it out.
    my $settings = BioMart::Web::SiteDefs->getSettings($self->get('confDir'));
    $registry->settingsParams($settings);

    return $registry;
}
=head2 reloadRegistry
Usage : $initializer->reloadRegistry();
Description: Copies this initializer's 'registry' attribute into the
             package-level $REGISTRY, so an already created registry
             object is reused by _init.
Return type: the registry that was stored
Exceptions :
Caller :
=cut
sub reloadRegistry
{
    my ($self) = @_;
    # _init returns early whenever $REGISTRY is defined.
    return $REGISTRY = $self->get('registry');
}
#--------------------------------------------------------------
# _loadConfigFrom($source, $vSchemaName, $vSchemaDisplayName, $includeMarts, $proxy)
#
# Loads the locations described by a registry XML into the registry object
# held in the 'registry' attribute (creating that object if necessary).
#
# $source can be an IO::Handle object, or a
# string of xml text. See the documentation
# for XML::Simple::XMLin for details. If $source
# is null, returns undef
#
# The XML is read twice:
#   1. the file named by the 'registryFileDOM' attribute (NOT $source) is
#      parsed with XML::DOM to record the names of the location elements of
#      each virtualSchema in document order ('orderedLocations' attribute);
#   2. $source is parsed with XML::Simple to obtain the location parameters.
# The callers visible in this file write the XML to 'registryFileDOM' before
# calling, so both parses see the same document.
#
# $vSchemaName / $vSchemaDisplayName name the virtual schema that receives
# locations not wrapped in a virtualSchema element (default: DEFAULTSCHEMANAME);
# $includeMarts and $proxy are passed through to _loadLocationsFrom.
#
# This method is re-entered through _loadLocationsFrom -> _setRegistryPointer
# when a registry pointer is followed.
sub _loadConfigFrom {
my ($self, $source, $vSchemaName, $vSchemaDisplayName, $includeMarts,$proxy) = @_;
return undef unless($source);
#-------------------
# Pass 1: collect location names per virtual schema, in document order.
my $hashLocations;
my $configurePass = 0;
my $parserDOM = XML::DOM::Parser->new();
my $doc = $parserDOM->parsefile($self->get('registryFileDOM'));
my $vSchemaNodes = $doc->getElementsByTagName('virtualSchema');
if($vSchemaNodes->getLength() > 0) {
# A single virtualSchema is accepted even without a default attribute.
if($vSchemaNodes->getLength() == 1 && !$vSchemaNodes->[0]->getAttribute('default')) {
$configurePass = 1;
}
foreach my $vSchemaNode(@$vSchemaNodes) { ## check if there exists a VS with a default=1 otherwise no need to configure
if ($vSchemaNode->getAttribute('default')) {
$configurePass = 1;
}
}
if (!$configurePass) {
BioMart::Exception::Configuration->throw("\n\t\tInitializer.pm: Set at least one virtaulSchema attribute default=\"1\" ");
# NOTE(review): presumably unreachable, assuming throw() raises - confirm
exit;
}
# Every element child of a virtualSchema is recorded as one of its locations.
foreach my $vSchemaNode(@$vSchemaNodes) {
my $children = $vSchemaNode->getChildNodes;
if($children) {
foreach my $childNode (@$children) {
if($childNode->isa('XML::DOM::Element')) {
push @{$hashLocations->{$vSchemaNode->getAttribute('name')}}, $childNode->getAttribute('name');
}
}
}
}
}
else ## assume its a 'default' VS
{
my $martRegistryNode = $doc->getElementsByTagName('MartRegistry');
foreach my $allNodes (@$martRegistryNode) {
# NOTE(review): indexes the node object as an array; this looks like it
# relies on XML::DOM's internal node layout to reach the child list - confirm
my $node = $allNodes->[0];
foreach my $location (@$node) {
if($location->isa('XML::DOM::Element')) {
push @{$hashLocations->{'default'}}, $location->getAttribute('name');
}
}
}
}
# $hashLocations stays undef when no location element was found.
$self->set('orderedLocations',$hashLocations);
$doc->dispose();
# Pass 2: parse $source into a hash tree; the listed elements are always
# arrays, and keyattr => [] disables folding of elements by attribute.
my $config = XMLin($source, forcearray=> [qw(virtualSchema
RegistryDBPointer RegistryURLPointer MartDBLocation MartURLLocation)],
keyattr => []);
#the first time this method is called, $vSchemaName will be null,
#which signals it to load locations without a virtualSchema into
#defaultSchema. Subsequent recursive calls will have $vSchemaName
#defined, which will signal it to load locations without a
#virtualSchema into the given $vSchemaName (which could actually
#still be the defaultSchema from the original call).
#Thus, locations without a virtualSchema can be explicitly
#defined into a virtualSchema at the Registry level, by
#placing a virtualSchema wrapper around the RegistryDBLocation, but
#any location within a virtualSchema wrapper in the registry pointed
#to down the chain will override this virtualSchema
my $registry = $self->get('registry');
if (!defined $registry) {
$registry = BioMart::Registry->new();
$self->set('registry', $registry);
}
my $dSchema = (defined($vSchemaName)) ? $vSchemaName : DEFAULTSCHEMANAME;
my $schemaDisplayName = (defined($vSchemaDisplayName)) ?
$vSchemaDisplayName : DEFAULTSCHEMANAME;
# NOTE(review): the lookup uses DEFAULTSCHEMANAME while a new schema is
# created under $dSchema; the two differ on recursive calls - confirm intent
my $virtualSchema = $registry->getVirtualSchemaByName(DEFAULTSCHEMANAME);
if (!defined $virtualSchema) {
$virtualSchema = BioMart::Configuration::VirtualSchema->new(
name => $dSchema,
displayName => $schemaDisplayName
);
$virtualSchema->visible(0);
}
# Locations that sit directly under the document root.
$virtualSchema = $self->_loadLocationsFrom($virtualSchema, $config,
$includeMarts,$proxy);
# The schema is only registered when it actually received locations.
if (!defined $registry->getVirtualSchemaByName(DEFAULTSCHEMANAME)) {
$registry->addVirtualSchema($virtualSchema)
if ($virtualSchema && (@{$virtualSchema->getAllLocations} > 0));
}
# Locations wrapped in explicit virtualSchema elements.
foreach my $vSchemaNode (@{$config->{'virtualSchema'}}) {
$virtualSchema = $registry->getVirtualSchemaByName($vSchemaNode->{'name'});
if (!defined $virtualSchema){
$virtualSchema = BioMart::Configuration::VirtualSchema->new(
name => $vSchemaNode->{'name'},
displayName => $vSchemaNode->{'displayName'} || ''
);
$virtualSchema->visible(0) if (!$vSchemaNode->{'visible'});
$registry->addVirtualSchema($virtualSchema);
}
$virtualSchema = $self->_loadLocationsFrom($virtualSchema,
$vSchemaNode, $includeMarts,$proxy);
}
$self->set('registry', $registry);
# The value of this call is what the method returns.
$registry->toXML($self->_registryXML);
}
# _loadLocationsFrom($virtualSchema, $node, $includeMarts, $proxy)
#
# Adds the locations found in $node (a hash tree produced by XMLin, either
# the document root or one virtualSchema element) to $virtualSchema, in the
# order recorded in the 'orderedLocations' attribute:
#   - Registry*Pointer entries are followed via _setRegistryPointer;
#   - Mart*Location entries are instantiated via _setMartLocation and added
#     to the schema, unless $includeMarts (comma separated list of
#     [schema.]mart names) is set and does not list them.
# Afterwards every key of $node holding a non-empty array is checked against
# the known element names and warnings are issued for unknown or deprecated
# ones.
# Returns $virtualSchema.
sub _loadLocationsFrom {
    my ($self, $virtualSchema, $node, $includeMarts, $proxy) = @_;

    my $vSchemaName = $virtualSchema->name;
    my $vSchemaDisplayName = $virtualSchema->displayName;
    if ($node->{'default'}) {
        $virtualSchema->default(1);
    }

    # 'orderedLocations' is undef when the registry XML contained no location
    # elements; treat that as "nothing to load" instead of dying on an
    # undefined hash reference.
    my $orderedLocations = $self->get('orderedLocations') || {};
    my $locationNames = $orderedLocations->{$vSchemaName};

    if ($locationNames) {
        # Following a registry pointer replaces the 'orderedLocations'
        # attribute; the list captured above is the one iterated here.
        foreach my $locationName (@$locationNames) {

            foreach my $rtype ('RegistryDBPointer', 'RegistryURLPointer') {
                foreach my $regdbloc (@{ $node->{$rtype} }) {
                    # Pointers need not carry a name attribute.
                    my $pointerName = defined($regdbloc->{'name'}) ? $regdbloc->{'name'} : '';
                    if ($pointerName eq $locationName) {
                        $self->_setRegistryPointer($rtype, $vSchemaName, $vSchemaDisplayName, $regdbloc);
                    }
                }
            }

            foreach my $mtype ('MartDBLocation', 'MartURLLocation') {
                foreach my $dbloc (@{ $node->{$mtype} }) {
                    my $martLocationName = defined($dbloc->{'name'}) ? $dbloc->{'name'} : '';
                    next if ($martLocationName ne $locationName);

                    # if includeMarts set then check if on list - if not next
                    if ($includeMarts) {
                        my $qualifiedName = $vSchemaName . '.' . $martLocationName;
                        # Unqualified mart names belong to the 'default' schema.
                        my $seen = grep {
                            (($_ =~ /\./) ? $_ : 'default.' . $_) eq $qualifiedName
                        } split(/,/, $includeMarts);
                        next if (!$seen);
                    }

                    my $martLocation = $self->_setMartLocation($mtype, $virtualSchema, $dbloc, $proxy);
                    next if (!$martLocation);

                    # serverVirtualSchema for martservice,
                    if (!$dbloc->{'serverVirtualSchema'}) {
                        $dbloc->{'serverVirtualSchema'} = $virtualSchema->name();
                    }
                    $self->_registryXML($mtype, $dbloc);
                    $virtualSchema->addLocation($martLocation);
                }
            }
        }
    }

    warn("\n");
    # validation of Registry file
    my @knownTypes = ('virtualSchema', 'RegistryDBPointer', 'RegistryURLPointer',
        'MartDBLocation', 'MartURLLocation', 'DatabaseLocation', 'RegistryDBLocation');
    foreach my $locType (keys %$node) {
        # skip empty keys
        next if (!ref $node->{$locType} || !@{ $node->{$locType} });
        if (!grep { $locType eq $_ } @knownTypes) {
            warn("... Unknown location type:$locType\n");
            next;
        }
        foreach my $loc (@{ $node->{$locType} }) {
            # replace warns with die before 0_4 release
            warn("Warning: DatabaseLocation is replaced with MartDBLocation in 0_4. Fix your registry for ".$loc->{'name'}."\n")
                if ($locType eq 'DatabaseLocation');
            warn("Warning: RegistryDBLocation is replaced with RegistryDBPointer in 0_4. Fix your registry for ".$loc->{'name'}."\n")
                if ($locType eq 'RegistryDBLocation');
        }
    }
    warn("\n");

    return $virtualSchema;
}
# _setRegistryPointer($type, $vSchemaName, $vSchemaDisplayName, $regdbloc)
#
# Follows a RegistryDBPointer / RegistryURLPointer: creates the pointer
# object, fetches the registry XML it points to, writes that XML to the
# 'registryFileDOM' working file (which _loadConfigFrom parses with XML::DOM)
# and loads the pointed-to registry into the given virtual schema.
# A pointer that could not be created is skipped (after the warning issued by
# _setLocation), in the same way unreachable mart locations are skipped.
# Throws BioMart::Exception::Configuration when the working file cannot be
# written.
sub _setRegistryPointer {
    my ($self, $type, $vSchemaName, $vSchemaDisplayName, $regdbloc) = @_;

    my $pointer = $self->_setLocation($type, $regdbloc);
    return if (!$pointer);

    my $regXML = $pointer->getRegistryXML();

    # _loadConfigFrom reads location order from this file, so a failed write
    # would silently pair the new XML with the previous document.
    my $regDOMFILE = $self->get('registryFileDOM');
    open(my $domFh, '>', $regDOMFILE)
        or BioMart::Exception::Configuration->throw("Unable to write registry XML to '$regDOMFILE': $!");
    print {$domFh} $regXML;
    close($domFh)
        or BioMart::Exception::Configuration->throw("Unable to write registry XML to '$regDOMFILE': $!");

    return $self->_loadConfigFrom($regXML, $vSchemaName, $vSchemaDisplayName,
        $regdbloc->{'includeMarts'}, $regdbloc->{'proxy'});
}
# _setMartLocation($type, $virtualSchema, $dbloc, $proxy)
#
# Thin wrapper around _setLocation for Mart*Location entries; the
# $virtualSchema argument is accepted but not used.
# Returns whatever _setLocation returns (evaluated in scalar context).
sub _setMartLocation {
    my ($self, $type, $virtualSchema, $dbloc, $proxy) = @_;
    return scalar $self->_setLocation($type, $dbloc, $proxy);
}
# _setLocation($type, $dbloc, $proxy)
#
# Validates the parameters of one registry entry and instantiates the
# matching BioMart::Configuration::<$type> object.
#   $type  - RegistryURLPointer, RegistryDBPointer, MartURLLocation or
#            MartDBLocation
#   $dbloc - hash ref of the entry's XML attributes
#   $proxy - proxy used when the entry does not define its own
# Throws BioMart::Exception::Configuration when a required parameter is
# missing; warns for every optional parameter that is not set.
# Returns the location object, or undef (after a warning that includes the
# underlying error) when the object could not be created.
sub _setLocation {
    my ($self, $type, $dbloc, $proxy) = @_;

    my $module = sprintf("BioMart::Configuration::%s", $type);
    $self->loadModule($module);

    # validate parameters
    my %parametersFor = (
        'RegistryURLPointer' => {
            'required' => [qw(host port)],
            'optional' => [qw(includeMarts path)],
        },
        'RegistryDBPointer' => {
            'required' => [qw(host port database schema databaseType user password)],
            'optional' => [qw(includeMarts)],
        },
        'MartURLLocation' => {
            'required' => [qw(name displayName host port)],
            'optional' => [qw(serverVirtualSchema visible default martUser
                              includeDatasets path)],
        },
        'MartDBLocation' => {
            'required' => [qw(name displayName host port schema databaseType database
                              user password)],
            'optional' => [qw(visible default martUser includeDatasets)],
        },
    );
    # Unknown types are not validated, exactly as before.
    my $parameters = $parametersFor{$type} || { 'required' => [], 'optional' => [] };
    my $name = defined $dbloc->{'name'} ? $dbloc->{'name'} : '';

    foreach my $parameter (@{ $parameters->{'required'} }) {
        if (!defined($dbloc->{$parameter})) {
            BioMart::Exception::Configuration->throw("Initializer.pm: No setting for required parameter $parameter in $type location:$name, Please check your registry file for parameter '$parameter' ");
        }
    }
    foreach my $parameter (@{ $parameters->{'optional'} }) {
        # replace warn with die for 0_4 release
        warn("Optional setting for $parameter in $type location:$name not defined - setting to default values \n") if (!defined($dbloc->{$parameter}));
    }

    my $location = eval {
        $module->new(
            name => $dbloc->{'name'},
            displayName => $dbloc->{'displayName'},
            host => $dbloc->{'host'},
            port => $dbloc->{'port'},
            default => $dbloc->{'default'} || '',
            visible => $dbloc->{'visible'} || 0,
            includeDatasets => $dbloc->{'includeDatasets'} || '',
            martUser => $dbloc->{'martUser'} || '',
            schema => $dbloc->{'schema'},
            databaseType => $dbloc->{'databaseType'},
            database => $dbloc->{'database'},
            user => $dbloc->{'user'},
            password => $dbloc->{'password'},
            proxy => $dbloc->{'proxy'} || $proxy,
            path => $dbloc->{'path'} || $self->get('path'),
            serverVirtualSchema => $dbloc->{'serverVirtualSchema'} || 'default',);
    };
    # Keep the error before anything else can reset $@.
    my $error = $@;

    if ($error || !$location) {
        # URL locations have no database attribute; name the location instead
        # of interpolating undef.
        my $target = defined($dbloc->{'database'}) ? $dbloc->{'database'} : $name;
        my $reason = $error ? " Reason: $error\n" : '';
        warn("\n\n\n !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
\n COULD NOT CONNECT TO DATABASE ".$target.".CHECK YOUR SETTINGS\n".$reason."
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! \n\n\n");
    }
    else {
        if ($dbloc->{'displayName'}) {
            print STDERR "\nConnection parameters of [".$dbloc->{'displayName'}."]\t[ OK ]";
        }
    }
    return $location;
}
# _populateRegistry
#
# Walks every location of every virtual schema held by the registry stored
# on $self, asks each location for its dataset descriptions and attaches one
# BioMart::Dataset::<type> object per description to that location.
#
# For locations that restrict their datasets through includeDatasets, a
# preliminary pass first builds throw-away dataset objects, reads their
# configuration XML and adds every dataset named by a visible
# 'pointerDataset' attribute or filter to the include list, so that those
# datasets are loaded as well.
#
# Locations that end up without datasets are removed from their virtual
# schema, and virtual schemas that end up without locations are removed from
# the registry.
#
# Returns: the registry object obtained from $self->get('registry').
# Throws:  BioMart::Exception::Configuration when a dataset name is already
#          present in the virtual schema, or when the temporary file used
#          for the 0.4 -> 0.5 configuration upgrade cannot be written.
#
# NOTE(review): BioMart::Web::CGIXSLT and BioMart::Exception are not among
# the modules imported at the top of this file; presumably they are loaded
# elsewhere - confirm.
sub _populateRegistry {
    my $self = shift;

    my $registry = $self->get('registry');

    # Configurators used by the main pass, shared across all locations and
    # keyed on database/name, host and port.
    my %configurators;

    # Element names XML::Simple must always fold into arrays, so that the
    # nested loops below can dereference them unconditionally.
    my @forceArray = qw(
        AttributePage AttributeGroup AttributeCollection AttributeDescription
        AttributeList FilterPage FilterGroup FilterCollection
        FilterDescription Importable Exportable Key MainTable BatchSize
        SeqModule Option PushAction
    );

    # Builds a dataset object for one dataset description of $location.
    # $configuratorCache is the hash ref in which configurators are looked
    # up and stored. The dataset is NOT added to the location here.
    my $buildDataset = sub {
        my ($datasetData, $location, $virtualSchema, $configuratorCache) = @_;

        my $dataSetName = lc($datasetData->{'dataset'});
        if ($virtualSchema->getDatasetByName($dataSetName)) {
            # A method call is not interpolated inside a string, so the
            # schema name has to be fetched before building the message.
            my $schemaName = $virtualSchema->name;
            BioMart::Exception::Configuration->throw(
                "\nDataset '${dataSetName}' is duplicated in virtualSchema '${schemaName}'\n"
              . "Please rename datasets in meta_conf tables or separate conflicting datasets\n"
              . "into different virtual schemas in defaultMartRegistry.xml\n\n");
        }

        # Locations without a database (the "dicty case" of the original
        # code) are keyed on their name instead.
        my $configuratorKey = ($location->database || $location->name)
            . '_' . $location->host . '_' . $location->port;

        my $configurator = $configuratorCache->{$configuratorKey};
        if (!defined $configurator) {
            $configurator = BioMart::Configurator->new($registry, $location);
            $configuratorCache->{$configuratorKey} = $configurator;
        }

        my $datasetModule =
            sprintf("BioMart::Dataset::%s", $datasetData->{'type'});
        $self->loadModule($datasetModule);

        return $datasetModule->new(
            'name'                => $datasetData->{'dataset'},
            'display_name'        => $datasetData->{'displayName'} || '',
            'configurator'        => $configurator,
            'initial_batchsize'   => $datasetData->{'initialBatchSize'} ||
                                     $self->get('init_batchsize'),
            'max_batchsize'       => $datasetData->{'maxBatchSize'} ||
                                     $self->get('max_batchsize'),
            'visible'             => $datasetData->{'visible'} || 0,
            'version'             => $datasetData->{'version'} || '',
            'interfaces'          => $datasetData->{'interfaces'} ||
                                     'default',
            'modified'            => $datasetData->{'modified'} ||
                                     'MODIFIED_UNAVAILABLE',
            'locationDisplayName' => $location->displayName,
            'locationName'        => $location->name,
            'virtualSchema'       => $virtualSchema->name);
    };

    # True when a parsed configuration node carries hidden="true".
    my $isHidden = sub {
        my ($node) = @_;
        return $node->{'hidden'} && $node->{'hidden'} eq 'true';
    };

    # Records in %$pointerDS every pointerDataset named by a visible
    # description below the visible pages/groups/collections of one kind.
    # $kind is 'Attribute' or 'Filter'. The counts are informational only.
    my $collectPointerDatasets = sub {
        my ($xmlHash, $kind, $pointerDS) = @_;

        foreach my $page (@{ $xmlHash->{$kind . 'Page'} }) {
            next if $isHidden->($page);
            foreach my $group (@{ $page->{$kind . 'Group'} }) {
                next if $isHidden->($group);
                foreach my $collection (@{ $group->{$kind . 'Collection'} }) {
                    next if $isHidden->($collection);
                    foreach my $description (@{ $collection->{$kind . 'Description'} }) {
                        next if $isHidden->($description);
                        if ($description->{'pointerDataset'}) {
                            $pointerDS->{ $description->{'pointerDataset'} }++;
                        }
                    }
                }
            }
        }
        return;
    };

    # Copy the list: virtual schemas may be removed from the registry
    # while we iterate.
    my $virtualSchemas = [ @{$registry->getAllVirtualSchemas} ];
    foreach my $virtualSchema (@{$virtualSchemas}) {

        # Copy the list: locations may be removed from the schema while we
        # iterate.
        my $locations = [ @{$virtualSchema->getAllLocations} ];
        foreach my $location (@{$locations}) {

            ###############################################################
            # Hack: extend the includeDatasets list with the (possibly
            # invisible) datasets that the included ones point to.
            # The original authors note that this block can simply be
            # dropped for 0.6.
            if ($location->includeDatasets()) {
                my %hackConfigurators;   # configurators local to this pass
                my %pointerDS;           # pointer dataset name => count

                my @includedDatasets = $location->retrieveDatasetInfo(
                    $virtualSchema->name, $virtualSchema->default);
                if (!@includedDatasets) {
                    my $name = $location->name;
                    warn("\n\nNo datasets available with given parameters for Location: $name\n");
                }

                foreach my $datasetData (@includedDatasets) {
                    my $dataset = $buildDataset->($datasetData, $location,
                        $virtualSchema, \%hackConfigurators);

                    # Every interface is requested, but only the XML of the
                    # last one is kept (as in the original code).
                    my $xml;
                    foreach my $interface (split(/\,/, $dataset->interfaces)) {
                        $xml = $dataset->getConfigurator->get('location')->getDatasetConfigXML(
                            $virtualSchema->name,
                            $dataset->name,
                            $interface,
                            0, 1); # last one is for not printing configure message
                    }

                    my $xmlHash = XMLin($xml, forcearray => [ @forceArray ],
                                        keyattr => []);

                    my $softwareVersion = $xmlHash->{'softwareVersion'};
                    if (!$softwareVersion || ($softwareVersion eq '0.4')) {
                        # Old-style configuration: upgrade it to 0.5 with
                        # the XSLT stylesheet, going through a temporary
                        # file in the current working directory.
                        my $params = BioMart::Web::CGIXSLT::read_https();

                        open(my $tempFH, '>', 'temp.xml')
                            or BioMart::Exception::Configuration->throw(
                                "Cannot open temp.xml for writing: $!\n");
                        print {$tempFH} $xml;
                        close($tempFH)
                            or BioMart::Exception::Configuration->throw(
                                "Cannot write temp.xml: $!\n");

                        # presumably transform() picks these parameters up
                        # through the hash returned by read_https() - confirm
                        $params->{'source'} = 'temp.xml';
                        $params->{'style'}  = $self->get('confDir') . '/mart_0_4_0_5.xsl';

                        my $new_xml;
                        eval { $new_xml = BioMart::Web::CGIXSLT::transform(); };
                        if ($@) {
                            BioMart::Web::CGIXSLT::print_error("Exception: Configurator Cannot parse xml as per xsl. $@\n");
                            exit;
                        }

                        # Now, we are printing and saving what we get
                        $xml = BioMart::Web::CGIXSLT::print_output($new_xml);
                        if (-e 'temp.xml') {
                            unlink 'temp.xml';
                        }

                        # Only an upgraded document needs to be parsed again.
                        $xmlHash = XMLin($xml, forcearray => [ @forceArray ],
                                         keyattr => []);
                    }

                    $collectPointerDatasets->($xmlHash, 'Attribute', \%pointerDS);
                    $collectPointerDatasets->($xmlHash, 'Filter',    \%pointerDS);
                } ## end of for loop each dataset

                if (%pointerDS) {
                    # Keep the datasets that were already on the list.
                    foreach my $included (split(/\,/, $location->includeDatasets())) {
                        $pointerDS{$included}++;
                    }
                    # join() keeps the separator even after a name that is
                    # false in boolean context; sorting makes the resulting
                    # list independent of hash ordering.
                    $location->includeDatasets(join(',', sort keys %pointerDS));
                }
            } ## end of if block - hack for tampering with includeDatasets list
            ###############################################################

            my @datasets = $location->retrieveDatasetInfo(
                $virtualSchema->name, $virtualSchema->default);
            if (!@datasets) {
                my $name = $location->name;
                warn("\n\nNo datasets available with given parameters for Location: $name\n");
            }

            foreach my $datasetData (@datasets) {
                my $dataset = $buildDataset->($datasetData, $location,
                    $virtualSchema, \%configurators);

                if ($location->isa("BioMart::Configuration::MartURLLocation")) {
                    $dataset->serverType("web");
                }
                else {
                    $dataset->serverType("rdbms");
                    $dataset->schema($location->schema);
                }
                $location->addDataset($dataset);
            }

            # Drop locations that contributed no dataset at all.
            if (@{$location->getAllDatasets} == 0) {
                $virtualSchema->removeLocation($location);
            }
        }

        # Drop virtual schemas that are left without any location.
        if (@{$virtualSchema->getAllLocations} == 0) {
            $registry->removeVirtualSchema($virtualSchema);
        }
    }

    return $registry;
}
# _registryXML
#
# Accumulates, in $self->{'registryXML'}, a MartRegistry XML document that
# holds one empty element per call.
#
# Usage:   $self->_registryXML($type, $contentsHash)   appends a node
#          $self->_registryXML()                       returns the document
#
# Args:    $type         - element name of the node to append
#          $contentsHash - hash ref of attribute values; only the keys
#                          listed in @attributes below are written, in that
#                          order, and only when present in the hash
# Returns: the document accumulated so far (undef if no node was ever added).
#
# Attribute values are escaped for use inside a double-quoted XML attribute.
# NOTE(review): this assumes callers pass unescaped values (e.g. as parsed
# from a registry file) - confirm against the callers.
sub _registryXML {
    my ($self, $type, $contentsHash) = @_;

    if ($type && $contentsHash) {
        my @attributes = qw(name displayName host port schema serverVirtualSchema databaseType database user password visible default martUser includeDatasets path redirect proxy );

        # Characters that may not appear verbatim in a quoted attribute.
        my %xmlEntityFor = (
            '&' => '&amp;',
            '<' => '&lt;',
            '>' => '&gt;',
            '"' => '&quot;',
        );

        my $reg_xml = $self->{'registryXML'};

        # First time here: start with the DOCTYPE and an empty root element.
        # The opening root tag is required for the document to be
        # well-formed once the closing tag is appended below.
        if (!$reg_xml) {
            $reg_xml = '<!DOCTYPE MartRegistry><MartRegistry></MartRegistry>';
        }

        my $node = "<$type ";
        foreach my $key (@attributes) {
            next unless exists $contentsHash->{$key};

            # A key that exists with an undefined value yields an empty
            # attribute.
            my $value = defined $contentsHash->{$key} ? $contentsHash->{$key} : '';
            $value =~ s/([&<>"])/$xmlEntityFor{$1}/g;

            $node .= $key . ' = ' . '"' . $value . '" ';
        }
        $node .= " />";

        # Swap the closing root tag for "<new node></MartRegistry>", which
        # appends the node as the last child of the root element.
        $node .= "</MartRegistry>";
        $reg_xml =~ s{</MartRegistry>}{$node};

        $self->{'registryXML'} = $reg_xml;
    }

    return $self->{'registryXML'};
}
1;
# vim: et