# Raw content of BioMart::Query
#
# BioMart module for BioMart::Query
#
# You may distribute this module under the same terms as perl itself
# POD documentation - main docs before the code
=head1 NAME
BioMart::Query
=head1 SYNOPSIS
Object which encapsulates a Query against a BioMart Dataset.
=head1 DESCRIPTION
The BioMart::Query object encapsulates a query against a BioMart Dataset.
This may involve complex data merging between multiple Datasets and the
chosen Dataset, using the Links system. Query objects hold lists of
BioMart::Configuration::Attribute objects, and lists of
BioMart::Configuration::BaseFilter implementing objects. They can also hold
one or more BioMart::Configuration::AttributeList objects
representing the Exportables from Datasets Exporting their ResultTable
to some other Dataset via a Link. They can also hold one or more
BioMart::Links objects describing requests to linking Multiple Datasets
together using their Exportable - Importable relationship.
=head1 AUTHOR - Arek Kasprzyk, Syed Haider, Darin London, Damian Smedley
=head1 CONTACT
This module is part of the BioMart project http://www.biomart.org
Questions can be posted to the mart-dev mailing list:
mart-dev@ebi.ac.uk
=head1 METHODS
=cut
package BioMart::Query;
use strict;
use warnings;
use Digest::MD5;
use XML::Simple qw(:strict);
use Data::Dumper;
use base qw(BioMart::Root);
=head2 _new
Usage : my $query = BioMart::Query->new;
Description: creates an empty Query object.
Returntype : BioMart::Query
Exceptions : none
Caller : caller
=cut
# Constructor hook: gives every attribute slot of a fresh Query its default
# value and optionally populates the object from an XML string.
#
# Named parameters (passed as a flat key/value list):
#   registry          - stored under the 'registry' attribute
#   virtualSchemaName - mandatory name of the virtual schema
#   xml               - optional XML string handed to _populateFromXML
#   mql, cgi          - recognised, but population is not implemented yet
#
# Throws BioMart::Exception::Query when virtualSchemaName is not defined.
sub _new {
    my ($self, @param) = @_;

    local($^W) = 0;    # prevent "odd number of elements" warning with -w.
    my %param = @param;

    $self->attr('registry', $param{'registry'});

    # empty/default settings for all constructed Query objects, applied in
    # this order; the container defaults are created fresh on every call
    my @defaults = (
        'dataset_names'                => {},
        'ordered_dataset_names'        => [],
        'attributes'                   => [],
        'filters'                      => [],
        'limitStart'                   => undef,
        'limitSize'                    => undef,
        'count'                        => undef,
        'header'                       => undef,
        'completionStamp'              => undef,
        'virtualSchema'                => undef,
        'attribute_lists'              => [],
        'links'                        => [],
        'orderby'                      => undef,
        'oldFilterListValues'          => {},
        'formatter'                    => 'TSV',    # default is tab sep output
        'finalDatasetOrder'            => undef,
        'softwareVersion'              => undef,
        'currentDS'                    => undef,
        'attsAndAttListsForXMLDisplay' => undef,
    );
    while (my ($slot, $default) = splice(@defaults, 0, 2)) {
        $self->attr($slot, $default);
    }

    my $virtualSchemaName = $param{'virtualSchemaName'};
    unless (defined $virtualSchemaName) {
        BioMart::Exception::Query->throw ("You need to define virtual schema name in order to create a Query object");
    }
    $self->virtualSchema($virtualSchemaName);

    if ($param{'xml'}) {
        # populate query object from XML string
        $self->_populateFromXML($param{'xml'});
    }
    elsif ($param{'mql'}) {
        # TODO: populate query object from MQL string
    }
    elsif ($param{'cgi'}) {
        # TODO: populate query object from CGI object
    }
}
=head2 toXML
Usage : my $xml = $query->toXML($limit_start, $limit_size, $count, $webClientTempering);
Description: serialises the Query object into an XML query string.
Returntype : string (XML)
Exceptions : none
Caller : caller
=cut
# Serialises the query into an XML string.
#
# Parameters:
#   $limit_start, $limit_size, $count - forwarded to the serialiser; they are
#                         replaced by undef when $webClientTempering is true
#   $webClientTempering - true when the XML is only meant for display in
#                         MartView
#
# Returns the string produced by _toXML_old or _toXML_latest.
sub toXML {
    my ($self, $limit_start, $limit_size, $count, $webClientTempering) = @_;

    ### in case this query has been declared via QR, or it is a martservice
    ### query where NO version was specified. martservice queries which need
    ### to be directed to another martservice, e.g. dicty, tend to lose the
    ### software version because of the XML to QUERY and back to XML
    ### conversion
    if (!$self->get('softwareVersion')) {
        my $registry      = $self->getRegistry;
        my $virtualSchema = $self->_getSchemaName($self->virtualSchema);
        my $datasetNames  = $self->getDatasetNames;
        foreach my $datasetName (@$datasetNames) {
            my $dataset  = $registry->getDatasetByName($virtualSchema, $datasetName);
            my $confTree = $dataset->getConfigurationTree(
                $self->getInterfaceForDataset($datasetName));
            # the version of the last dataset visited wins
            $self->set('softwareVersion', $confTree->software_version);
        }
    }

    ### for rest of the API calls; Query, QueryRunner, Tableset and family
    if (!$webClientTempering) {
        my $xml = $self->_toXML_old($limit_start, $limit_size, $count);
        return $xml;
    }

    # only for display purpose of Martview: limits and count are dropped
    my $version = $self->get('softwareVersion');
    # the version can still be undefined here (e.g. a query without
    # datasets), so test definedness before the string comparison
    if (defined $version && $version eq '0.4') {
        my $xml = $self->_toXML_old(undef, undef, undef);
        return $xml;
    }

    ## latest xml query
    my $xml = $self->_toXML_latest(undef, undef, undef);
    return $xml;
}
=head2 _getSchemaName
Usage : internal function, called by toXML and _toXML_latest
Description: to check if the given schema name is a URLpointer serverVirtualSchema or
a VirtualSchema from local registry
Returntype : schemaName
Exceptions : none
Caller : caller
=cut
# Resolves a schema name that may be either a virtual schema of the local
# registry or the serverVirtualSchema of a remote mart (URL pointer).
#
# Parameter: $trickyVSchema - the schema name to resolve
# Returns:   $trickyVSchema itself when a local virtual schema has that name;
#            otherwise the name of the local schema owning a mart whose
#            serverVirtualSchema equals it (the last such mart wins);
#            otherwise $trickyVSchema unchanged.
sub _getSchemaName {
    my ($self, $trickyVSchema) = @_;
    my $schemas = $self->getRegistry->getAllVirtualSchemas();

    # a local virtual schema of that name settles the question immediately
    foreach my $schema (@$schemas) {
        return $trickyVSchema if $schema->name eq $trickyVSchema;
    }

    # may be dicty so look into location->servervirtualschema for match
    my $realSchema = $trickyVSchema;
    foreach my $schema (@$schemas) {
        my $allMarts = $schema->getAllMarts();
        foreach my $mart (@$allMarts) {
            my $serverSchema = $mart->serverVirtualSchema;
            # NOTE(review): presumably marts without a serverVirtualSchema
            # return undef here - the guard keeps 'eq' from warning
            next unless defined $serverSchema;
            next unless $serverSchema eq $trickyVSchema;
            $realSchema = $schema->name();
        }
    }
    return $realSchema;
}
=head2 _visibleDataset
Usage : internal function, called by _toXML_latest
Description: to check if a dataset is visible or not
Returntype : true/false
Exceptions : none
Caller : caller
=cut
# Tells whether a dataset taking part in this query is a visible one.
#
# Parameter: $dataset - dataset name
# Returns:   1 when the dataset belongs to the query and its registry object
#            reports visible, 0 otherwise (including datasets that are not
#            part of the query at all).
sub _visibleDataset {
    my ($self, $dataset) = @_;

    # Only datasets registered on the query are looked up in the registry.
    my $datasetNames = $self->getDatasetNames;
    my $inQuery = grep { $dataset eq $_ } @$datasetNames;
    # Explicit false value: previously the sub fell off the end of a
    # foreach loop, whose return value Perl leaves unspecified.
    return 0 unless $inQuery;

    my $virtualSchema = $self->_getSchemaName($self->virtualSchema);
    my $datasetObj = $self->getRegistry->getDatasetByName($virtualSchema, $dataset);
    return $datasetObj->visible ? 1 : 0;
}
# Finds the visible dataset under which an attribute or filter of $dataset
# is to be placed.
#
# Parameters:
#   $dataset  - name of the dataset the attribute/filter belongs to
#   $vDataset - hash ref whose keys are the candidate visible dataset names
#
# Returns $dataset itself when it is visible; otherwise the key of $vDataset
# for which _getActualDS reports a link chain from $dataset; undef when no
# candidate qualifies.
sub getActualDS {
    my ($self, $dataset, $vDataset) = @_;
    my $actualDS;

    #### its already a visible dataset, so just append to this
    if ($self->_visibleDataset($dataset)) {
        $actualDS = $dataset;
        return $actualDS;
    }

    ## magic to find out e.g peptide or any genomic sequence attribute, which
    ## dataset it belongs to, say if we have a query with
    ## hsapiens_gene_ensembl and hsapiens_gene_vega, and a peptide from
    ## hsapiens_genomic_sequence: map every link target to its source
    my %sourceOf;
    my $allLinks = $self->get('links');
    foreach my $link (@$allLinks) {
        $sourceOf{ $link->targetDataset() } = $link->sourceDataset();
    }

    ## decide which visible dataset this att/filter should go to
    foreach my $visibleName (keys %$vDataset) {
        if ($self->_getActualDS(\%sourceOf, $dataset, $visibleName) == 1) {
            $actualDS = $visibleName;
            last;    # the chain walk stops at the first visible dataset
        }
    }
    return $actualDS;
}
=head2 _getActualDS
Usage : internal function, called by getActualDS
Description: to find the actual visible DS to be shown in XML
Returntype : true/false
Exceptions : none
Caller : caller
=cut
# Walks the link chain upwards from $dataset and reports whether
# $targetVisibleDS is the first visible dataset reached.
#
# Parameters:
#   $links           - hash ref mapping a link's target dataset name to its
#                      source dataset name
#   $dataset         - name of the dataset the walk starts from
#   $targetVisibleDS - name of the visible dataset being tested
#
# Returns 1 when the chain reaches $targetVisibleDS, 0 when the chain ends,
# reaches a different visible dataset first, or loops back on itself.
sub _getActualDS {
    my ($self, $links, $dataset, $targetVisibleDS) = @_;

    my %seen;
    my $current = $dataset;
    while (1) {
        # a dataset met twice means the links form a cycle: give up
        # instead of recursing forever
        return 0 if $seen{$current}++;

        my $source = $links->{$current};
        return 0 unless $source;
        return 1 if $source eq $targetVisibleDS;

        ## if we have reached another visible targetdataset.
        return 0 if $self->_visibleDataset($source);

        $current = $source;
    }
}
# Looks for the visible dataset of an attribute/filter by following the
# query links in reverse (source -> target) direction.
#
# Parameters:
#   $dataset  - name of the dataset the attribute/filter belongs to
#   $vDataset - hash ref whose keys are the candidate visible dataset names
#
# Returns the target dataset linked from $dataset when that target is a key
# of $vDataset; otherwise returns nothing (undef in scalar context).
sub getActualDS_reverseLinks {
    my ($self, $dataset, $vDataset) = @_;

    # map every link source to its target; a later link overrides an
    # earlier one with the same source
    my %targetOf;
    my $allLinks = $self->get('links');
    foreach my $link (@$allLinks) {
        $targetOf{ $link->sourceDataset() } = $link->targetDataset();
    }

    return unless exists $targetOf{$dataset};

    my $target = $targetOf{$dataset};
    return $target if defined $target && exists $vDataset->{$target};
    return;
}
=head2 _toXML_latest
Usage : internal function, called by toXML
Description: for new xml query
Returntype : string (XML)
Exceptions : none
Caller : caller
=cut
# Builds the XML representation of the query in its "latest" form and
# returns it as a string.
#
# Parameters: $limit_start, $limit_size, $count - each defaults to the empty
# string when false.
#
# The string is assembled from one fragment per visible dataset (%vDataset,
# keyed by dataset name). Filters and attributes are appended to the
# fragment of the visible dataset that getActualDS (or, for filters,
# getActualDS_reverseLinks) assigns them to.
#
# NOTE(review): in this copy every qq template holds nothing but a newline,
# so several locals ($registry, $datasetBlock_open, $softwareVersion,
# $interface, $visibleDSCount, $value, ...) are not referenced by any
# template as it appears here. Presumably the XML markup was lost when the
# file was extracted - confirm against the upstream BioMart source.
sub _toXML_latest
{
my ($self,$limit_start,$limit_size,$count) = @_;
# Dumps query object into an xml string
my $registry = $self->getRegistry;
$limit_size ||= q{};
$limit_start ||= q{};
$count ||= q{};
my $datasetBlock_open = 0;
my $softwareVersion = $self->get('softwareVersion');
my $xml = qq|
|;
my $datasets = $self->getDatasetNames;
## open dataset tags for visible datasets only first and then
## append the filters and atts to the block they belong to.
## this is done on the naming convention which biomart follows
## datasetName_content_type
my $visibleDSCount=0;
my $actualDS;
my %vDataset;
foreach my $dataset(@$datasets)
{
my $interface = $self->getInterfaceForDataset($dataset);
if($self->_visibleDataset($dataset)) ## only for visible datasets,
{
$vDataset{$dataset} = qq |
|;
$visibleDSCount++;
}
}
## Filters
my $filts = $self->getAllFilters();
foreach my $filter (@$filts)
{
$actualDS = $self->getActualDS($filter->dataSetName, \%vDataset);
# e.g if filter is from gnf_xxx or evoc_xxx datasets which are generic for all but
# used for human so far. the only way to assign such filters to the corresponding dataset
# is to find the representative visible dataset in reverse order in LINKS Target and source pairs
if(!$actualDS)
{
$actualDS = $self->getActualDS_reverseLinks($filter->dataSetName, \%vDataset);
}
# NOTE(review): $actualDS may still be false after the reverse lookup; it is
# used as a %vDataset key below regardless.
if ($filter->isa("BioMart::Configuration::ValueFilter")
|| $filter->isa("BioMart::Configuration::FilterList_List"))
{
# drain the filter's table, collecting every column of every row
my @values;
my @rows;
my $atable = $filter->getTable;
while (my $row = $atable->nextRow) {
push @rows,$row;
foreach my $col (@$row) {
push @values,$col;
}
}
# need to regenerate AttributeTable cols for subsequent calls
$atable->addRows(\@rows);
my $value = join(',',@values);
$vDataset{$actualDS} .= qq |
|;
}
elsif ($filter->isa("BioMart::Configuration::FilterList"))
{
my @values;
my $filts = $filter->get('filters');
my @filters = @$filts;
my $attribute_table = $filter->get('attribute_table');
my $rows_avail = $attribute_table->hasMoreRows();
my $value;
# deal with non-batching invisible datasets for webservice
# need to keep reusing the same values for the filterlist
if (!$rows_avail)
{
if (!$filter->batching || $filter->batching != 1)
{
my $oldFilterListValues = $self->get('oldFilterListValues');
$value = $oldFilterListValues->{$filter->name};
}
}
else
{
# $rows_avail is not refreshed inside the loop, so the loop ends only
# when _inBatch returns false
while ($rows_avail && $filter->_inBatch($attribute_table)) {
my $row = $attribute_table->nextRow();
# columns of one row are joined with '|', rows with ','
my $val = '';
my $separator = '';
foreach my $col (@$row) {
$val = $val.$separator.$col;
$separator = '|';
}
push @values,$val;
}
$value = join(',',@values);
}
# needed for correct batching behaviour
$filter->set('exhausted', 1) unless ($rows_avail);
# remember the value under the filter name so it can be reused above
my $oldFilterListValues = $self->get('oldFilterListValues');
$oldFilterListValues->{$filter->name} = $value;
$self->set('oldFilterListValues',$oldFilterListValues);
unless( defined $value) {$value="";}
$vDataset{$actualDS} .= qq |
|;
}
elsif ($filter->isa("BioMart::Configuration::BooleanFilter")) {
$vDataset{$actualDS} .= qq |
|;
}
}
## Attributes and AttributeLists
my $attsAndAttLists = $self->get('attsAndAttListsForXMLDisplay'); ## this list of single-entry hashes is populated in addAttribute() and _populateFromXML() only
foreach my $attribute (@$attsAndAttLists)
{
# each entry maps an attribute (or attribute list) name to its dataset name
foreach my $attName (keys %$attribute) {
$actualDS = $self->getActualDS($attribute->{$attName}, \%vDataset);
$vDataset{$actualDS} .= qq |
|;
}
}
# append the closing template to every fragment; $ds keeps the fragment
# visited last (hash order)
my $ds;
foreach (keys %vDataset)
{
$vDataset{$_} .= qq |
|;
$ds=$vDataset{$_};
}
# so it does not forget to stick dataset for counts
if ($count eq '1') { $xml .= qq |$ds|}
# ------ Determine correct order of datasets in the query without calling QueryRunner
# ------ using getAllAttributes to find corresponding datasets and then ascertain
# ------ which dataset comes first in XML representation
my $allAtts = $self->getAllAttributes();
foreach (@{$allAtts})
{
if ($vDataset{$_->dataSetName})
{
$xml .= qq |
$vDataset{$_->dataSetName}|;
delete $vDataset{$_->dataSetName}; # so this never added twice
}
# may be its a query with only structure or GS atts. for example peptide, transcript_id query
# you will only see invisible datasets
if (!$vDataset{$_->dataSetName})
{
my $temp_actualDS = $self->getActualDS($_->dataSetName,\%vDataset);
if ($temp_actualDS && $vDataset{$temp_actualDS})
{
$xml .= qq |
$vDataset{$temp_actualDS}|;
delete $vDataset{$temp_actualDS}; # so this never added twice
}
}
}
# ----------------------------------------------------------------------------------
$xml .= qq|
|;
return $xml;
}
=head2 _toXML_old
Usage : internal function, called by toXML
Description: for old style xml query, still used by dicty and wormbase
Returntype : string (XML)
Exceptions : none
Caller : caller
=cut
# Builds the old-style XML representation of the query and returns it as a
# string.
#
# Parameters: $limit_start, $limit_size, $count - each defaults to the empty
# string when false.
#
# For every dataset of the query the string receives, in this order: an
# opening template, one template per attribute name of each attribute list,
# one per attribute, one per filter (by filter type) and a closing template.
# One template per link follows after all datasets.
#
# NOTE(review): in this copy every qq template holds nothing but a newline,
# so several locals ($softwareVersion, $interface, $attributeName, $value,
# ...) are not referenced by any template as it appears here. Presumably the
# XML markup was lost when the file was extracted - confirm against the
# upstream BioMart source.
sub _toXML_old
{
my ($self,$limit_start,$limit_size,$count) = @_;
# Dumps query object into an xml string
$limit_size ||= q{};
$limit_start ||= q{};
$count ||= q{};
my $softwareVersion = $self->get('softwareVersion');
my $xml = qq|
|;
my $datasets = $self->getDatasetNames;
foreach my $dataset(@$datasets)
{
my $interface = $self->getInterfaceForDataset($dataset);
$xml .= qq |
|;
# attribute lists first: one template per comma-separated attribute name
my $atts = $self->getAllAttributeLists($dataset);
foreach my $attribute_list (@$atts)
{
my $attributeString = $attribute_list->attributeString;
my @attributeNames = split(/,/,$attributeString);
foreach my $attributeName (@attributeNames){
$xml .= qq |
|;
}
}
# then the plain attributes of this dataset
$atts = $self->getAllAttributes($dataset);
foreach my $attribute (@$atts)
{
$xml .= qq |
|;
}
my $filts = $self->getAllFilters($dataset);
foreach my $filter (@$filts)
{
if ($filter->isa("BioMart::Configuration::ValueFilter")
|| $filter->isa("BioMart::Configuration::FilterList_List"))
{
# drain the filter's table, collecting every column of every row
my @values;
my @rows;
my $atable = $filter->getTable;
while (my $row = $atable->nextRow)
{
push @rows,$row;
foreach my $col (@$row)
{
push @values,$col;
}
}
# need to regenerate AttributeTable cols for subsequent calls
$atable->addRows(\@rows);
my $value = join(',',@values);
$xml .= qq |
|;
}
elsif ($filter->isa("BioMart::Configuration::FilterList"))
{
my @values;
my $filts = $filter->get('filters');
my @filters = @$filts;
my $attribute_table = $filter->get('attribute_table');
my $rows_avail = $attribute_table->hasMoreRows();
my $value;
# deal with non-batching invisible datasets for webservice
# need to keep reusing the same values for the filterlist
if (!$rows_avail)
{
if (!$filter->batching || $filter->batching != 1)
{
my $oldFilterListValues = $self->get('oldFilterListValues');
$value = $oldFilterListValues->{$filter->name};
}
}
else
{
# $rows_avail is not refreshed inside the loop, so the loop ends only
# when _inBatch returns false
while ($rows_avail && $filter->_inBatch($attribute_table))
{
my $row = $attribute_table->nextRow();
# columns of one row are joined with '|', rows with ','
my $val = '';
my $separator = '';
foreach my $col (@$row)
{
$val = $val.$separator.$col;
$separator = '|';
}
push @values,$val;
}
$value = join(',',@values);
}
# needed for correct batching behaviour
$filter->set('exhausted', 1) unless ($rows_avail);
# remember the value under the filter name so it can be reused above
my $oldFilterListValues = $self->get('oldFilterListValues');
$oldFilterListValues->{$filter->name} = $value;
$self->set('oldFilterListValues',$oldFilterListValues);
# removing batching from second dataset onwards/invisible datasets:
# strips every limitStart ... limitSize="<digits>" span from the XML built so far
$xml =~ s/limitStart.*?limitSize\s*=\s*\"\d*\"//g;
unless( defined $value) {$value="";}
$xml .= qq |
|;
}
elsif ($filter->isa("BioMart::Configuration::BooleanFilter"))
{
$xml .= qq |
|;
}
}
$xml .= qq |
|;
}
# one template per link, after all dataset blocks
my $links = $self->get('links');
foreach my $link (@$links)
{
$xml .= qq |
|;
}
$xml .= qq|
|;
return $xml;
}
=head2 validate
Usage : $query->validate;
Description: validates a populated Query object.
Returntype : none
Exceptions : BioMart::Exception::Query if any of validation steps fail
Caller : caller
=cut
# Validates a populated query. The checks, in order:
#   * the query names at least one dataset;
#   * unless a count was requested, at least one attribute is selected;
#   * no more than two visible datasets take part;
#   * per dataset, all attributes come from a single attribute page
#     (attributes found on a hideDisplay page are tolerated);
#   * per dataset, the maxSelect limits of the page, its groups and its
#     collections are respected;
#   * per dataset, every value of a ValueFilter carrying a regexp matches it.
#
# Returns nothing. Throws BioMart::Exception::Usage when a check fails.
sub validate {
    my $self = shift;
    my $registry = $self->getRegistry;
    my $datasetNames = $self->getDatasetNames;

    # getDatasetNames hands back an array reference, which is always true,
    # so emptiness has to be tested on the list itself
    if (!$datasetNames || !@$datasetNames) {
        BioMart::Exception::Usage->throw('Problem: No dataset names in the Query');
    }

    # validate if there are any attributes in query as yet, only fires
    # exception if results are requested. Shouldnt moan if count is requested
    my $queryAttsExist = 0;
    foreach my $datasetName (@$datasetNames) {
        if ($self->getAllAttributes($datasetName)) {
            $queryAttsExist = 1;
            last;
        }
    }
    if (!$queryAttsExist) {
        my $count = $self->count();
        # an unset count is treated like 0, without an "uninitialized" warning
        if (!defined $count || $count == 0) {
            BioMart::Exception::Usage->throw('No attributes selected, please select at least one');
        }
    }

    my $visibleDataset = 0;
    foreach my $datasetName (@$datasetNames) {
        my $queryAtts = $self->getAllAttributes($datasetName) || [];

        # validate the configuration trees
        my $dataset = $registry->getDatasetByName($self->virtualSchema,
                                                  $datasetName);
        $visibleDataset++ if ($dataset->visible());
        BioMart::Exception::Usage->throw('Only two visible datasets allowed in query')
            if ($visibleDataset > 2);
        my $confTree = $dataset->getConfigurationTree(
            $self->getInterfaceForDataset($datasetName));
        my $pages = $confTree->getAllAttributeTrees;

        # do apage validation first - should be possible to get all query atts
        # from a single page
        my ($failed, $apage);
        PAGE: foreach my $currentPage (@$pages) {
            $failed = 0;
            ATT: foreach my $attribute (@$queryAtts) {
                next ATT if ($currentPage->getAttributeByName($attribute->name));
                # check the attribute is not present in a hideDisplay page instead
                foreach my $hiddenPage (@$pages) {
                    next if ($hiddenPage->name eq $currentPage->name ||
                             !$hiddenPage->hideDisplay ||
                             $hiddenPage->hideDisplay ne 'true');
                    next ATT if ($hiddenPage->getAttributeByName(
                                     $attribute->name));
                }
                $failed = 1;
                next PAGE;
            }
            $apage = $currentPage;
            last PAGE;
        }
        BioMart::Exception::Usage->throw('Attributes from multiple attribute pages are not allowed')
            if ($failed);

        # a configuration without any attribute page leaves no page limits
        # to check (and no page object to call methods on)
        if (defined $apage) {
            #------------- this section deals with max number of groups
            #------------- selected in a query. eg. ensembl - homologs tree
            my $maxPageSelect = $apage->maxSelect();
            if ($maxPageSelect) {
                my $pageSelectCount = 0;
                foreach my $attgroup (@{$apage->getAllAttributeGroups}) {
                    my $groupFlag = 0;
                    foreach my $acollection (@{$attgroup->getAllCollections}) {
                        foreach my $att (@{$acollection->getAllAttributes}) {
                            foreach my $attribute (@$queryAtts) {
                                if ($attribute->name eq $att->name) {
                                    $groupFlag = 1; ## group marked for presence in query
                                }
                            }
                        }
                    }
                    $pageSelectCount++ if ($groupFlag == 1);
                }
                if ($pageSelectCount > $maxPageSelect) {
                    BioMart::Exception::Usage->throw('Too many groups select for '.$apage->name().' Max allowed : '.$maxPageSelect);
                }
            }
            #-----------------------------------------------------------

            foreach my $agroup (@{$apage->getAllAttributeGroups}) {
                my $maxGroupSelect = $agroup->maxSelect || 0;
                my $groupSelect = 0;    # collections of this group with a selected attribute
                foreach my $acollection (@{$agroup->getAllCollections}) {
                    my $maxCollectionSelect = $acollection->maxSelect || 0;
                    my $collectionSelect = 0;    # selected attributes of this collection
                    my $prevCollection = 0;      # non-zero once this collection was counted for the group
                    foreach my $att (@{$acollection->getAllAttributes}) {
                        foreach my $attribute (@$queryAtts) {
                            next if ($attribute->name ne $att->name);
                            $groupSelect++ if (!$prevCollection);
                            $prevCollection++;
                            $collectionSelect++;
                            if ($maxGroupSelect &&
                                $groupSelect > $maxGroupSelect) {
                                BioMart::Exception::Usage->throw('Too many attributes selected for '.$agroup->displayName);
                            }
                            if ($maxCollectionSelect &&
                                $collectionSelect > $maxCollectionSelect) {
                                BioMart::Exception::Usage->throw('Too many attributes selected for '.$acollection->displayName);
                            }
                        }
                    }
                }
            }
        }

        # validate the filters added to the query
        my $filts = $self->getAllFilters($datasetName);
        foreach my $filter (@$filts) {
            next unless ($filter->isa("BioMart::Configuration::ValueFilter"));
            # validate valueFilter
            my $regexp = $filter->regexp();
            next unless ($regexp);
            # reading the table consumes its rows, so they are copied into a
            # fresh table that replaces the old one afterwards
            my $attribute_table = $filter->getTable;
            my $new_attribute_table = BioMart::AttributeTable->new();
            while (my $row = $attribute_table->nextRow()) {
                $new_attribute_table->addRow($row);
                if ($$row[0] !~ /$regexp/) {
                    BioMart::Exception::Usage->throw('Wrong format value for '.$filter->displayName);
                }
            }
            $filter->setTable($new_attribute_table);
        }
    }
    return;
}
=head2 virtualSchema
Usage : my $vSchema = $query->virtualSchema;
$query->virtualSchema($newSchema);
Description: gets/sets the virtualSchema for the query.
Defaults to 'defaultQuery'
Returntype : string virtualSchema
Exceptions : none
Caller : caller
=cut
# Combined getter/setter for the 'virtualSchema' attribute. A true argument
# is stored first; the stored value is returned in every case.
sub virtualSchema {
    my ($self, $vSchema) = @_;
    $self->set('virtualSchema', $vSchema) if $vSchema;
    return $self->get('virtualSchema');
}
=head2 getRegistry
Usage : my $registry = $query->getRegistry;
Description: Returns a Registry object.
Returntype : Registry object.
Exceptions : none
Caller : caller
=cut
# Returns whatever is stored under the 'registry' attribute.
sub getRegistry {
    my ($self) = @_;
    my $registry = $self->get('registry');
    return $registry;
}
=head2 getInterfaceForDataset
Usage : my $interface = $query->getInterfaceForDataset($dataset);
Description: Returns the interface name used for the supplied dataset
in this query.
Returntype : string interface name.
Exceptions : none
Caller : caller
=cut
# Returns the interface name recorded for $dataset in the 'dataset_names'
# hash (undef when the dataset is not recorded).
sub getInterfaceForDataset {
    my ($self, $dataset) = @_;
    my $interfaceOf = $self->get('dataset_names');
    my $interface   = $interfaceOf->{$dataset};
    return $interface;
}
=head2 getDatasetNames
Usage : my $dataSets = $query->getDatasetNames;
foreach my $subName (@{$dataSets}) { ... }
Description: Returns a list_ref of names for all Datasets required to
resolve this Query.
Returntype : list_ref of scalar dataSet names.
Exceptions : none
Caller : caller
=cut
# Returns a new array reference holding the keys of the 'dataset_names'
# hash, in hash order.
sub getDatasetNames {
    my ($self) = @_;
    return [ keys %{$self->get('dataset_names')} ];
}
=head2 getOrderedDatasetNames
Usage : my $dataSets = $query->getOrderedDatasetNames;
foreach my $subName (@{$dataSets}) { ... }
Description: Returns an ordered list_ref of names for all Datasets required to
resolve this Query.
Returntype : list_ref of scalar dataSet names.
Exceptions : none
Caller : caller
=cut
# Returns the array reference stored under 'ordered_dataset_names', or undef
# when that list is empty.
sub getOrderedDatasetNames {
    my ($self) = @_;
    my $ordered = $self->get('ordered_dataset_names');
    if (@$ordered) {
        return $ordered;
    }
    # deliberately a scalar undef (also in list context), as before
    return undef;
}
=head2 getAllAttributes
Usage : get all attributes, involving all datasets:
my $atts = $query->getAllAttributes;
get only those attributes involving a specific dataset:
my $atts = $query->getAllAttributes($subName);
Description: Returns all BioMart::Configuration::Attribute objects from a
Query, across all involved Datasets, or just for a particular
Dataset. BioMart::Configuration::Attribute objects contained
in any BioMart::Configuration::AttributeList objects added to
the Query will not be returned. These should be retrieved
from the Query using its getAllAttributeLists method.
Returntype : list_ref of BioMart::Configuration::Attribute objects
Exceptions : none
Caller : caller
=cut
# Returns the attributes of the query as an array reference: all of them
# (the stored list itself) when no dataset name is given, otherwise a new
# list restricted to attributes whose dataSetName equals $dataset_name.
# An empty result is reported as undef.
sub getAllAttributes {
    my ($self, $dataset_name) = @_;
    my $all = $self->get('attributes');

    my $wanted = $dataset_name
        ? [ grep { $_->dataSetName() eq $dataset_name } @$all ]
        : $all;

    return @$wanted ? $wanted : undef;
}
# Replaces the stored attribute list with a new empty list.
sub removeAllAttributes {
    my ($self) = @_;
    return $self->set('attributes', []);
}
# Replaces the stored filter list with a new empty list.
sub removeAllFilters {
    my ($self) = @_;
    return $self->set('filters', []);
}
=head2 getAllAttributeLists
Usage : get all attributeLists, involving all datasets:
my $attLists = $query->getAllAttributeLists;
get only those attributeLists involving a specific dataset:
my $attLists = $query->getAllAttributeLists($subName);
Description: Returns all BioMart::Configuration::AttributeList objects
from a Query, across all involved Datasets, or just for a
particular Dataset.
Returntype : list_ref of BioMart::Configuration::AttributeList objects
Exceptions : none
Caller : caller
=cut
# Returns the attribute lists of the query as an array reference: the stored
# list itself when no dataset name is given, otherwise a new (possibly
# empty) list restricted to lists whose dataSetName equals $dataset_name.
sub getAllAttributeLists {
    my ($self, $dataset_name) = @_;
    my $stored = $self->get('attribute_lists');
    return $stored unless $dataset_name;

    my @matching = grep { $_->dataSetName() eq $dataset_name } @$stored;
    return \@matching;
}
=head2 getAttributeListByLinkName
Usage : my $exportable = $query->getAttributeListByLinkName($linkName);
Description : Get an Exportable AttributeList for a given LinkName.
This can be used to get Fields from a ResultTable
based on the name of the Attributes in the AttributeList,
and their order. Mainly for BioMart::DatasetI implementations
needing to merge data from the exportable with its own data,
Returntype : BioMart::Configuration::AttributeList (may be undef)
Exceptions : none
Caller : BioMart::DatasetI.
=cut
# Returns the first stored attribute list whose linkName equals $linkName,
# or undef when there is none.
sub getAttributeListByLinkName {
    my ($self, $linkName) = @_;
    my $candidates = $self->get('attribute_lists');

    foreach my $candidate (@{$candidates}) {
        return $candidate if $candidate->linkName() eq $linkName;
    }
    # deliberately a scalar undef (also in list context), as before
    return undef;
}
=head2 getAttributeListByName
Usage : my $exportable = $query->getAttributeListByName($name);
Description : Get an Exportable AttributeList for a given Name.
Returntype : BioMart::Configuration::AttributeList (may be undef)
Exceptions : none
Caller : BioMart::DatasetI.
=cut
# Returns the first stored attribute list whose name equals $name, or undef
# when there is none.
sub getAttributeListByName {
    my ($self, $name) = @_;
    my $candidates = $self->get('attribute_lists');

    foreach my $candidate (@{$candidates}) {
        return $candidate if $candidate->name() eq $name;
    }
    # deliberately a scalar undef (also in list context), as before
    return undef;
}
=head2 getAllFilters
Usage : get all filters, involving all subsystems:
my $filts = $query->getAllFilters;
get only those filters involving a specific dataset:
my $filts = $query->getAllFilters($subName);
Description: Returns all BioMart::Configuration::BaseFilter implementing
objects from a Query, across all involved Datasets, or just
for a particular Dataset. This list will include any FilterList
objects added to the Query as Importables.
Returntype : list_ref of BioMart::Configuration::BaseFilter implementing
objects.
Exceptions : none
Caller : caller
=cut
# Returns the filters of the query as an array reference: all of them (the
# stored list itself) when no dataset name is given, otherwise a new list
# restricted to filters whose dataSetName equals $dataset_name.
# An empty result is reported as undef.
sub getAllFilters {
    my ($self, $dataset_name) = @_;
    my $all = $self->get('filters');

    my $wanted = $dataset_name
        ? [ grep { $_->dataSetName() eq $dataset_name } @$all ]
        : $all;

    return @$wanted ? $wanted : undef;
}
=head2 getAllPlaceholderFilters
Usage : get only those placeholder filters involving a specific dataset:
my $filts = $query->getAllPlaceholderFilters($subName);
Description: Returns all BioMart::Configuration::BaseFilter implementing
objects from a Query, for a particular Dataset that were
implemented as placeholders.
Returntype : list_ref of BioMart::Configuration::BaseFilter implementing
objects.
Exceptions : none
Caller : caller
=cut
# Returns a new array reference with every stored filter whose
# pointedFromDataset is true and equal to $dataset_name (possibly empty).
sub getAllPlaceholderFilters {
    my ($self, $dataset_name) = @_;
    my $stored = $self->get('filters');

    my @placeholders = grep {
        $_->pointedFromDataset() && $_->pointedFromDataset eq $dataset_name
    } @$stored;

    return \@placeholders;
}
=head2 getLinks
Usage : my $links = $query->getLinks($sourceDataset,$targetDataset)
Description: Finds the link (if any) set on the Query object between the
source and target datasets and returns it
Returntype : BioMart::Links object (if a matching link was found)
Exceptions : none
Caller : caller
=cut
# Finds the link stored on the query that leads from $sourceDataset to
# $targetDataset. Links are matched in that direction only.
#
# Returns the first matching link object; returns nothing (undef in scalar
# context, the empty list in list context) when no stored link matches.
sub getLinks {
    my ($self, $sourceDataset, $targetDataset) = @_;    # only use in 1 direction
    my $links = $self->get('links');

    foreach my $link (@$links) {
        next unless $link->sourceDataset() eq $sourceDataset;
        next unless $link->targetDataset() eq $targetDataset;
        return $link;
    }

    # Explicit "nothing found": previously the sub fell off the end of the
    # foreach loop, whose return value Perl leaves unspecified.
    return;
}
=head2 setDataset
Usage : $query->setDataset($datasetName);
Description: sets the dataset name for adding attributes and filters
Returntype : the current dataset name
Exceptions : none
Caller : query object itself
=cut
# Stores a true $dataset under 'currentDS' (the dataset that addAttribute,
# addFilter and addAttributeFilter read) and returns the stored value.
sub setDataset {
    my ($self, $dataset) = @_;
    $self->set('currentDS', $dataset) if $dataset;
    return $self->get('currentDS');
}
=head2 addLinks
Usage : $query->addLinks($link, $sourceInterface, $targetInterface);
Description: Adds a BioMart::Links object to the query
Returntype : none
Exceptions : none
Caller : caller
=cut
# Appends $link to the stored list of links and registers its source and
# target datasets, with their interfaces, through addDatasetName.
sub addLinks {
    my ($self, $link, $sourceInterface, $targetInterface) = @_;

    # adds links object
    my $stored = $self->get('links');
    push @$stored, $link;
    $self->set('links', $stored);

    $self->addDatasetName($link->sourceDataset, $sourceInterface);
    return $self->addDatasetName($link->targetDataset, $targetInterface);
}
=head2 addAttribute
Usage : $query->addAttribute($attribute_name,
$interface);
Description: Adds a BioMart::Configuration::Attribute object to the Query,
first recovering it from the registry by name
Returntype : none
Exceptions : none
Caller : caller
=cut
# Looks up an attribute (or attribute list) by name in the registry and adds
# it to the query.
#
# Parameters:
#   $attributename - name to look up in the dataset set via setDataset
#   $interface     - interface name, 'default' when false
#
# The software version returned by the registry is stored on the query. The
# looked-up name and its dataset are recorded in
# 'attsAndAttListsForXMLDisplay'. An object offering getAllAttributes has
# each of its member attributes added through _addAttribute; any other
# object is added itself.
#
# Throws BioMart::Exception::Query when the registry returns no attribute.
sub addAttribute {
    my ($self, $attributename, $interface) = @_;
    my $schema_name  = $self->virtualSchema() || 'default';
    my $dataset_name = $self->get('currentDS');
    $interface ||= 'default';

    my $registry = $self->get('registry');
    my ($attribute, $softwareVersion) = $registry->getAttribute(
        $dataset_name, $attributename, $schema_name, $interface);
    $self->set('softwareVersion', $softwareVersion);

    # fail with the query's own exception instead of dying on a method call
    # against an undefined value further down
    BioMart::Exception::Query->throw("Tried to add nonexistent attribute to query")
        if (!defined($attribute));

    ## this may be an attribute list, so store the looked-up name together
    ## with its dataset: to_XML_latest displays this name rather than the
    ## names of the individual attributes. The same logic goes into
    ## _populateFromXML
    my $displayEntries = $self->get('attsAndAttListsForXMLDisplay');
    my $entry = {};
    $entry->{$attribute->name} = $attribute->dataSetName;
    push @{$displayEntries}, $entry;
    $self->set('attsAndAttListsForXMLDisplay', $displayEntries);

    if (UNIVERSAL::can($attribute, 'getAllAttributes')) {
        my $members = $attribute->getAllAttributes;
        foreach my $member (@{$members}) {
            $self->_addAttribute($member);
        }
    }
    else {
        $self->_addAttribute($attribute);
    }
}
=head2 addAttributeFilter
Usage : $query->addAttributeFilter($attribute_name,
$values,
$interface);
Description: Adds a BioMart::Configuration::Filter object to the Query,
first recovering it from the Attribute registry by name
Returntype : none
Exceptions : none
Caller : caller
=cut
# Looks up an attribute by name in the registry, loads the given values into
# it and adds it to the query as a filter via _addFilter.
#
# Parameters:
#   $attributename - name to look up in the dataset set via setDataset
#   $values        - array ref of values; a value matching /Only|Excluded/i
#                    sets the excluded flag and ends the processing of values
#   $interface     - interface name, 'default' when false
#
# Throws BioMart::Exception::Query when $values is not defined.
sub addAttributeFilter {
    my ($self, $attributename, $values, $interface) = @_;
    my $schema_name  = $self->virtualSchema() || 'default';
    my $dataset_name = $self->get('currentDS');
    $interface ||= 'default';

    my $registry = $self->get('registry');
    my ($attribute, $softwareVersion) = $registry->getAttribute(
        $dataset_name, $attributename, $schema_name, $interface);
    $self->set('softwareVersion', $softwareVersion);

    unless (defined($values)) {
        BioMart::Exception::Query->throw("Value not defined for getSetFilter");
    }

    my $atbl = BioMart::AttributeTable->new();
    my $rowsAdded = 0;
    VALUE: foreach my $value (@{$values}) {
        unless ($value =~ /Only|Excluded/i) {
            # an ordinary value becomes a one-column row
            $rowsAdded++;
            $atbl->addRow([ $value ]);
            next VALUE;
        }
        # when both words occur, 'Only' is applied last and wins
        $attribute->setExcluded(1) if ($value =~ /Excluded/i);
        $attribute->setExcluded(0) if ($value =~ /Only/i);
        last VALUE;
    }

    $attribute->setTable($atbl) if ($rowsAdded);
    $self->_addFilter($attribute);
}
=head2 addFilter
Usage : $query->addFilter($filter_name,
$values,
$interface);
Description: Adds a BioMart::Configuration::BaseFilter implementing object
to the Query,first recovering it from the registry by name and
adding the values in the $values arrayref
Returntype : none
Exceptions : none
Caller : caller
=cut
# Looks up a filter by name in the registry, loads the given values into it
# and adds it to the query via _addFilter.
#
# Parameters:
#   $filtername - name to look up in the dataset set via setDataset
#   $values     - array ref of values; a value matching /Only|Excluded/i
#                 sets the excluded flag and ends the processing of values
#   $interface  - interface name, 'default' when false
#
# Throws BioMart::Exception::Query when $values is not defined.
sub addFilter {
    my ($self, $filtername, $values, $interface) = @_;
    my $schema_name  = $self->virtualSchema() || 'default';
    my $dataset_name = $self->get('currentDS');
    $interface ||= 'default';

    my $registry = $self->get('registry');
    my ($filter, $softwareVersion) = $registry->getFilter(
        $dataset_name, $filtername, $schema_name, $interface);
    $self->set('softwareVersion', $softwareVersion);

    unless (defined($values)) {
        BioMart::Exception::Query->throw("Value not defined for getSetFilter");
    }

    my $atbl = BioMart::AttributeTable->new();
    my $rowsAdded = 0;
    VALUE: foreach my $value (@{$values}) {
        unless ($value =~ /Only|Excluded/i) {
            # an ordinary value becomes a one-column row
            $rowsAdded++;
            $atbl->addRow([ $value ]);
            next VALUE;
        }
        # when both words occur, 'Only' is applied last and wins
        $filter->setExcluded(1) if ($value =~ /Excluded/i);
        $filter->setExcluded(0) if ($value =~ /Only/i);
        last VALUE;
    }

    $filter->setTable($atbl) if ($rowsAdded);
    $self->_addFilter($filter);
}
=head2 addAttributeWithoutLinking
Usage : $query->addAttributeWithoutLinking($att);
Description: Adds a BioMart::Configuration::Attribute object to the Query,
maintaining the order of addition. Unlike the _addAttribute
method links for placeholder attributes are not automatically
created. This is necessary for the single dataset subqueries
created within QueryRunner
Returntype : none
Exceptions : none
Caller : caller
=cut
# Appends $attribute to the query's attribute list and records its dataset
# name and interface on the query. No links are created here.
# Throws BioMart::Exception::Query when $attribute is undefined.
sub addAttributeWithoutLinking{
    my ($self, $attribute) = @_;
    unless (defined($attribute)) {
        BioMart::Exception::Query->throw("Tried to add nonexistent attribute to query");
    }
    my $attributeList = $self->get('attributes');
    push @{$attributeList}, $attribute;
    $self->set('attributes', $attributeList);
    $self->addDatasetName($attribute->dataSetName, $attribute->interface);
}
=head2 _addAttribute
Usage : $query->_addAttribute($att);
Description: Adds a BioMart::Configuration::Attribute object to the Query,
maintaining the order of addition.
Returntype : none
Exceptions : none
Caller : caller
=cut
# Appends $attribute to the query's attribute list, records its dataset on
# the query and, for placeholder attributes (those reporting both a
# pointedFromDataset and a pointedFromInterface), adds every valid link
# found along the registry path between the two datasets.
# Objects that can 'displayType' are treated as filters and ignored.
# Throws BioMart::Exception::Query when $attribute is undefined.
sub _addAttribute{
    my ($self, $attribute) = @_;
    # Check it really is an attribute, not a filter. If it is a filter, ignore it.
    return if (UNIVERSAL::can($attribute, 'displayType'));
    if (!defined($attribute)) {
        BioMart::Exception::Query->throw("Tried to add nonexistent attribute to query");
    }
    my $attributeList = $self->get('attributes');
    push @{$attributeList}, $attribute;
    $self->set('attributes', $attributeList);
    $self->addDatasetName($attribute->dataSetName, $attribute->interface);
    # add the link if a placeholder attribute
    if ($attribute->pointedFromDataset && $attribute->pointedFromInterface) {
        my $registry = $self->getRegistry();
        my $schema = $self->virtualSchema;
        my @path = $registry->getPath($schema,
                                      $attribute->pointedFromDataset,
                                      $attribute->dataSetName);
        # Walk the path pairwise: (path[0],path[1]), (path[1],path[2]), ...
        for my $step (1 .. $#path) {
            my $link = $registry->getLinkBetween($schema,
                                                 $path[$step - 1],
                                                 $path[$step]);
            my $attributeLink = $attribute->datasetLink;
            # The attribute's own link name overrides the default on the
            # hop that ends at the attribute's dataset.
            if ($attributeLink && ($path[$step] eq $attribute->dataSetName)) {
                $link->defaultLink($attributeLink);
            }
            if ($link->validateLink($schema,
                                    $attribute->pointedFromInterface,
                                    $attribute->interface,
                                    $link->defaultLink)) {
                $self->addLinks($link, $attribute->pointedFromInterface,
                                $attribute->interface);
            }
        }
    }
}
=head2 addAttributes
Usage : $query->addAttributes($atts);
Description: Adds a listref of BioMart::Configuration::Attribute objects to
the Query
Returntype : none
Exceptions : none
Caller : caller
=cut
# Adds each element of the $attributes listref to the query, in order,
# by delegating to _addAttribute.
sub addAttributes{
    my ($self, $attributes) = @_;
    foreach my $candidate (@{$attributes}) {
        $self->_addAttribute($candidate);
    }
}
=head2 addAttributeListFirst
Usage : $query->addAttributeListFirst($attList);
Description: Adds a BioMart::Configuration::AttributeList object to the
Query, as the first one - useful for maintaining correct order
in ResultTable.
Returntype : none
Exceptions : none
Caller : caller
=cut
# Puts $alist at the front of the query's 'attribute_lists' and records the
# list's dataset name and interface on the query.
sub addAttributeListFirst{
    my ($self, $alist) = @_;
    my $attributeLists = $self->get('attribute_lists');
    unshift @{$attributeLists}, $alist;
    $self->set('attribute_lists', $attributeLists);
    $self->addDatasetName($alist->dataSetName, $alist->interface);
}
=head2 addAttributeList
Usage : $query->addAttributeList($attList);
Description: Adds a BioMart::Configuration::AttributeList object to the
Query,
maintaining the order of addition.
Returntype : none
Exceptions : none
Caller : caller
=cut
# Appends $alist to the query's 'attribute_lists' and records the list's
# dataset name and interface on the query.
sub addAttributeList{
    my ($self, $alist) = @_;
    my $attributeLists = $self->get('attribute_lists');
    push @{$attributeLists}, $alist;
    $self->set('attribute_lists', $attributeLists);
    $self->addDatasetName($alist->dataSetName, $alist->interface);
}
=head2 addFilterWithoutLinking
Usage : $query->addFilterWithoutLinking($filt);
Description: Adds a BioMart::Configuration::BaseFilter implementing object
to the Query.
Returntype : none
Exceptions : none
Caller : caller
=cut
# Appends $filter to the query's filter list and records its dataset name
# and interface on the query. No links are created here, so neither the
# registry nor the virtual schema is consulted.
# Throws BioMart::Exception::Query when $filter is undefined.
sub addFilterWithoutLinking{
    my ($self, $filter) = @_;
    BioMart::Exception::Query->throw("Tried to add nonexistent filter to query")
        if (!defined($filter));
    my $filts = $self->get('filters');
    push @{$filts}, $filter;
    $self->set('filters', $filts);
    $self->addDatasetName($filter->dataSetName, $filter->interface);
}
=head2 _addFilter
Usage : $query->_addFilter($filt);
Description: Adds a BioMart::Configuration::BaseFilter implementing object
to the Query.
Returntype : none
Exceptions : none
Caller : caller
=cut
# Appends $filter to the query's filter list, records its dataset on the
# query and, for placeholder filters (those reporting both a
# pointedFromDataset and a pointedFromInterface), adds every valid link
# found along the registry path between the two datasets.
# Throws BioMart::Exception::Query when $filter is undefined.
sub _addFilter{
    my ($self, $filter) = @_;
    if (!defined($filter)) {
        BioMart::Exception::Query->throw("Tried to add nonexistent filter to query");
    }
    my $filterList = $self->get('filters');
    my $registry = $self->getRegistry();
    my $virtualSchema = $self->virtualSchema;
    push @{$filterList}, $filter;
    $self->set('filters', $filterList);
    $self->addDatasetName($filter->dataSetName, $filter->interface);
    # add the link if a placeholder filter
    if ($filter->pointedFromDataset && $filter->pointedFromInterface) {
        my @path = $registry->getPath($virtualSchema,
                                      $filter->dataSetName,
                                      $filter->pointedFromDataset);
        # if no path try other way round in case placeholder filter
        # on attribute page
        if (@path == 1) {
            @path = $registry->getPath($virtualSchema,
                                       $filter->pointedFromDataset,
                                       $filter->dataSetName);
        }
        # Walk the path pairwise: (path[0],path[1]), (path[1],path[2]), ...
        for my $step (1 .. $#path) {
            my $link = $registry->getLinkBetween($virtualSchema,
                                                 $path[$step - 1],
                                                 $path[$step]);
            if ($link->validateLink($virtualSchema,
                                    $filter->interface,
                                    $filter->pointedFromInterface,
                                    $link->defaultLink)) {
                $self->addLinks($link, $filter->interface,
                                $filter->pointedFromInterface);
            }
        }
    }
}
# Final pass over the query before it is run. Two steps:
#
#  1. For every ValueFilter that declares otherFilters (a ';'-separated
#     list of "dataset.filter" names), copy the filter's rows onto each
#     named filter whose dataset is already part of the query and add that
#     filter to the query. BooleanFilter targets are skipped.
#  2. Take the visible datasets in the order held by
#     getOrderedDatasetNames; when there are at least two and no link in
#     either direction is recorded between the last and the first of them,
#     look the link up in the registry and add it if it validates.
sub finalProcess {
    my $self = shift;
    # add the otherFilters
    my $filters = $self->getAllFilters();
    my $registry = $self->getRegistry();
    my $virtualSchema = $self->virtualSchema;
    foreach my $filter (@$filters) {
        # if filter is meant to set otherFilters handle it here
        # PROBLEMS - 1 - uses old style placeholder format
        #          - 2 - interface for placeholder not defined - HACKED FOR NOW
        next unless ($filter->isa("BioMart::Configuration::ValueFilter")
                     && $filter->otherFilters());
        my @otherFilts = split(/;/, $filter->otherFilters);
        foreach my $otherFilterSpec (@otherFilts) {
            # spec format: <dataset name>.<filter name>
            my @names = split(/\./, $otherFilterSpec);
            my $otherDataset = $registry->getDatasetByName($virtualSchema,
                                                           $names[0]);
            next if (!$otherDataset);
            # should only add to query if otherDataset already
            # involved in query
            my $datasetHash = $self->get('dataset_names');
            next if (!$datasetHash->{$names[0]});
            my $otherFilter = $otherDataset->getConfigurationTree(
                $filter->interface)->getFilterByName($names[1]);
            next if (!$otherFilter);
            my $att_table = BioMart::AttributeTable->new();
            my $rows = $filter->getTable()->getRows();
            $att_table->addRows($rows);
            next if ($otherFilter->
                     isa("BioMart::Configuration::BooleanFilter"));
            $otherFilter->setTable($att_table);
            my $dataSets = $self->getDatasetNames;
            foreach my $subName (@{$dataSets}) {
                if ($otherFilter->dataSetName eq $subName) {
                    $self->_addFilter($otherFilter);
                    last;
                }
            }
        }
    }
    # create a link between the second visible dataset and the first one
    # if one does not exist.
    # The dataset order is the one held on the query (ordered dataset
    # names), driven by the api, webservices or interface code; the older
    # attribute/filter based order switching was removed.
    my ($sourceDataset, $targetDataset);
    my $datasets = $self->getOrderedDatasetNames;
    foreach my $datasetName (reverse @{$datasets}) {
        my $dataset = $registry->getDatasetByName($virtualSchema,
                                                  $datasetName);
        next if (!$dataset->visible);
        # First visible name seen (iterating in reverse) becomes the
        # source; every later visible name overwrites the target.
        if (!$sourceDataset) {
            $sourceDataset = $datasetName;
        }
        else {
            $targetDataset = $datasetName;
        }
    }
    if ($sourceDataset && $targetDataset &&
        !$self->getLinks($sourceDataset, $targetDataset) &&
        !$self->getLinks($targetDataset, $sourceDataset)) {
        # if link not already defined between the first and second visible
        # datasets on query object
        my $link = $registry->getLinkBetween($virtualSchema, $sourceDataset,
                                             $targetDataset);
        my $sourceInterface = $self->getInterfaceForDataset($sourceDataset);
        my $targetInterface = $self->getInterfaceForDataset($targetDataset);
        # This is not inside a loop, so an invalid link is skipped with a
        # plain conditional rather than 'next'.
        if ($link->validateLink($virtualSchema,
                                $sourceInterface,
                                $targetInterface,
                                $link->defaultLink)) {
            $self->addLinks($link, $sourceInterface, $targetInterface);
        }
    }
}
=head2 addFilters
Usage : $query->addFilters($filts);
Description: Adds a listref of BioMart::Configuration::Filter objects to
the Query
Returntype : none
Exceptions : none
Caller : caller
=cut
# Adds each element of the $filters listref to the query, in order, by
# delegating to _addFilter.
sub addFilters{
    my ($self, $filters) = @_;
    foreach my $candidate (@{$filters}) {
        $self->_addFilter($candidate);
    }
}
=head2 orderBy
Usage : my $orderByAtts = $query->orderBy(); $query->orderBy($atts_ref);
Description: get/sets the order by attributes for this Query
Returntype : listref of order by BioMart::Attribute objects
Exceptions : none
Caller : caller
=cut
# Combined getter/setter for the query's 'orderby' value. A true argument
# is stored; the current value is always returned.
sub orderBy {
    my $self = shift;
    my ($orderByAtts) = @_;
    $self->set('orderby', $orderByAtts) if ($orderByAtts);
    return $self->get('orderby');
}
=head2 header
Usage : my $header = $query->header(); $query->header($header);
Description: get/sets the current header settings on the Query
Returntype : scalar $header
Exceptions : none
Caller : caller
=cut
# Combined getter/setter for the query's 'header' value. Any defined
# argument (including 0) is stored; the current value is always returned.
sub header {
    my $self = shift;
    my ($newHeader) = @_;
    $self->set('header', $newHeader) if (defined $newHeader);
    return $self->get('header');
}
=head2 count
Usage : my $count = $query->count(); $query->count($count);
Description: get/sets the current count on the Query
Returntype : scalar $count
Exceptions : none
Caller : caller
=cut
# Combined getter/setter for the query's 'count' value. Any defined
# argument (including 0) is stored; the current value is always returned.
sub count {
    my $self = shift;
    my ($newCount) = @_;
    $self->set('count', $newCount) if (defined $newCount);
    return $self->get('count');
}
=head2 completionStamp
Usage : my $completionStamp = $query->completionStamp(); $query->completionStamp($completionStamp);
Description: get/sets the completionStamp flag on the Query
Returntype : scalar $completionStamp
Exceptions : none
Caller : caller
=cut
# Combined getter/setter for the query's 'completionStamp' value. Any
# defined argument is stored; the current value is always returned.
sub completionStamp {
    my $self = shift;
    my ($newStamp) = @_;
    $self->set('completionStamp', $newStamp) if (defined $newStamp);
    return $self->get('completionStamp');
}
=head2 limitStart
Usage : my $limitStart = $query->limitStart();
$query->limitStart($limitStart);
Description: get/sets the limitStart for this Query
Returntype : scalar $limitStart
Exceptions : none
Caller : caller
=cut
# Combined getter/setter for the query's 'limitStart' value. Any defined
# argument (including 0) is stored; the current value is always returned.
sub limitStart {
    my $self = shift;
    my ($newStart) = @_;
    $self->set('limitStart', $newStart) if (defined($newStart));
    return $self->get('limitStart');
}
=head2 limitSize
Usage : my $limitSize = $query->limitSize();
$query->limitSize($limitSize);
Description: get/sets the limitSize for this Query
Returntype : scalar $limitSize
Exceptions : none
Caller : caller
=cut
# Combined getter/setter for the query's 'limitSize' value. Any defined
# argument (including 0) is stored; the current value is always returned.
sub limitSize {
    my $self = shift;
    my ($newSize) = @_;
    $self->set('limitSize', $newSize) if (defined($newSize));
    return $self->get('limitSize');
}
# Records that $dataSetName takes part in the query.
# 'dataset_names' maps the name to $interface (overwriting any earlier
# interface); 'ordered_dataset_names' keeps each name once, in order of
# first addition.
sub addDatasetName {
    my ($self, $dataSetName, $interface) = @_;
    my $interfaceFor = $self->get('dataset_names');
    $interfaceFor->{$dataSetName} = $interface;
    $self->set('dataset_names', $interfaceFor);
    my $orderedNames = $self->get('ordered_dataset_names');
    my $alreadyListed = grep { $_ eq $dataSetName } @{$orderedNames};
    push @{$orderedNames}, $dataSetName unless ($alreadyListed);
    $self->set('ordered_dataset_names', $orderedNames);
}
=head2 finalDatasetOrder
Usage : my $finalDatasetOrder = $query->finalDatasetOrder;
$query->finalDatasetOrder($finalDatasetOrder);
Description: get/set the FinalDatasetOrder decided by QueryRunner
for the Query.
Returntype : listref of names of the final dataset order.
Exceptions : none
Caller : caller
=cut
# Combined getter/setter for the query's 'finalDatasetOrder' value. A true
# argument is stored; the current value is always returned.
sub finalDatasetOrder {
    my $self = shift;
    my ($newOrder) = @_;
    $self->set('finalDatasetOrder', $newOrder) if ($newOrder);
    return $self->get('finalDatasetOrder');
}
=head2 formatter
Usage : my $formatterName = $query->formatter;
$query->formatter($formatterName);
Description: get/set the name of a BioMart::FormatterI implementing
object to render the ResultTable resulting for the Query.
Returntype : scalar $formatterName.
Exceptions : none
Caller : caller
=cut
# Combined getter/setter for the query's 'formatter' name. A true argument
# is stored; the current value is always returned.
sub formatter {
    my $self = shift;
    my ($newFormatter) = @_;
    $self->set('formatter', $newFormatter) if ($newFormatter);
    return $self->get('formatter');
}
# Fills this query from a query XML document.
#
# Reads the top-level settings (virtualSchemaName, formatter,
# softwareVersion, limitStart, limitSize, count, header, completionStamp),
# then for every Dataset element adds its attributes and filters, and
# finally adds every Links element.
#
# Throws BioMart::Exception::Usage when count is greater than 1, when
# header is set to anything other than '1', when a dataset, configuration
# tree or attribute cannot be found, when a Filter element has neither an
# 'excluded' nor a 'value' attribute, or when a requested link does not
# exist or does not validate.
sub _populateFromXML {
    my ($self,$xml)=@_;
    my $registry = $self->getRegistry;
    my $config = XMLin($xml, forcearray=> [qw(Query Dataset Attribute
                                               ValueFilter BooleanFilter
                                               Filter Links)], keyattr => []);
    # overrides default settings
    my $virtualSchemaName = $config->{'virtualSchemaName'} || 'default';
    $self->virtualSchema($virtualSchemaName);
    $self->formatter($config->{'formatter'}) if ($config->{'formatter'});
    $self->set('softwareVersion', $config->{'softwareVersion'});
    $self->limitStart($config->{'limitStart'});
    $self->limitSize($config->{'limitSize'});
    if ($config->{'count'} && $config->{'count'} > 1) {
        BioMart::Exception::Usage->throw ("INVALID COUNT VALUE");
    }
    $self->count($config->{'count'});
    if ($config->{'header'} && $config->{'header'} ne '1') {
        BioMart::Exception::Usage->throw ("INVALID HEADER VALUE");
    }
    $self->header($config->{'header'});
    $self->completionStamp($config->{'completionStamp'});
    foreach my $dataset (@{$config->{'Dataset'}}) {
        my $interface = $dataset->{'interface'} || 'default';
        # in case no filts or atts set
        $self->addDatasetName($dataset->{'name'},$interface);
        my $datasetObj = $registry->getDatasetByName($virtualSchemaName,
                                                     $dataset->{'name'});
        if (!$datasetObj) {
            BioMart::Exception::Usage->throw ("WITHIN Virtual Schema : $virtualSchemaName, Dataset ".
                                               $dataset->{'name'}." NOT FOUND");
        }
        my $confTree = $datasetObj->getConfigurationTree($interface);
        if (!$confTree) {
            BioMart::Exception::Usage->throw ("Cannot find Configuration Tree for $virtualSchemaName.".$dataset->{'name'});
        }
        foreach my $attributeNode (@{$dataset->{'Attribute'}}) {
            my $attribute = $confTree->
                getAttributeByName($attributeNode->{'name'});
            if (!$attribute) {
                BioMart::Exception::Usage->throw ("Attribute ".
                                                   $attributeNode->{'name'}." NOT FOUND");
            }
            else {
                # Remember the name as requested (which may be that of an
                # attribute list) together with its dataset in
                # 'attsAndAttListsForXMLDisplay', so the requested name is
                # still available after a list has been expanded below.
                my $tempArray = $self->get('attsAndAttListsForXMLDisplay');
                my $tempHash;
                $tempHash->{$attribute->name} = $attribute->dataSetName;
                push @{$tempArray}, $tempHash;
                $self->set('attsAndAttListsForXMLDisplay', $tempArray);
                if (UNIVERSAL::can($attribute,'getAllAttributes')) {
                    # attribute list: add each member attribute
                    my @attributes = @{$attribute->getAllAttributes};
                    foreach my $attr (@attributes) {
                        $self->_addAttribute($attr);
                    }
                }
                else {
                    $self->_addAttribute($attribute);
                }
            }
        }
        # reads Filter element
        foreach my $filterNode (@{$dataset->{'Filter'}}) {
            if (defined $filterNode->{'excluded'}) {
                $self->_setBooleanFilter($confTree,$filterNode);
            } elsif (defined $filterNode->{'value'}) {
                $self->_setValueFilter($confTree,$filterNode,
                                       $virtualSchemaName,$dataset,$interface);
            } else {
                BioMart::Exception::Usage->throw ("Filter ".$filterNode->{'name'}." INVALID, FILTER NEEDS 'excluded' or 'value' attribute");
            }
        }
        # Boolean and Value filter - to be phased out from xml
        foreach my $filterNode (@{$dataset->{'BooleanFilter'}}) {
            $self->_setBooleanFilter($confTree,$filterNode);
        }
        foreach my $filterNode (@{$dataset->{'ValueFilter'}}) {
            $self->_setValueFilter($confTree,$filterNode,$virtualSchemaName,
                                   $dataset,$interface);
        }
    }
    # add links
    foreach my $linkNode (@{$config->{'Links'}}) {
        my $sourceInterface = $linkNode->{'sourceInterface'} || 'default';
        my $targetInterface = $linkNode->{'targetInterface'} || 'default';
        my $link = $registry->getLinkBetween($virtualSchemaName,
                                             $linkNode->{'source'},
                                             $linkNode->{'target'});
        # The existence check has to come before the first method call on
        # $link, otherwise a missing link is never reported as NOT FOUND.
        if (!$link) {
            BioMart::Exception::Usage->throw("LINK FROM ".$linkNode->{'source'}." TO ".$linkNode->{'target'}." NOT FOUND");
        }
        $link->defaultLink($linkNode->{'defaultLink'})
            if ($linkNode->{'defaultLink'});
        if (!$link->validateLink($virtualSchemaName,
                                 $sourceInterface,
                                 $targetInterface,
                                 $link->defaultLink())) {
            BioMart::Exception::Usage->throw("LINK FROM ".$linkNode->{'source'}." TO ".$linkNode->{'target'}." NOT FOUND");
        }
        $link->operation($linkNode->{'operation'})
            if ($linkNode->{'operation'});
        $self->addLinks($link,$sourceInterface,$targetInterface);
    }
}
# Resolves the filter named in $filterNode from $confTree (directly, or via
# an option of that name), marks it excluded when the node's 'excluded'
# attribute is "1" and included otherwise, and adds it to the query.
# Throws BioMart::Exception::Usage when no such filter can be found.
sub _setBooleanFilter {
    my ($self,$confTree,$filterNode)=@_;
    my $filter = $confTree->getFilterByName($filterNode->{'name'});
    if (!$filter) {
        my $option = $confTree->getOptionByName($filterNode->{'name'});
        $filter = $option->filter if ($option);
    }
    # Must be checked before the filter is used below.
    if (!$filter) {
        BioMart::Exception::Usage->throw("Filter ".$filterNode->{'name'}." NOT FOUND");
    }
    else {
        if (defined $filterNode->{'excluded'}
            && $filterNode->{'excluded'} eq "1") {
            $filter->setExcluded;
        }
        else {
            $filter->setIncluded;
        }
        $self->_addFilter($filter);
    }
}
# Resolves the filter named in $filterNode - first as a filter of
# $confTree, then as an option of that name, then as an importable of the
# dataset - loads it with the node's comma-separated 'value' and adds it to
# the query. For FilterList filters each value is further split on '|' into
# the columns of one row.
# Throws BioMart::Exception::Usage when no such filter can be found.
sub _setValueFilter {
    my ($self,$confTree,$filterNode,$virtualSchemaName,$dataset,$interface)=@_;
    my $registry = $self->getRegistry;
    my $filterName = $filterNode->{'name'};
    my $filter = $confTree->getFilterByName($filterName);
    unless ($filter) {
        my $option = $confTree->getOptionByName($filterName);
        $filter = $option->filter if ($option);
    }
    unless ($filter) {
        # must be a filterlist importable
        $filter = $registry->getDatasetByName($virtualSchemaName,
            $dataset->{'name'})->getImportables($filterName, $interface);
    }
    if ($filter) {
        my $atable = BioMart::AttributeTable->new();
        my @values = split(",", $filterNode->{'value'});
        my $isFilterList = $filter->isa("BioMart::Configuration::FilterList");
        foreach my $val (@values) {
            if ($isFilterList) {
                # split drops trailing empty fields; appending a sentinel
                # column and popping it again keeps them
                my @data = split(/\|/, $val.'|end');
                pop @data;
                $atable->addRow(\@data);
            }
            else {
                $atable->addRow([$val]);
            }
        }
        $filter->setTable($atable);
        $self->_addFilter($filter);
    }
    else {
        BioMart::Exception::Usage->throw("Filter ".$filterName." NOT FOUND");
    }
}
# Returns the MD5 hex digest over the hashCode of every attribute,
# attribute list, filter and link held by the query, fed to the digest in
# that order.
sub _hashCode {
    my $self = shift;
    my $digest = Digest::MD5->new;
    foreach my $slot (qw(attributes attribute_lists filters links)) {
        my $members = $self->get($slot);
        foreach my $member (@{$members}) {
            $digest->add($member->hashCode);
        }
    }
    return $digest->hexdigest;
}
# Two queries are equal when their hashCode values are the same string.
# String comparison is required: compared numerically, two different hex
# digests can evaluate to the same number (for example, every digest that
# starts with a letter is 0).
sub _equals {
    my ($self, $otherq) = @_;
    return ($self->hashCode eq $otherq->hashCode);
}
=head2 toPerl
Usage : my $perlApiExample = $query->toPerl();
Description: display the PERL API Equivalent of Query
Returntype : string
Exceptions : none
Caller : Web.pm
=cut
# Returns, as one string, an example Perl script that rebuilds this query
# through the BioMart API. The script is derived from the query's own XML
# (toXML): one setDataset call per Dataset element, followed by addFilter
# calls for its Filter elements and addAttribute calls for its Attribute
# elements. Filter elements with neither 'excluded' nor 'value' are
# skipped. Names and values are escaped so they are valid inside the
# double-quoted literals of the generated script.
sub toPerl {
    my $self = shift;
    my $xml = $self->toXML(1,1,1,1);
    # Backslash-escape the characters that would end or interpolate inside
    # a double-quoted Perl string: \ " $ @
    my $escape = sub {
        my ($text) = @_;
        return $text unless defined $text;
        $text =~ s/([\\"\$\@])/\\$1/g;
        return $text;
    };
    my $perl_string = qq|
# An example script demonstrating the use of BioMart API.
# This perl API representation is only available for configuration versions >= 0.5
use strict;
use BioMart::Initializer;
use BioMart::Query;
use BioMart::QueryRunner;
my \$confFile = "PATH TO YOUR REGISTRY FILE UNDER biomart-perl/conf/. For Biomart Central Registry navigate to
http://www.biomart.org/biomart/martservice?type=registry";
#
# NB: change action to 'clean' if you wish to start a fresh configuration
# and to 'cached' if you want to skip configuration step on subsequent runs from the same registry
#
my \$action='cached';
my \$initializer = BioMart::Initializer->new('registryFile'=>\$confFile, 'action'=>\$action);
my \$registry = \$initializer->getRegistry;
my \$query = BioMart::Query->new('registry'=>\$registry,'virtualSchemaName'=>'default');
|;
    # so far expecting to deal with only 0.5 style XML
    my $config = XMLin($xml, forcearray=> [qw(Query Dataset Attribute
                                               ValueFilter BooleanFilter
                                               Filter Links)], keyattr => []);
    #DATASETS
    foreach my $dataset (@{$config->{'Dataset'}}) {
        $perl_string .= qq|
\$query->setDataset("|.$escape->($dataset->{'name'}).qq|");|;
        # FILTERS
        foreach my $filterNode (@{$dataset->{'Filter'}}) {
            my $filterName = $escape->($filterNode->{'name'});
            if (defined $filterNode->{'excluded'}) {
                if ($filterNode->{'excluded'} eq '1') {
                    $perl_string .= qq|
\$query->addFilter("|.$filterName.qq|", ["Excluded"]);|;
                }
                else {
                    $perl_string .= qq|
\$query->addFilter("|.$filterName.qq|", ["Only"]);|;
                }
            }
            elsif (defined $filterNode->{'value'}) {
                # Escape first, then turn each comma into a "," separator
                # so every value becomes its own quoted list element.
                my $temp_str = $escape->($filterNode->{'value'});
                $temp_str =~ s/\,/\"\,\"/g;
                $perl_string .= qq|
\$query->addFilter("|.$filterName.qq|", ["|.$temp_str.q|"]);|;
            }
        }
        # ATTRIBUTES
        foreach my $attributeNode (@{$dataset->{'Attribute'}}) {
            $perl_string .= qq|
\$query->addAttribute("|.$escape->($attributeNode->{'name'}).qq|");|;
        }
    }
    $perl_string .= qq|
my \$query_runner = BioMart::QueryRunner->new();
############################## GET COUNT ############################
# \$query->count(1);
# \$query_runner->execute(\$query);
# print \$query_runner->getCount();
#####################################################################
############################## GET RESULTS ##########################
# to obtain unique rows only
# \$query_runner->uniqueRowsOnly(1);
\$query_runner->execute(\$query);
\$query_runner->printHeader();
\$query_runner->printResults();
\$query_runner->printFooter();
#####################################################################
|;
    return $perl_string;
}
1;