# Raw content of Bio::Das::ProServer::SourceAdaptor::biomart
package Bio::Das::ProServer::SourceAdaptor::biomart;
# $Id: biomart.pm,v 1.1.1.1 2006/11/22 20:30:59 arek Exp $
use strict;
use warnings;
use base qw(Bio::Das::ProServer::SourceAdaptor);
use BioMart::Query;
use BioMart::QueryRunner;
use BioMart::Initializer;
# Batch sizes handed to BioMart::Initializer->new() in init() as
# 'init_batchsize' and 'max_batchsize'.
use constant INIT_BATCHSIZE => 100; # batch all with 100 initially
use constant MAX_BATCHSIZE => 100000;
# Virtual schema name used by init() when the ini file does not set
# virtualSchema.
use constant DEFAULTVSCHEMA => "default";
# Names of the ini-file entries that init() copies from
# $self->config() into $self->{'BioMart'}.
use constant VIRTUALSCHEMA => "virtualSchema";
use constant DATASET => "dataset";
use constant MART => "mart";
use constant LINKNAME => "linkName";
use constant FEATUREKEYS => "feature_keys";
# The complete list of ini-file entries copied by init().
use constant REQPARAMS =>
[ DATASET, VIRTUALSCHEMA, MART, LINKNAME, FEATUREKEYS ];
# Fallback values build_features() uses for feature fields whose
# result column is empty.
use constant DEFAULTS => { 'start' => 0, 'end' => 0 };
# Keys that the feature_keys ini entry must contain; init() dies
# when any of them is absent.
use constant REQUIREDKEYS => [ "id", "type", "method" ];
# init()
#
# One-time adaptor set-up.  Reads the BioMart related entries from the
# ini file, loads the BioMart registry, and validates that the
# configured dataset exposes an Importable/Exportable pair (named by
# the linkName entry) that can serve DAS feature requests.
#
# Everything later requests need is stored beneath $self->{'BioMart'}:
#   mart, dataset, linkName, virtualSchema - values from the ini file
#   feature_keys  - arrayref of feature hash keys, one per attribute
#   required_keys - hashref: required key => its index in feature_keys
#   registry      - the BioMart registry object
#   attributes    - attributes of the first exportable
#   filters       - filters of the first importable
#
# Dies with a configuration message when an ini entry is missing, the
# registry or dataset cannot be loaded, or the Importable/Exportable
# pair does not satisfy the checks below.
sub init
{
    my ($self) = @_;

    $self->{'capabilities'}{'features'} = '1.0';

    # Persistent BioMart state lives beneath $self->{'BioMart'}.
    my $biomart = $self->{'BioMart'} || {};
    my $config  = $self->config();

    my $confPath = $config->{'registryPath'}
      || die "BioMart DAS requires the path to a Configuration File "
      . "using the registryPath ini entry\n";

    # Copy the recognised ini-file entries.
    foreach my $title ( @{ +REQPARAMS } ) {
        $biomart->{$title} = $config->{$title};
    }

    # Every entry except virtualSchema is mandatory; name the ones that
    # are actually missing so the ini file can be fixed directly.
    my @missing =
      grep { !defined $biomart->{$_} } ( MART, DATASET, LINKNAME, FEATUREKEYS );
    die "BioMart DAS requires mart, dataset, linkName and feature_keys "
      . "entries in the ini file (missing: "
      . join( ", ", @missing ) . ")\n"
      if @missing;

    unless ( defined( $biomart->{'virtualSchema'} ) ) {
        $biomart->{'virtualSchema'} = DEFAULTVSCHEMA;
    }

    # Remap the comma separated list in feature_keys to an actual
    # arrayref.  Whitespace around the keys is dropped so that
    # "id, type, method" is accepted as well as "id,type,method".
    my @featureKeys = split /,/, $biomart->{'feature_keys'};
    s/\A\s+|\s+\z//g for @featureKeys;
    $biomart->{'feature_keys'} = \@featureKeys;

    my $initializer = BioMart::Initializer->new(
        'registryFile'   => "$confPath",
        'init_batchsize' => INIT_BATCHSIZE,
        'max_batchsize'  => MAX_BATCHSIZE,
        'action'         => "update"
    ) or die "Could not load Initializer $!\n";
    $biomart->{'registry'} = $initializer->getRegistry();

    # Now make sure the DatasetConfig has the required
    # Exportable/Importable.
    my $dataset =
      $biomart->{'registry'}
      ->getDatasetByName( $biomart->{'virtualSchema'}, $biomart->{'dataset'} )
      or die "BioMart DAS could not find dataset "
      . $biomart->{'dataset'}
      . " in virtual schema "
      . $biomart->{'virtualSchema'} . "\n";

    # An undefined result is treated like an empty list so that the
    # configuration message below is reported instead of a
    # dereferencing error.
    my @importables =
      @{ $dataset->getImportables( $biomart->{'linkName'} ) || [] };
    my @exportables =
      @{ $dataset->getExportables( $biomart->{'linkName'} ) || [] };
    die "BioMart DAS Configuration must include an "
      . "Importable-Exportable pair named by "
      . $biomart->{'linkName'} . "\n"
      unless ( @importables && @exportables );

    # Make sure importable and exportable are compliant.
    my $filts = $importables[0]->getAllFilters();
    die "BioMart DAS Compliant Importables "
      . "must contain at least one filter\n"
      unless ( scalar( @{$filts} ) >= 1 );
    foreach my $filt ( @{$filts} ) {
        warn( "Received filt " . $filt->name . "\n" );
    }

    my $atts = $exportables[0]->getAllAttributes();
    die "BioMart DAS Compliant Exportables "
      . "must contain the same number of "
      . "attributes as keys in the feature_keys INI specification\n"
      unless ( scalar( @{$atts} ) == scalar(@featureKeys) );

    # Record the column index of each required key.  The index must
    # advance for every key in feature_keys, not only for the required
    # ones, otherwise the recorded positions point at the wrong columns.
    my $reqKeys    = REQUIREDKEYS;
    my %isRequired = map { $_ => 1 } @{$reqKeys};
    $biomart->{'required_keys'} = {};
    foreach my $i ( 0 .. $#featureKeys ) {
        my $key = $featureKeys[$i];
        $biomart->{'required_keys'}{$key} = $i if $isRequired{$key};
    }
    die "BioMart DAS Compliant feature_keys list "
      . "must contain at least "
      . join( ",", @{$reqKeys} ) . "\n"
      unless (
        scalar( keys %{ $biomart->{'required_keys'} } ) ==
        scalar( @{$reqKeys} ) );

    $biomart->{'attributes'} = $atts;
    $biomart->{'filters'}    = $filts;
    $self->{'BioMart'}       = $biomart;
}
# build_features($opts)
#
# Runs a BioMart query for the requested segment and converts each
# result row into a feature hashref keyed by the configured
# feature_keys.
#
# $opts is a hashref with:
#   segment - segment identifier; an empty list is returned without it
#   start   - optional range start
#   end     - optional range end
# The range is applied only when both start and end are true.
#
# Returns a list of feature hashrefs.  Rows lacking a value for any key
# in REQUIREDKEYS are skipped.  Columns without a value receive the
# entry from DEFAULTS when there is one, undef otherwise.
#
# Dies when a range is requested but the importable stored by init()
# does not provide the two additional range filters, or when the
# in-memory result buffer cannot be opened or closed.
sub build_features
{
    my ( $self, $opts ) = @_;

    my $segment       = $opts->{'segment'} || return ();
    my $segment_start = $opts->{'start'};
    my $segment_end   = $opts->{'end'};

    my $biomart = $self->{'BioMart'};

    my $query = BioMart::Query->new(
        'registry'          => $biomart->{'registry'},
        'virtualSchemaName' => $biomart->{'virtualSchema'}
    );
    $query->setDataset( $biomart->{'dataset'} );

    foreach my $att ( @{ $biomart->{'attributes'} } ) {
        $query->addAttribute( $att->name() );
    }

    # The first importable filter takes the segment; the second and
    # third take the range.  init() only guarantees the first one, so
    # the others are checked before being dereferenced.
    my $filters = $biomart->{'filters'};
    $query->addFilter( $filters->[0]->name(), [$segment] );
    if ( $segment_start && $segment_end ) {
        die "BioMart DAS range queries require an Importable with "
          . "three filters (segment, start, end)\n"
          unless ( scalar( @{$filters} ) >= 3 );
        $query->addFilter( $filters->[1]->name(), [$segment_start] );
        $query->addFilter( $filters->[2]->name(), [$segment_end] );
    }

    my $query_runner = BioMart::QueryRunner->new();
    $query->formatter('TSV');    # tab-separated results
    $query_runner->execute($query);

    # Capture the printed results in memory.  The buffer starts out as
    # an empty string so that a query without output yields no rows
    # rather than an undefined value.
    my $result_buffer = '';
    open( my $RESULTS, '>', \$result_buffer )
      or die "Could not open in-memory results buffer: $!\n";
    $query_runner->printResults($RESULTS);
    close($RESULTS)
      or die "Could not close in-memory results buffer: $!\n";

    my $defaults    = DEFAULTS;
    my $featureKeys = $biomart->{'feature_keys'};

    # Column of each required key, computed directly from feature_keys
    # so the check below always inspects the key's actual column.
    my %posOf;
    foreach my $i ( 0 .. $#{$featureKeys} ) {
        $posOf{ $featureKeys->[$i] } = $i;
    }
    my @reqPos = map { $posOf{$_} } @{ +REQUIREDKEYS };

    my @features = ();
  ROW: foreach my $rowLine ( split /\n/, $result_buffer ) {

        # A negative limit keeps trailing empty columns, so that every
        # column lines up with its key in feature_keys.
        my @row = split /\t/, $rowLine, -1;

        # Skip this row unless every key in REQUIREDKEYS has a value.
        # "0" counts as a value; undef and the empty string do not.
        foreach my $pos (@reqPos) {
            next ROW
              unless ( defined($pos)
                && defined( $row[$pos] )
                && length( $row[$pos] ) );
        }

        my %feature;
        foreach my $i ( 0 .. $#{$featureKeys} ) {
            my $key   = $featureKeys->[$i];
            my $value = $row[$i];
            $feature{$key} =
              ( defined($value) && length($value) )
              ? $value
              : $defaults->{$key};    # May be either default, or undef.
        }
        push @features, \%feature;
    }

    return @features;
}
1;