Bio::EnsEMBL::ExternalData::DAS
SourceParser
Toolbar
Summary
Bio::EnsEMBL::ExternalData::DAS::SourceParser
Package variables
No package variables defined.
Included modules
Inherit
Exporter
Synopsis
my $parser = Bio::EnsEMBL::ExternalData::DAS::SourceParser->new(
-timeout => 5,
-proxy => 'http://proxy.company.com',
);
my $sources = $parser->fetch_Sources(
-location => 'http://www.dasregistry.org/das',
-species => 'Homo_sapiens'
);
for my $source (@{ $sources }) {
printf "URL: %s, Description: %s, Coords: %s\n",
$source->full_url,
$source->description,
join '; ', @{ $source->coord_systems };
}
Description
Parses XML produced by the 'sources' DAS command, creating object
representations of each source.
Methods
Methods description
Arg [1] : The URL of the server Arg [2] : Arrayref of sources, each being a hashref Example : $parser->_parse_dsn_output($server_url, $sources_set); Description: Parses the output of the dsn command. Returntype : none Exceptions : none Caller : _parse_server Status : Stable |
Arg [..] : List of DAS server URLs Example : $parser->_parse_server( @servers ); Description: Contacts the given DAS server(s) via the sources or dsn command and parses the results. Populates $self->{'_sources'} as a hashref of DAS sources, organised by server and then by full source URL: { http://... => { http://.../dsn => Bio::EnsEMBL::ExternalData::DAS::Source, .. }, } Returntype : none Exceptions : If there is an error contacting the DAS registry/server. Caller : fetch_Sources Status : Stable |
Arg [1] : The URL of the server Arg [2] : Arrayref of sources, each being a hashref Example : $parser->_parse_sources_output($server_url, $sources_set); Description: Parses the output of the sources command. Returntype : none Exceptions : none Caller : _parse_server Status : Stable |
Arg [..] : List of named arguments: -LOCATION - A URL from which to obtain a list of sources XML. This is usually a DAS registry or server URL, but could be a local path to a directory containing an XML file named "sources?" or "dsn?" -SPECIES - (optional) scalar or arrayref species name filter -NAME - (optional) scalar or arrayref source name filter -LOGIC_NAME - (optional) scalar or arrayref logic_name filter Example: $arr = $parser->fetch_Sources( -location => 'http://www.dasregistry.org/das', -species => 'Homo_sapiens', -name => ['asd', 'atd', 'astd'], ); Example: $arr = $parser->fetch_Sources( -location => 'file:///registry', # parses "/registry/sources?" ); Description: Fetches DAS Source objects. The first call to this method initiates lazy parsing of the XML, and the results are stored. The different filter types supplied to this method are treated as a logical AND. Several filters of the same type are logical OR. Returntype : Arrayref of Bio::EnsEMBL::ExternalData::DAS::Source objects, sorted by label. Exceptions : If no location is specified, or if there is an error contacting the DAS registry/server. Caller : general Status : Stable |
Arg [..] : List of optional named arguments: -PROXY - A URL to use as an HTTP proxy server -NOPROXY - A list of domains/hosts not to use the proxy for -TIMEOUT - Timeout in seconds (default is 10) Example : my $parser = Bio::EnsEMBL::ExternalData::DAS::SourceParser->new( -proxy => 'http://proxy.company.com', -timeout => 10, ); Description: Constructor Returntype : Bio::EnsEMBL::ExternalData::DAS::SourceParser Exceptions : none Caller : general Status : Stable |
Methods code
_find_mapmaster | description | prev | next | Top |
# Look up the source that a DSN-style entry names as its "mapmaster".
#
# Arg [1]    : URL of the source being parsed (only used in the warning text)
# Arg [2]    : Raw mapmaster URL reported for that source (may be empty)
# Returntype : The entry stored in $self->{'_sources'} under the mapmaster's
#              server and URL, or undef when nothing could be resolved
# Exceptions : none - a failure to load the mapmaster's server is reported
#              via warning() and otherwise ignored
sub _find_mapmaster {
  my ( $self, $source_url, $raw_url ) = @_;

  my $resolved = undef;
  return $resolved if !$raw_url;

  my ( $host_url, $dsn_path ) = $self->parse_das_string($raw_url);
  return $resolved if !$host_url || !$dsn_path;

  my $mapmaster_url = join '/', $host_url, $dsn_path;

  # Load the mapmaster's server the first time it is referenced; later
  # lookups reuse whatever is already cached under that server key.
  unless ( exists $self->{'_sources'}{$host_url} ) {
    eval {
      $self->fetch_Sources( -location => $host_url );
    };
    warning("Error parsing $source_url - bad mapmaster $mapmaster_url : $@") if $@;
  }

  $resolved = $self->{'_sources'}{$host_url}{$mapmaster_url};
  return $resolved;
}
# Build a coordinate system object from the details a DAS server reports.
#
# Arg [1]    : Sequence type (the "source" of the coordinates element)
# Arg [2]    : Authority
# Arg [3]    : Version (optional)
# Arg [4]    : Species name (optional; spaces are converted to underscores)
# Returntype : A Bio::EnsEMBL::ExternalData::DAS::CoordSystem, or nothing
#              (bare return) when the coordinate system cannot be used
# Exceptions : none - unusable coordinate systems are reported via info()
#
# %COORD_MAPPINGS, %TYPE_MAPPINGS, %AUTHORITY_MAPPINGS and
# %NON_GENOMIC_COORDS are lookup tables declared outside this block.
sub _parse_coord_system {
  my ( $self, $type, $auth, $version, $species ) = @_;

  # An exact match on the raw (un-normalised) values wins outright.
  if (    exists $COORD_MAPPINGS{$type}
       && exists $COORD_MAPPINGS{$type}{$auth}
       && exists $COORD_MAPPINGS{$type}{$auth}{$version}
       && exists $COORD_MAPPINGS{$type}{$auth}{$version}{$species} ) {
    my $s = $COORD_MAPPINGS{$type}{$auth}{$version}{$species};
    return Bio::EnsEMBL::ExternalData::DAS::CoordSystem->new_from_string($s);
  }

  # Normalise the inputs: translate known aliases and default the optionals.
  $type    = $TYPE_MAPPINGS{$type}      || $type;
  $auth    = $AUTHORITY_MAPPINGS{$auth} || $auth;
  $version ||= '';
  $species ||= '';

  # Replace every space, not just the first, so that names with more than
  # two words (e.g. "Canis lupus familiaris") are fully converted.
  $species =~ s/ /_/g;

  if ( is_genomic($type) ) {
    if ( !$species ) {
      info("Genomic coordinate system has no species: $type $auth$version");
      return;
    }
    my $cs = Bio::EnsEMBL::ExternalData::DAS::CoordSystem->new(
      -name    => lc($type),
      -version => $auth . $version,
      -species => $species,
    );
    return $cs;
  }

  # Everything else must be one of the known non-genomic systems.
  my $cs = $NON_GENOMIC_COORDS{$type}{$auth};
  if ( !$cs ) {
    info("Coordinate system not supported: $auth $type");
    return;
  }

  # The shared template object may carry a different species; in that case
  # hand back a copy specific to the requested species instead.
  if ( $cs->species ne $species ) {
    $cs = $cs->new( -name    => $cs->name,
                    -version => $cs->version,
                    -species => $species,
                    -label   => $cs->label );
  }
  return $cs;
}
# Turn the parsed output of the "dsn" command into Source objects.
#
# Arg [1]    : The URL of the server
# Arg [2]    : Arrayref of sources, each being a hashref with the keys
#              'source_id', 'source', 'description' and 'mapmaster'
# Returntype : undef
# Side effect: stores each new source in $self->{'_sources'}{$server_url},
#              keyed by its full URL (an existing entry is not replaced)
sub _parse_dsn_output {
  my ( $self, $server_url, $set ) = @_;

  my $count = 0;
  foreach my $entry ( @{$set} ) {
    my $source = Bio::EnsEMBL::ExternalData::DAS::Source->new(
      -url         => $server_url,
      -dsn         => $entry->{'source_id'},
      -label       => $entry->{'source'},
      -description => $entry->{'description'},
    );

    $self->{'_sources'}{$server_url}{ $source->full_url } ||= $source;
    $count++;

    # The dsn response carries no coordinate systems of its own, so copy
    # them from the mapmaster source when one can be resolved.
    my $mapmaster = $self->_find_mapmaster( $source->full_url, $entry->{'mapmaster'} );
    next unless $mapmaster;
    $source->coord_systems( $mapmaster->coord_systems );
  }

  info("Found $count sources");
  return undef;
}
# Query one or more DAS servers and cache the sources they advertise.
#
# Arg [..]   : List of server URLs
# Returntype : none
# Exceptions : If a server that did not answer the sources command also
#              returns a non-200 status for the dsn command
#
# The sources command is tried first for every server; the dsn command is
# then used as a fallback for the servers that did not yield any sources.
sub _parse_server {
  my ( $self, @servers ) = @_;

  my $das = $self->{'daslite'};
  $das->dsn( \@servers );

  my %success = ();
  my $struct  = $das->sources();

  # Pass 1: the "sources" command.
  while ( my ( $url, $set ) = each %{$struct} ) {
    info("Processing $url");
    my $status = $das->statuscodes($url);

    # Response keys are request URLs; strip the command to get the server.
    $url =~ s|/sources\??$||;
    $self->{'_sources'}{$url} = {};
    $set = $set->[0]->{'source'} || [];

    unless ( $status =~ /^200/ && scalar @{$set} ) {
      info("$url does not support sources command; trying dsn");
      next;
    }

    $self->_parse_sources_output( $url, $set );
    $success{$url} = 1;
  }

  # Pass 2: the "dsn" command, for the servers pass 1 could not handle.
  my @failed = grep { !$success{$_} } @servers;
  if ( scalar @failed ) {
    $das->dsn( \@failed );
    $struct = $das->dsns();

    # Point the client back at the full server list once the query is done.
    $das->dsn( \@servers );

    while ( my ( $url, $set ) = each %{$struct} ) {
      info("Processing $url");
      my $status = $das->statuscodes($url);
      $url =~ s|/dsn\??$||;
      $set ||= [];

      throw("Error contacting DAS server '$url' : $status") if $status !~ /^200/;
      $self->_parse_dsn_output( $url, $set ) if scalar @{$set};
    }
  }
}
# Turn the parsed output of the "sources" command into Source objects.
#
# Arg [1]    : The URL of the server
# Arg [2]    : Arrayref of sources, each being a hashref
# Returntype : undef
# Side effect: stores each new source in $self->{'_sources'}{$server_url},
#              keyed by its full URL (an existing entry is not replaced)
#
# One Source object is created per version of a source, and only for
# versions that advertise a 'das1:features' capability.
sub _parse_sources_output {
  my ( $self, $server_url, $set ) = @_;

  my $count = 0;
  foreach my $entry ( @{$set} ) {
    my $title       = $entry->{'source_title'};
    my $homepage    = $entry->{'source_doc_href'};
    my $description = $entry->{'source_description'};
    my $email       = $entry->{'maintainer'}[0]{'maintainer_email'};

    RELEASE:
    foreach my $release ( @{ $entry->{'version'} || [] } ) {

      # The server URL and DSN are taken from the features query URI.
      my ( $url, $dsn );
      CAPABILITY:
      foreach my $cap ( @{ $release->{'capability'} || [] } ) {
        next CAPABILITY if $cap->{'capability_type'} ne 'das1:features';
        ( $url, $dsn ) = $cap->{capability_query_uri} =~ m|(.+/das1?)/(.+)/features|;
        last CAPABILITY;
      }
      next RELEASE unless $dsn;

      my $version_uri = $release->{'version_uri'};
      info("Parsing source $version_uri");

      my @coords = ();
      COORD:
      foreach my $coord ( @{ $release->{'coordinates'} || [] } ) {
        my $auth       = $coord->{'coordinates_authority'};
        my $type       = $coord->{'coordinates_source'};
        my $cs_version = $coord->{'coordinates_version'};
        my $cdata      = $coord->{'coordinates'};

        # The species is the third comma-separated field of the text.
        my $species = ( split /,/, $cdata, 3 )[2];

        if ( !$type || !$auth ) {
          warning("Unable to parse authority and sequence type for $version_uri ; skipping");
          next COORD;
        }

        my $parsed = $self->_parse_coord_system( $type, $auth, $cs_version, $species );
        push @coords, $parsed if $parsed;
      }

      my $das_source = Bio::EnsEMBL::ExternalData::DAS::Source->new(
        -logic_name  => $version_uri,
        -url         => $url,
        -dsn         => $dsn,
        -label       => $title,
        -description => $description,
        -maintainer  => $email,
        -homepage    => $homepage,
        -coords      => \@coords,
      );

      $count++;
      $self->{'_sources'}{$server_url}{ $das_source->full_url } ||= $das_source;
    }
  }

  info("Found $count sources");
  return undef;
}
# Fetch the DAS sources of a server or registry, optionally filtered.
#
# Arg [..]   : Named arguments:
#                -LOCATION   - URL to obtain the sources from (required)
#                -SPECIES    - (optional) scalar or arrayref species filter
#                -NAME       - (optional) scalar or arrayref name filter
#                -LOGIC_NAME - (optional) scalar or arrayref logic_name filter
# Returntype : Arrayref of source objects sorted case-insensitively by label
# Exceptions : If no location is given
#
# Filters of different types are combined with AND; several values for the
# same filter type are combined with OR. If the location itself contains a
# source identifier, results are also restricted to that logic_name or dsn.
sub fetch_Sources {
  my $self = shift;
  my ( $server, $f_species, $f_name, $f_logic ) =
    rearrange( [ 'LOCATION', 'SPECIES', 'NAME', 'LOGIC_NAME' ], @_ );

  throw('No DAS server specified') unless $server;

  # Split the location into the server part and an optional source id.
  ( $server, my $f_id ) = $self->parse_das_string($server);

  # Servers are only contacted the first time they are requested.
  $self->_parse_server($server) if !defined $self->{'_sources'}{$server};

  my @sources = values %{ $self->{'_sources'}{$server} || {} };

  # Each filter may be absent, a single scalar or an arrayref.
  my $as_list = sub {
    my ($filter) = @_;
    return () if !defined $filter;
    return ref $filter ? @{$filter} : ($filter);
  };
  my @f_species = $as_list->($f_species);
  my @f_name    = $as_list->($f_name);
  my @f_logic   = $as_list->($f_logic);

  if ( scalar @f_species ) {
    info('Filtering by species');

    # Sources with no coordinate systems at all pass the species filter.
    @sources = grep {
      my $candidate = $_;
      scalar grep {
        !scalar @{ $candidate->coord_systems } || $candidate->matches_species($_)
      } @f_species;
    } @sources;
  }

  if ( scalar @f_name ) {
    info('Filtering by name');
    @sources = grep {
      my $candidate = $_;
      scalar grep { $candidate->matches_name($_) } @f_name;
    } @sources;
  }

  if ( scalar @f_logic ) {
    info('Filtering by logic_name');
    @sources = grep {
      my $candidate = $_;
      scalar grep { $candidate->logic_name eq $_ } @f_logic;
    } @sources;
  }

  if ($f_id) {
    info('Filtering by identifier (logic_name or dsn)');
    @sources = grep { $_->logic_name eq $f_id || $_->dsn eq $f_id } @sources;
  }

  my @by_label = sort { lc( $a->label ) cmp lc( $b->label ) } @sources;
  return \@by_label;
}
# Report whether a coordinate system type is a genomic one.
#
# Arg [1]    : Either a type name, or an object with a name() method
# Returntype : 1 if the name matches $GENOMIC_REGEX (case-insensitively),
#              0 otherwise
sub is_genomic {
  my ($test) = @_;

  # Objects that can report a name are tested by that name.
  my $name = $test;
  if ( ref $test && $test->can('name') ) {
    $name = $test->name;
  }

  return ( $name =~ m/$GENOMIC_REGEX/i ) ? 1 : 0;
}
1; } |
# Constructor.
#
# Arg [..]   : Optional named arguments:
#                -PROXY   - URL to use as an HTTP proxy server
#                -NOPROXY - domains/hosts not to use the proxy for
#                -TIMEOUT - timeout in seconds (default is 10)
# Returntype : Object blessed into the invoking class
#
# The Bio::Das::Lite client is created and configured here and kept in the
# object under the 'daslite' key, alongside the settings it was given.
sub new {
  my $class = shift;
  my ( $proxy, $no_proxy, $timeout ) =
    rearrange( [ 'PROXY', 'NOPROXY', 'TIMEOUT' ], @_ );
  $timeout ||= 10;

  my $das = Bio::Das::Lite->new();
  $das->user_agent('Ensembl');
  $das->timeout($timeout);
  $das->http_proxy($proxy);

  # no_proxy is only set when the client object provides that method.
  if ( $no_proxy && $das->can('no_proxy') ) {
    $das->no_proxy($no_proxy);
  }
  elsif ($no_proxy) {
    warning("Installed version of Bio::Das::Lite does not support use of 'no_proxy'");
  }

  return bless {
    'daslite' => $das,
    'proxy'   => $proxy,
    'noproxy' => $no_proxy,
    'timeout' => $timeout,
  }, $class;
}
parse_das_string | description | prev | next | Top |
# Split a DAS location string into its server URL and source identifier.
#
# Arg [1]    : A DAS URL, with or without a scheme ("http://" is assumed
#              when the string does not start with "<word>:")
# Returntype : Two-element list: ( server URL ending in "/das", identifier )
#              The identifier is built from the path segments that follow
#              the "das" (or "das1") segment and is empty when there are none.
#
# Path segments named exactly "sources" or "dsn" are command names, not part
# of the server or identifier, and are dropped.
sub parse_das_string {
  my ( $self, $in ) = @_;

  if ( $in !~ m{^\w+:} ) {
    $in = "http://$in";
  }

  my $server = URI->new($in)->canonical;
  my $dsn    = URI->new();

  # Collapse runs of slashes so that no empty path segments are produced.
  my $path = $server->path;
  $path =~ s|/+|/|g;
  $server->path($path);

  my @server_segs = ();
  my @dsn_segs    = ();
  my $found       = 0;    # set once the "das"/"das1" segment has been seen

  for my $seg ( $server->path_segments ) {
    $seg || next;
    if ( $seg =~ /^das1?$/ ) {
      $found = 1;
    }
    # The alternation must be grouped: without the group the pattern means
    # "starts with sources" OR "ends with dsn", which would discard
    # legitimate segments such as "my_dsn".
    elsif ( $seg =~ /^(?:sources|dsn)$/ ) {
      next;
    }
    elsif ($found) {
      push @dsn_segs, $seg;
    }
    else {
      push @server_segs, $seg;
    }
  }

  $server->path_segments( @server_segs, 'das' );
  $dsn->path_segments(@dsn_segs);

  return ( $server->as_string, $dsn->as_string );
}
General documentation