Bio::EnsEMBL::ExternalData::DAS Coordinator
SummaryPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
Bio::EnsEMBL::ExternalData::DAS::Coordinator
Package variables
Privates (from "my" definitions)
($url, $segment) = $raw_url =~ m|(.+)/features\?.*segment=([^;&]+)|
@sources(1) = @{ $coord_data->{'sources'}{$url} }
$status(3) = $statuses->{$url}
$stylesheet = Bio::EnsEMBL::ExternalData::DAS::Stylesheet->new( $styledata->[0] )
@sources(2) = values %{ $sources_with_data{$url} }
@features_query = map { { 'segment' => $_, 'type' => $filters{type}, 'feature_id' => $filters{feature}, 'group_id' => $filters{group}, 'maxbins' => $maxbins, } } @segments
$status(2) = $statuses->{$raw_url}
$response = $daslite->stylesheet()
$statuses = $daslite->statuscodes()
$status(1) = $statuses->{$url}
Included modules
Bio::Das::Lite
Bio::EnsEMBL::ExternalData::DAS::CoordSystem
Bio::EnsEMBL::ExternalData::DAS::Feature
Bio::EnsEMBL::ExternalData::DAS::GenomicMapper
Bio::EnsEMBL::ExternalData::DAS::GenomicPeptideMapper
Bio::EnsEMBL::ExternalData::DAS::SourceParser qw ( %GENE_COORDS %PROT_COORDS is_genomic )
Bio::EnsEMBL::ExternalData::DAS::Stylesheet
Bio::EnsEMBL::ExternalData::DAS::XrefPeptideMapper
Bio::EnsEMBL::Mapper
Bio::EnsEMBL::Utils::Argument qw ( rearrange )
Bio::EnsEMBL::Utils::Exception qw ( throw info warning )
POSIX qw ( ceil )
Synopsis
  # Instantiate with a list of Bio::EnsEMBL::ExternalData::DAS::Source objects:
my $c = Bio::EnsEMBL::ExternalData::DAS::Coordinator->new(-sources => $list);

# Fetch by slice
my $struct = $c->fetch_Features( $slice );

for my $logic_name ( keys %{ $struct } ) {

# Bio::EnsEMBL::ExternalData::DAS::Source object:
my $source = $struct->{$logic_name}{'source'}{'object'};
my $error = $struct->{$logic_name}{'source'}{'error'};

# Bio::EnsEMBL::ExternalData::DAS::Stylesheet object:
my $stylesheet = $struct->{$logic_name}{'stylesheet'}{'object'};
my $s_error = $struct->{$logic_name}{'stylesheet'}{'error'};

for my $segment ( keys %{ $struct->{$logic_name}{'features'} } ) {

my $f_url = $struct->{$logic_name}{'features'}{$segment}{'url'};
my $f_error = $struct->{$logic_name}{'features'}{$segment}{'error'};
# arrayref of Bio::EnsEMBL::ExternalData::DAS::Feature objects:
my $features = $struct->{$logic_name}{'features'}{$segment}{'objects'};

}

}

# Fetch by gene
my $struct = $c->fetch_Features( $gene );

# Fetch by protein
my $struct = $c->fetch_Features( $translation );

# Feature ID filtering
my $struct = $c->fetch_Features( $slice, feature => 'xyz1234' );

# Type ID and Group ID filtering
my $struct = $c->fetch_Features( $slice, group => 'xyz', type => 'foo' );
Description
Given a set of DAS::Source objects and a target object such as a Slice or
Translation, will simultaneously perform all DAS requests and map the features
onto the target object.
Methods
_choose_coord_systems
No description
Code
_get_Segments
No description
Code
fetch_FeaturesDescriptionCode
map_Features
No description
Code
newDescriptionCode
Methods description
fetch_Featurescode    nextTop
  Arg [1]    : Bio::EnsEMBL::Object $root_obj - the query object (e.g. Slice, Gene)
Arg [2] : (optional) hash of filters:
maxbins - the maximum available "rendering space" for features
NOTE this is only passed to the server, it is not
guaranteed to be honoured
feature - the feature ID
type - the type ID
group - the group ID
Description: Fetches DAS features for a given Slice, Gene or Translation
Example : $hashref = $c->fetch_Features( $slice, type => 'mytype' );
Returntype : A hash reference containing Bio::...::DAS::Feature and
Bio::...::DAS::Stylesheet objects:
{
$logic_name => {
'source' => {
'object' => $source_object,
'error' => 'Not applicable',
},
'features' => {
'X:1000,2000' => {
'error' => 'Error fetching...',
'url' => 'http://...',
'objects' => [ $feat1, $feat2 ],
},
},
'stylesheet' => {
'object' => $style1,
'error' => 'Error fetching...',
},
}
}
Exceptions : Throws if the object is not supported
Caller :
Status :
newcodeprevnextTop
  Arg [..]   : List of named arguments:
-SOURCES - Arrayref of Bio::EnsEMBL::DAS::Source objects.
-PROXY - A URL to use as an HTTP proxy server
-NOPROXY - A list of domains/hosts to not use the proxy for
-TIMEOUT - The request timeout, in seconds
-GENE_COORDS - Override the coordinate system representing genes
-PROT_COORDS - Override the coordinate system representing proteins
Description: Constructor
Returntype : Bio::EnsEMBL::DAS::Coordinator
Exceptions : If unable to assign the gene and protein coordinate systems
Caller :
Status :
Methods code
_choose_coord_systemsdescriptionprevnextTop
sub _choose_coord_systems {
  my ( $self, $target_cs, $target_ob, $coord_systems ) = @_;
  
  my @best_genomic = ();
  my @best_gene    = ();
  my @best_protein = ();
  
  my $csa = $target_ob->adaptor->db->get_CoordSystemAdaptor;
  my $ens_rank;
  my $ens_gene;
  my $ens_prot;
  
  for my $cs ( @{ $coord_systems } ) {
    if ( $cs->equals( $target_cs ) ) {
      return [ $cs ];
    }
    if ( is_genomic($cs) || $cs->name eq 'toplevel' ) {
      my $tmp = $csa->fetch_by_name( $cs->name, $cs->version ) || $csa->fetch_by_name( $cs->name ) || next;
      if ( !defined $ens_rank || $tmp->rank < $ens_rank ) {
        $ens_rank = $tmp->rank;
        @best_genomic = ($cs);
      } elsif ( $tmp->rank == $ens_rank ) {
        push @best_genomic, $cs;
      }
    }
    elsif ( $GENE_COORDS{$cs->name} ) {
      if ( $cs->equals( $self->{'gene_cs'} ) ) {
        $ens_gene = 1;
        @best_gene = ($cs);
      }
      elsif ( !$ens_gene ) {
        push @best_gene, $cs;
      }
    }
    elsif ( $PROT_COORDS{$cs->name} ) {
      if ( $cs->equals( $self->{'prot_cs'} ) ) {
        $ens_prot = 1;
        @best_protein = ($cs);
      }
      elsif ( !$ens_prot ) {
        push @best_protein, $cs;
      }
    }
  }
  
  my $best = [];
  if ( is_genomic($target_cs) || $target_cs->name eq 'toplevel' ) {
    $best = @best_genomic ?\@ best_genomic : @best_protein ?\@ best_protein :\@ best_gene;
  } elsif ( $GENE_COORDS{$target_cs->name} ) {
    $best = @best_gene    ?\@ best_gene    : @best_genomic ?\@ best_genomic :\@ best_protein;
  } elsif ( $PROT_COORDS{$target_cs->name} ) {
    $best = @best_protein ?\@ best_protein : @best_gene    ?\@ best_gene    :\@ best_genomic;
  }
  
  info('Choosen from '.scalar @{$coord_systems}.' coords: ' . join '; ', map { $_->name .' '. $_->version } @{$best});
  return $best;
}

1;
}
_get_SegmentsdescriptionprevnextTop
sub _get_Segments {
  my $self = shift;
  my $from_cs = shift; # the "foreign" source coordinate system 
my $to_cs = shift; # the target coordsys that mapped objects will be converted to
my ($slice, $gene, $prot) = @_; #warn sprintf "Getting mapper for %s -> %s", $from_cs->name, $to_cs->name;
info(sprintf 'Building mappings for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); my %mappers = (); my @segments = (); # There are several different Mapper implementations in the API to convert
# between various coordinate systems: AssemblyMapper, TranscriptMapper,
# IdentityXref. For DAS, we often need to convert across the different realms
# these mappers serve, such as chromosome:NCBI35 -> peptide which requires an
# intermediary NCBI35 -> NCBI36 step. Unfortunately, the different mappers all
# work in different ways and have different interfaces and limitations.
#
# For example, AssemblyMapper and TranscriptMapper use custom methods rather
# than the standard API 'map_coordinates', IdentityXref uses custom
# 'external_id' and 'ensembl_id' identifiers for the regions it is mapping
# between, and all name the coordinate systems differently. These differences
# mean the different mappers cannot be strung together, so this module uses
# wrappers in order to achieve this.
# Mapping to slice-relative coordinates
if ( is_genomic($to_cs) ) { # Sanity checks
$slice || throw('Trying to convert to slice coordinates, but no Slice provided'); $slice->coord_system->equals($to_cs) || throw('Provided slice is not in target coordinate system'); # Mapping from a slice-based coordinate system
if ( is_genomic($from_cs) || $from_cs->name eq 'toplevel' ) { # No mapping needed - the coords are identical
if ( $from_cs->equals( $to_cs ) ) { info(sprintf 'No mappings needed for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); push @segments, sprintf '%s:%s,%s', $slice->seq_region_name, $slice->start, $slice->end; } # No mapping needed, but we need to indicate what the real coordsys is
elsif ( $from_cs->name eq 'toplevel' && $slice->is_toplevel ) { info(sprintf 'No mappings needed for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); $self->{'passthrough'}{$from_cs->name}{$from_cs->version}{$slice->seq_region_name} = $to_cs; push @segments, sprintf '%s:%s,%s', $slice->seq_region_name, $slice->start, $slice->end; } # We can't map from toplevel, only detect when no mapping is required...
elsif ( $from_cs->name eq 'toplevel' ) { warning(sprintf 'Mapping from toplevel to %s is only supported where the given %1$s slice is also toplevel', $to_cs->name); } else { # AssemblyMapperAdaptor doesn't like DAS::CoordSystem
# And sometimes DAS coordinate systems have spurious versions...
my $csa = $slice->adaptor->db->get_CoordSystemAdaptor; my $ama = $slice->adaptor->db->get_AssemblyMapperAdaptor; my $tmpfrom = $csa->fetch_by_name( $from_cs->name, $from_cs->version ) || $csa->fetch_by_name( $from_cs->name ); my $tmpto = $csa->fetch_by_name( $to_cs->name, $to_cs->version ) || $csa->fetch_by_name( $to_cs->name ); my $tmpmap = $tmpfrom && $tmpto ? $ama->fetch_by_CoordSystems($tmpfrom, $tmpto) : undef; # Ensembl might not support a specific genomic -> genomic mapping
if ( $tmpmap ) { # Wrapper for AssemblyMapper:
my $mapper = Bio::EnsEMBL::ExternalData::DAS::GenomicMapper->new( 'from', 'to', $tmpfrom, $tmpto, $tmpmap ); # Map backwards to get the query segments
my @coords = $mapper->map_coordinates($slice->seq_region_name, $slice->start, $slice->end, $slice->strand, 'to'); info(sprintf 'Adding mappings for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); for my $c ( @coords ) { $c->isa('Bio::EnsEMBL::Mapper::Coordinate') || next; $self->{'mappers'}{$from_cs->name}{$from_cs->version}{$c->id} ||= $mapper; push @segments, sprintf '%s:%s,%s', $c->id, $c->start, $c->end; } } else { warning(sprintf 'Mapping from %s to %s is not supported', $from_cs->name, $to_cs->name); } } } # Mapping from ensembl_gene to slice
elsif ( $from_cs->equals( $self->{'gene_cs'} ) ) { my @genes = $gene ? ($gene) : @{ $slice->get_all_Genes }; info(sprintf 'Adding mappings for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); for my $g ( @genes ) { # Genes are already definitely relative to the target slice, so don't need to do any assembly mapping
my $mapper = Bio::EnsEMBL::Mapper->new('from', 'to', $from_cs, $to_cs); $mapper->add_map_coordinates( $g->stable_id, 1, $g->length, $g->seq_region_strand, $slice->seq_region_name, $g->seq_region_start, $g->seq_region_end ); $self->{'mappers'}{$from_cs->name}{$from_cs->version}{$g->stable_id} = $mapper; push @segments, $g->stable_id; } } # Mapping from ensembl_peptide to slice
elsif ( $from_cs->equals( $self->{'prot_cs'} ) ) { my @transcripts = $gene ? @{ $gene->get_all_Transcripts } : @{ $slice->get_all_Transcripts }; info(sprintf 'Adding mappings for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); for my $tran ( @transcripts ) { my $p = $tran->translation || next; $self->{'mappers'}{$from_cs->name}{$from_cs->version}{$p->stable_id} ||= Bio::EnsEMBL::ExternalData::DAS::GenomicPeptideMapper->new('from', 'to', $from_cs, $to_cs, $tran); push @segments, $p->stable_id; } } # Mapping from translation-mapped xref to slice
elsif ( my $callback = $XREF_PEPTIDE_FILTERS{$from_cs->name} ) { # Mapping path is xref -> ensembl_peptide -> slice
my $mid_cs = $self->{'prot_cs'}; my @transcripts = $gene ? @{ $gene->get_all_Transcripts } : @{ $slice->get_all_Transcripts }; for my $tran ( @transcripts ) { my $p = $tran->translation || next; # first stage mapper: xref to translation
push @segments, @{ $self->_get_Segments($from_cs, $mid_cs, $slice, $gene, $p) }; } # If the first stage actually produced mappings, we'll need to map from
# peptide to slice
if ($self->{'mappers'}{$from_cs->name}{$from_cs->version}) { # second stage mapper: gene or translation to transcript's slice
$self->_get_Segments($mid_cs, $to_cs, $slice, $gene, $prot); } } # Mapping from gene-mapped xref to slice
elsif ( $callback = $XREF_GENE_FILTERS{$from_cs->name} ) { my @genes = $gene ? ($gene) : @{ $slice->get_all_Genes }; info(sprintf 'Adding (nonpositional) mappings for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); for my $g ( @genes ) { for my $xref (grep { $callback->{'predicate'}($_) } @{ $g->get_all_DBEntries() }) { my $segid = $callback->{'transformer'}( $xref ); push @segments, $segid; } # Gene-based xrefs don't have alignments and so don't generate mappings.
# It is enough to simply collate the segment ID's; only non-positional
# features will mapped.
} } else { warning(sprintf 'Mapping from %s to %s is not supported', $from_cs->name, $to_cs->name); } } # end mapping to slice/gene
# Mapping to peptide-relative coordinates
elsif ( $to_cs->equals( $self->{'prot_cs'} ) ) { $prot || throw('Trying to convert to peptide coordinates, but no Translation provided'); # Mapping from protein to protein (the same)
if ( $from_cs->equals( $to_cs ) ) { # no mapper needed
info(sprintf 'No mappings needed for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); $self->{'passthrough'}{$from_cs->name}{$from_cs->version}{$prot->stable_id} = $to_cs; push @segments, $prot->stable_id; } # Mapping from slice. Note that from_cs isnt necessarily the same as the transcript's coord_system
elsif ( is_genomic($from_cs) || $from_cs->name eq 'toplevel' ) { my $ta = $prot->adaptor->db->get_TranscriptAdaptor(); my $sa = $prot->adaptor->db->get_SliceAdaptor(); my $tran = $ta->fetch_by_translation_stable_id($prot->stable_id); $slice = $sa->fetch_by_transcript_stable_id($tran->stable_id); $tran = $tran->transfer($slice); my $tran_cs = $slice->coord_system; # second stage mapper: transcript's slice to protein
my $mapper = Bio::EnsEMBL::ExternalData::DAS::GenomicPeptideMapper->new('from', 'to', $tran_cs, $to_cs, $tran); info(sprintf 'Adding mappings for %s %s -> %s %s', $tran_cs->name, $tran_cs->version, $to_cs->name, $to_cs->version); $self->{'mappers'}{$slice->coord_system->name}{$slice->coord_system->version||''}{$slice->seq_region_name} = $mapper; # first stage mapper: from_cs to transcript's slice
push @segments, @{ $self->_get_Segments($from_cs, $tran->slice->coord_system, $slice) }; } # Mapping from gene on a slice with the same coordinate system
elsif ( $from_cs->equals( $self->{'gene_cs'} ) ) { my $ga = $prot->adaptor->db->get_GeneAdaptor(); my $sa = $prot->adaptor->db->get_SliceAdaptor(); $gene = $ga->fetch_by_translation_stable_id($prot->stable_id); $slice = $sa->fetch_by_gene_stable_id($gene->stable_id); # Second stage mapper: slice to peptide
$self->_get_Segments($slice->coord_system, $to_cs, $slice, $gene, $prot); # First stage mapper: gene to slice
push @segments, @{ $self->_get_Segments($from_cs, $slice->coord_system, $slice, $gene, $prot) }; } # Mapping from xref to peptide
elsif ( my $callback = $XREF_PEPTIDE_FILTERS{$from_cs->name} ) { info(sprintf 'Adding mappings for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); for my $xref (grep { $callback->{'predicate'}($_) } @{ $prot->get_all_DBEntries() }) { my $segid = $callback->{'transformer'}( $xref ); push @segments, $segid; # If xref has a cigar alignment, use it to build mappings to the
# Ensembl translation (assume they all align to the translation).
# If not, we still query with the segment because non-positional
# features don't need a mapper.
if ($xref->can('get_mapper')) { my $mapper = Bio::EnsEMBL::ExternalData::DAS::XrefPeptideMapper->new('from', 'to', $from_cs, $to_cs, $xref, $prot); $mapper->external_id($segid); $mapper->ensembl_id($prot->stable_id); $self->{'mappers'}{$from_cs->name}{$from_cs->version}{$segid} = $mapper; } } } # Mapping from gene-mapped xref to peptide
elsif ( $callback = $XREF_GENE_FILTERS{$from_cs->name} ) { my $ga = $prot->adaptor->db->get_GeneAdaptor(); $gene = $ga->fetch_by_translation_stable_id($prot->stable_id); info(sprintf 'Adding (nonpositional) mappings for %s %s -> %s %s', $from_cs->name, $from_cs->version, $to_cs->name, $to_cs->version); for my $xref (grep { $callback->{'predicate'}($_) } @{ $gene->get_all_DBEntries() }) { my $segid = $callback->{'transformer'}( $xref ); push @segments, $segid; # Gene-based xrefs don't have alignments and so don't generate mappings.
# It is enough to simply collate the segment ID's; only non-positional
# features will mapped.
} } else { warning(sprintf 'Mapping from %s to %s is not supported', $from_cs->name, $to_cs->name); } } else { warning(sprintf 'Mapping to %s is not supported', $to_cs->name); } my %segments = map { $_ => 1 } @segments; return [ keys %segments ];
}
fetch_FeaturesdescriptionprevnextTop
sub fetch_Features {
  my ( $self, $target_obj ) = splice @_, 0, 2;
  my %filters = @_; # maxbins, feature, type, group
# TODO: review this structure that is returned, would we prefer to split by
# segment ID? We don't always know it before we parse the feature though (e.g.
# when querying by feat ID). Also stylesheet errors aren't segment-specific
my ( $target_cs, $target_segment, $slice, $gene, $prot ); if ( $target_obj->isa('Bio::EnsEMBL::Gene') ) { $slice = $target_obj->slice; $gene = $target_obj; $target_cs = $slice->coord_system; # actually want features relative to the slice
$target_segment = $target_obj->stable_id; } elsif ( $target_obj->isa('Bio::EnsEMBL::Slice') ) { $slice = $target_obj; $target_cs = $target_obj->coord_system; $target_segment = sprintf '%s:%s,%s', $target_obj->seq_region_name, $target_obj->start, $target_obj->end; } elsif ( $target_obj->isa('Bio::EnsEMBL::Translation') ) { $prot = $target_obj; $target_cs = Bio::EnsEMBL::ExternalData::DAS::CoordSystem->new( -name => 'ensembl_peptide' ); $target_segment = $target_obj->stable_id; } else { throw('Unsupported object type: '.$target_obj); } my $target_species = $target_obj->adaptor->db->species; my %coords = (); my $final = {}; my %sources_with_data = (); #==========================================================#
# First sort the sources into coordinate systems #
#==========================================================#
for my $source (@{ $self->{'sources'} }) { # Set up the data structure...
$final->{$source->logic_name} = { 'source' => { 'object' => $source, }, 'features' => {}, 'stylesheet' => {}, }; my @coord_systems = @{ $self->_choose_coord_systems($target_cs, $target_obj, $source->coord_systems) }; if (! scalar @coord_systems ) { warning($source->logic_name.' has no coord systems'); $final->{$source->logic_name}{'source'}{'error'} = 'Bad source configuration'; next; } # Check the coordinate system is the correct species (if it has one)
@coord_systems = grep { $_->matches_species( $target_species ) } @coord_systems; if (! scalar @coord_systems ) { $final->{$source->logic_name}{'source'}{'error'} = "Source not compatible with $target_species"; } # Query in all compatible coordinate systems
for my $source_cs ( @coord_systems ) { # The coordinate system name doesn't need species in it because we have
# just checked it is species-compatible - we treat them the same from now
#Êon. That is, Ensembl,Gene_ID == Ensembl,Gene_ID,Homo sapiens.
my $cs_key = $source_cs->name . ' ' . $source_cs->version; # Sort sources by coordinate system
if ( !$coords{$cs_key} ) { # Do a lot of funky stuff to get the query segments, and build up
# mappers at the same time
my $segments = $self->_get_Segments( $source_cs, $target_cs, $slice, $gene, $prot); $coords{ $cs_key } = { 'sources' => {}, 'coord_system' => $source_cs, 'segments' => $segments }; } $coords{ $cs_key }{'sources'}{$source->full_url} ||= []; push @{ $coords{ $cs_key }{'sources'}{$source->full_url} }, $source; } } #==========================================================#
# Parallelise the requests for each coordinate system #
#==========================================================#
my $daslite = $self->{'daslite'}; # Split the requests by coordinate system, i.e. parallelise
# requests for segments that are from the same coordinate system
while (my ($coord_key, $coord_data) = each %coords) { my @segments = @{ $coord_data->{'segments'} }; my @urls = keys %{ $coord_data->{'sources'} }; my $source_cs = $coord_data->{'coord_system'}; my $coord_name = $source_cs->name . ' ' . $source_cs->version; # Either the mapping isn't supported, or nothing maps to the region we're
# interested in.
if (! scalar @segments ) { info("No segments found for $coord_name"); for ( values %{ $coord_data->{'sources'} } ) { for my $source (@{ $_ }) { $final->{$source->logic_name}{'source'}{'error'} = 'Not applicable'; } } next; } info("Querying with @segments for $coord_name"); $daslite->dsn(\@ urls ); my $response; my $statuses; my $maxbins = $source_cs->equals( $target_cs ) ? $filters{maxbins} : undef; #==========================================================#
# Get features for all DAS sources #
#==========================================================#
}
map_FeaturesdescriptionprevnextTop
sub map_Features {
  my ( $self, $features, $source_cs, $to_cs, $slice ) = splice @_, 0, 5;
  my %filters = @_; # feature, type, group
# TODO: implement maxbins filter??
my $filter_f = $filters{feature}; my $filter_t = $filters{type}; my $filter_g = $filters{group}; # If filtering we're more likely to have a small region, so it's better to
# make 4 tests when filtering and 1 when not than always make 3 tests.
# The big question is, is it better to test each filter is enabled than to
# just preprocess in an extra iteration? I suspect the former.
my $nofilter = !$filter_f && !$filter_t && !$filter_g; # Code block to build a feature object from raw hash
my $build_Feature = sub { my $f = shift; $f = Bio::EnsEMBL::ExternalData::DAS::Feature->new( $f ); # Where target coordsys is genomic, make a slice-relative feature
if ($slice) { $f->slice($slice->seq_region_Slice); $f = $f->transfer($slice); } else { $f->seqname( $f->{'segment_id'} ); } return $f; }; # Code block to apply optional filters
my $filter_Feature = sub { my $f = shift; # Test type first, because this is the more likely filter for large regions
# where efficiency matters most
if ( $filter_t ) { $f->{'type_id'} eq $filter_t || return 0; } if ( $filter_f ) { $f->{'feature_id'} eq $filter_f || return 0; } if ( $filter_g ) { return 0 unless grep { $_->{'group_id'} eq $filter_g } @{ $f->{'group'}||[] }; } return 1; }; my @new_features = (); # As part of the feature parsing we need to do some converting and filtering.
# We could do this in a separate loop before doing any mapping, but this adds
# an extra iteration step which is inefficient (especially for large numbers
# of features). So we duplicate a bit of code.
if ( $source_cs->equals( $to_cs ) || ( $slice && $source_cs->name eq 'toplevel' && $slice->is_toplevel ) ) { for my $f ( @{ $features } ) { if ( $nofilter || &$filter_Feature( $f ) ) { $f->{'strand'} = $ORI_NUMERIC{$f->{'orientation'} || '.'} || 0; # Convert to Ensembl-style (numeric) strand
push @new_features, &$build_Feature( $f ); # Build object
} } return\@ new_features; } # May need multiple mapping steps to reach the target coordinate system
# This loop works by undefining $source_cs, the redefining it when we know
# which coordinate system the mapper is mapping to
while ( $source_cs && !$source_cs->equals($to_cs) ) { info('Beginning mapping from '.$source_cs->name); my @this_features = @{ $features }; my $mappers = $self->{'mappers'}{$source_cs->name}{$source_cs->version||''}; my $passthrough = $self->{'passthrough'}{$source_cs->name}{$source_cs->version||''}; $features = []; my $this_cs = undef; my $positional_mapping_errors = 0; # Map the current set of features to the next coordinate system
for my $f ( @this_features ) { $nofilter || &$filter_Feature( $f ) || next; my $strand = $f->{'strand'}; if (!defined $strand) { $strand = $f->{'strand'} = $ORI_NUMERIC{$f->{'orientation'} || '.'} || 0; } # It doesn't matter what coordinate system non-positional features come
# from, they are always included and don't need mapping
if (!$f->{'start'} && !$f->{'end'}) { push @new_features, &$build_Feature( $f ); next; } my $segid = $f->{'segment_id'}; # Check for passthrough mappings (i.e. no mapping is needed but the coord system needs to change)
# This is used when mapping from toplevel
if (my $pass_cs = $passthrough->{$segid}) { $this_cs = $pass_cs; # If this is the final step, convert to Ensembl Feature
if ( $this_cs->equals( $to_cs ) ) { push @new_features, &$build_Feature( $f ); } else { push @{ $features }, $f; } next; } # Otherwise check there are mappings available for this segment
my $mapper = $mappers->{$segid}; if (!$mapper) { $positional_mapping_errors++; next; } $this_cs = $mapper->{'to_cs'} || throw('Mapper maps to unknown coordinate system'); # Get new coordinates for this feature
my @coords = $mapper->map_coordinates($segid, $f->{'start'}, $f->{'end'}, $strand, 'from'); # Create new features from the mapped coordinates
for my $c ( @coords ) { $c->isa('Bio::EnsEMBL::Mapper::Coordinate') || next; my %new = %{ $f }; $new{'segment_id'} = $c->id; $new{'start' } = $c->start; $new{'end' } = $c->end; $new{'strand' } = $c->strand; # If this is the final step, convert to Ensembl Feature
if ( $this_cs->equals( $to_cs ) ) { push @new_features, &$build_Feature(\% new ); } else { push @{ $features },\% new; } } } if ($positional_mapping_errors) { warning(sprintf '%d positional features could not be mapped (%s -> %s)', $positional_mapping_errors, $source_cs ? $source_cs->name.' '.$source_cs->version : 'UNKNOWN', $this_cs ? $this_cs->name .' '.$this_cs->version : 'UNKNOWN' ); } $source_cs = $this_cs; } return\@ new_features; } # Supports mappings:
# location-based to location-based
# location-based to protein-based
# protein-based to location-based
# protein-based to protein-based
# gene-based to location-based
# gene-based to protein-based
# xref-based to location-based
# xref-based to protein-based
# xref-based to gene-based
#
# Coordinate system definitions:
# location-based == chromosome|clone|contig|scaffold|supercontig etc
# protein-based == $self->{prot_cs} (ensembl_peptide)
# gene-based == $self->{gene_cs} (ensembl_gene)
# xref-based == uniprot_peptide|entrez_gene... (see %XREF_PEPTIDE_FILTERS)
}
newdescriptionprevnextTop
sub new {
  my $class = shift;
  
  my ($sources, $proxy, $no_proxy, $timeout, $gene_cs, $prot_cs)
    = rearrange(['SOURCES','PROXY', 'NOPROXY', 'TIMEOUT',
                 'GENE_COORDS', 'PROT_COORDS'], @_);
  
  $sources = [$sources] if ($sources && !ref $sources);
  
  my $das = Bio::Das::Lite->new();
  $das->user_agent('Ensembl');
  $das->timeout($timeout);
  $das->caching(0);
  $das->http_proxy($proxy);
  
  # Bio::Das::Lite support for no_proxy added around September 2008
if ($no_proxy) { if ($das->can('no_proxy')) { $das->no_proxy($no_proxy); } else { warning("Installed version of Bio::Das::Lite does not support use of 'no_proxy'"); } } $gene_cs ||= $GENE_COORDS{'ensembl_gene'} || throw('Unable to determine Gene coordinate system'); $prot_cs ||= $PROT_COORDS{'ensembl_peptide'} || throw('Unable to determine Peptide coordinate system'); my $self = { 'sources' => $sources, 'daslite' => $das, 'gene_cs' => $gene_cs, 'prot_cs' => $prot_cs, 'objects' => {}, }; bless $self, $class; return $self;
}
General documentation
No general documentation available.