BioMart::Dataset
GenomicAlign
Toolbar
Summary
BioMart::Dataset::GenomicAlign
Package variables
Privates (from "my" definitions)
$i;
Included modules
Inherit
Synopsis
A hidden Dataset containing sequence attributes that can be imported to other
visible Datasets which are compatible with its required data input, based
on the presence of one or more importable-exportable relationships.
Description
Dataset providing Align Sequence attributes, which can be imported into
other Datasets. GenomicAlign is itself not a visible Dataset.
Methods
__processNewQuery | No description | Code |
_addRow | No description | Code |
_calcSeqOverLocations | No description | Code |
_continueWithBatch | No description | Code |
_editSequence | No description | Code |
_getConfigurationTree | No description | Code |
_getExportables | No description | Code |
_getImportables | No description | Code |
_getLocationFrom | No description | Code |
_getResultTable | No description | Code |
_ignoreRow | No description | Code |
_incrementBatch | No description | Code |
_initializeDNAAdaptor | No description | Code |
_initializeIndices | No description | Code |
_initializeReturnRow | No description | Code |
_new | No description | Code |
_processRow | No description | Code |
_processSequence | No description | Code |
_rawSequences | No description | Code |
_rawSequencesOriginal | No description | Code |
_rc | No description | Code |
Methods description
None available.
Methods code
__processNewQuery | description | prev | next | Top |
# Prepares this dataset for a new query: records the requested sequence
# attribute, selects the processing recipe, clears the per-query state and
# registers the importable and value filters found on the query.
#
# Parameters:
#   $query - query object; getAllAttributes and getAllFilters are called on
#            it with this dataset's name.
#
# Throws BioMart::Exception::Configuration when
#   - the sequence attribute name does not contain "raw",
#   - more than one FilterList with a linkName is received,
#   - a FilterList has no linkName,
#   - a filter is neither a FilterList nor a ValueFilter,
#   - a value is already stored on the dataset under a value filter's name,
#   - no importable was received at all.
sub __processNewQuery {
    my ($self, $query) = @_;

    # Only the first attribute registered for this dataset is considered.
    my $attribute = $query->getAllAttributes($self->name)->[0];
    my $seq_name  = $attribute->name;
    $self->set('seq_name', $seq_name);

    if ($seq_name =~ m/raw/) {
        $self->set('recipe', '_rawSequences');
    }
    else {
        BioMart::Exception::Configuration->throw("Unsupported sequence name $seq_name recieved by GenomicAlign\n");
    }

    # Clear everything accumulated while serving the previous query.
    $self->set('importable', undef);
    $self->set('lastPkey', undef);
    $self->set('importable_indices', undef);
    $self->set('returnRow_indices', undef);
    $self->set('locations', {});
    $self->set('outRow', undef);
    $self->set('calc_location', undef);
    $self->set('sequence', undef);

    my $filters = $query->getAllFilters($self->name);
    foreach my $filt (@{$filters}) {
        if ($filt->isa("BioMart::Configuration::FilterList")) {
            my $link_name = $filt->linkName;
            if ($link_name) {
                if ($self->get('importable')) {
                    BioMart::Exception::Configuration->throw("Recieved two importables, can only work with one\n");
                }
                else {
                    $self->set('importable', $filt);
                }
            }
            else {
                # An undefined linkName is rendered as an empty string so the
                # message is built without an uninitialized-value warning.
                $link_name = '' unless defined $link_name;
                BioMart::Exception::Configuration->throw("Recieved invalid linkName ".$link_name."\n");
            }
        }
        else {
            unless ($filt->isa("BioMart::Configuration::ValueFilter")) {
                BioMart::Exception::Configuration->throw("Recieved unknown filter ".$filt->name." in GenomicSequence Dataset!\n");
            }
            if ($self->get($filt->name)) {
                BioMart::Exception::Configuration->throw("Recieved two ".$filt->name." flanking filters in GenomicSequence Dataset\n");
            }

            # The filter value is the first column of the first row of the
            # filter's table; it is stored under the filter's own name.
            my $table = $filt->getTable;
            my $row   = $table->nextRow;
            my $value = $row->[0];
            if ($value) {
                $self->set($filt->name, $value);
            }
        }
    }

    unless ($self->get('importable')) {
        BioMart::Exception::Configuration->throw("No Importable Recieved in GenomicAlign\n");
    }
}
# Appends one output row to the result table and counts it towards the
# current batch.
#
# Parameters:
#   $table - table object receiving the row through addRow
#   $row   - the row to append
sub _addRow {
    my ($self, $table, $row) = @_;

    $table->addRow($row);

    return $self->_incrementBatch;
}
# Widens the stored 'calc_location' so that it also covers $this_location.
#
# A location with a false start or a false end is ignored. The first
# accepted location is copied key by key; later ones can only lower the
# stored start and raise the stored end.
#
# Parameters:
#   $this_location - hash reference with at least 'start' and 'end'
sub _calcSeqOverLocations {
    my ($self, $this_location) = @_;

    return unless $this_location->{start};
    return unless $this_location->{end};

    my $span = $self->get('calc_location');
    if (!$span) {
        # Nothing stored yet: start from a shallow copy of this location.
        $span = { %{$this_location} };
    }
    else {
        if ($this_location->{"start"} < $span->{"start"}) {
            $span->{"start"} = $this_location->{"start"};
        }
        if ($this_location->{"end"} > $span->{"end"}) {
            $span->{"end"} = $this_location->{"end"};
        }
    }

    return $self->set('calc_location', $span);
}
# Tells the caller whether another row may be processed in this batch.
#
# Parameters:
#   $batchSize - maximum number of rows per batch; a false value disables
#                the batch-size check
#   $rtable    - row source; a BioMart::ResultTable is asked inCurrentBatch,
#                anything else is asked hasMoreRows
#
# Returns the row source's answer, further restricted to
# "batchIndex < $batchSize" when both that answer and $batchSize are true.
sub _continueWithBatch {
    my ($self, $batchSize, $rtable) = @_;

    my $more;
    if ($rtable->isa("BioMart::ResultTable")) {
        $more = $rtable->inCurrentBatch();
    }
    else {
        $more = $rtable->hasMoreRows;
    }

    return $more unless $more && $batchSize;

    return $self->get('batchIndex') < $batchSize;
}
# Applies the edits stored under 'seq_edits' to a sequence, in place.
#
# 'seq_edits' is a ';'-separated list of "start,end,replacement" entries;
# start and end are 1-based and inclusive. Nothing happens when the
# sequence or the edit list is false.
#
# Parameters:
#   $seqref - reference to the sequence string to modify
sub _editSequence {
    my ($self, $seqref) = @_;

    my $seq_edits = $self->get('seq_edits');
    if ($$seqref && $seq_edits) {
        for my $edit (split /\;/, $seq_edits) {
            my ($from, $to, $replacement) = split /\,/, $edit;
            # Edits are applied one after another to the already edited string.
            substr($$seqref, $from - 1, $to - $from + 1) = $replacement;
        }
    }
}
# Asks the 'configurator' parameter object for the configuration tree of
# this dataset, identified by its virtual schema and name.
sub _getConfigurationTree {
    my ($self) = @_;

    my $configurator = $self->getParam('configurator');

    return $configurator->getConfigurationTree($self->virtualSchema, $self->name);
}
# Returns an array reference of exportables taken from the 'exportables'
# hash.
#
# Parameters:
#   $linkName - optional; when true, the result holds only the entry stored
#               under that key, otherwise every value of the hash
sub _getExportables {
    my ($self, $linkName) = @_;

    my $exportables = $self->get('exportables');

    return $linkName
        ? [ $exportables->{$linkName} ]
        : [ values %{$exportables} ];
}
# Looks up importables in the 'importables' hash.
#
# Parameters:
#   $linkName - optional key into the hash
#
# Returns the entry stored under $linkName as it is (not wrapped in an
# array) when $linkName is true; otherwise an array reference holding
# every value of the hash.
sub _getImportables {
    my ($self, $linkName) = @_;

    my $importables = $self->get('importables');

    if (!$linkName) {
        return [ values %{$importables} ];
    }

    return $importables->{$linkName};
}
# Picks the requested fields out of a result row.
#
# Parameters:
#   $curRow         - array reference holding the current row
#   @expectedFields - names of the fields to extract
#
# Returns a hash reference with one key per expected field. The value is
# the row cell at the position recorded for that name in
# 'importable_indices', or undef when no position is recorded.
sub _getLocationFrom {
    my ($self, $curRow, @expectedFields) = @_;

    my $index_of = $self->get('importable_indices');

    my %location = map {
        ( $_ => ( exists $index_of->{$_} ? $curRow->[ $index_of->{$_} ] : undef ) )
    } @expectedFields;

    return \%location;
}
# Fills the supplied table with the next batch of sequence rows.
#
# Named parameters (passed as a flat key/value list):
#   query       - the query being served
#   table       - table object that receives the rows
#   batch_size  - passed to toXML for a "web" server, otherwise used as the
#                 batch limit while processing importable rows
#   batch_start - only read for a "web" server; defaults to 0
#
# Returns the table that was passed in.
sub _getResultTable {
    my ($self, @param) = @_;

    $self->set('batchIndex', 0);

    # $^W is switched off for the rest of this call before the argument
    # list is turned into a hash.
    local $^W = 0;
    my %args = @param;

    my $query      = $args{'query'};
    my $atable     = $args{'table'};
    my $batch_size = $args{'batch_size'};

    if ($self->serverType eq "web") {
        # Web server: post the query as XML and copy the tab-separated
        # lines of the answer into the table.
        my $batch_start = $args{'batch_start'} || 0;
        my $location    = $self->getParam('configurator')->get('location');
        my $xml         = $query->toXML($batch_start, $batch_size, 0);

        foreach my $line ($location->getResultSet("", "POST", $xml)) {
            if ($line =~ /No Sequence Returned/) {
                $self->_setExhausted(1);
                last;
            }
            $atable->addRow([ split(/\t/, $line) ]);
        }

        return $atable;
    }

    $self->_initializeDNAAdaptor($query->
                                 getInterfaceForDataset($self->name));

    my $importable = $self->get('importable');
    my $rtable     = $importable->getTable();

    # hasMoreRows is sampled once, before any row is consumed; only a table
    # that was already empty at this point marks the dataset as exhausted.
    my $has_rows = $rtable->hasMoreRows;
    while ($has_rows && $self->_continueWithBatch($batch_size, $rtable)) {
        $self->_processRow($atable, $rtable->nextRow);
    }
    if (!$has_rows) {
        $self->_setExhausted(1);
    }

    # Hand the table back to the importable and store the importable again.
    $importable->setTable($rtable);
    $self->set('importable', $importable);

    # Close every adaptor stored under 'dna'.
    my $dna = $self->get('dna');
    $_->close for values %{$dna};

    return $atable;
}
# Decides whether the current row is to be skipped.
#
# Parameters:
#   $curRow - array reference holding the current row
#
# Returns 0 when no 'ignore' hash is set. Otherwise the row's value for the
# field named by 'ignore_row' is looked up; the result is that value when
# it is false, else whatever the 'ignore' hash stores under it.
sub _ignoreRow {
    my ($self, $curRow) = @_;

    my $ignore = $self->get('ignore');
    if (!$ignore) {
        return 0;
    }

    my $field = $self->get('ignore_row');
    my $value = $self->_getLocationFrom($curRow, $field)->{$field};

    return $value && $ignore->{$value};
}
# Adds one to the 'batchIndex' counter.
sub _incrementBatch {
    my ($self) = @_;

    my $count = $self->get('batchIndex');

    $self->set('batchIndex', ++$count);
}
# Builds one DNAAdaptor per entry of the configuration tree's optional
# parameters and stores them, keyed by attribute name, under 'dna'.
#
# The optional parameters are a ';'-separated list; each entry is a
# ','-separated list in the order: attribute name, DNA table name, chunk
# name field, chunk start field, sequence field, chunk size.
#
# Only the invocant is read from the argument list.
# NOTE(review): the call in _getResultTable passes the query's interface
# for this dataset, which is therefore unused here - confirm whether
# getConfigurationTree was meant to receive it.
#
# Throws BioMart::Exception::Configuration when the optional parameters are
# not set or when an adaptor comes back false.
sub _initializeDNAAdaptor {
    my ($self) = @_;

    my $dna_params = $self->getConfigurationTree()->optionalParameters;
    if (!$dna_params) {
        BioMart::Exception::Configuration->throw("GenomicSequence Dataset requires optional_parameters to be set in the DatasetConfig\n");
    }

    my %adaptor_for;
    for my $species_spec (split /\;/, $dna_params) {
        my ($attribute_name, $table_name, $name_field, $start_field, $seq_field, $chunk_size)
            = split /\,/, $species_spec;

        $adaptor_for{$attribute_name} = BioMart::Dataset::GenomicSequence::DNAAdaptor->new(
            'seq_name'              => $attribute_name,
            'dna_tablename'         => $table_name,
            'seq_fieldname'         => $seq_field,
            'chunk_name_fieldname'  => $name_field,
            'chunk_start_fieldname' => $start_field,
            'chunk_size'            => $chunk_size,
            'configurator'          => $self->getParam('configurator')
        );

        if (!$adaptor_for{$attribute_name}) {
            BioMart::Exception::Configuration->throw("Couldnt connect to DNAAdaptor for $attribute_name\n\n");
        }
    }

    $self->set('dna', \%adaptor_for);
}
# Works out which columns of an incoming row belong to the importable and
# which are to be passed through, and stores the three resulting lookups.
#
# Parameters:
#   $numFields - number of columns in an incoming row
#
# Stores:
#   importable_indices - filter name => column position, in filter order
#                        starting at 0
#   returnRow_indices  - column position => running number from 0, for every
#                        column after the importable ones up to $numFields
#   importable_names   - the filter names in filter order
sub _initializeIndices {
    my ($self, $numFields) = @_;

    my @names = map { $_->name } @{ $self->get('importable')->getAllFilters };

    my %importable_index;
    @importable_index{@names} = (0 .. $#names);

    # Columns following the importable ones are numbered from 0 again.
    my $first_extra  = scalar @names;
    my %return_index = map { ( $_ => $_ - $first_extra ) } $first_extra .. $numFields - 1;

    $self->set('importable_indices', \%importable_index);
    $self->set('returnRow_indices', \%return_index);
    $self->set('importable_names', \@names);
}
# Starts an output row as a shallow copy of the current input row.
#
# Parameters:
#   $curRow - array reference holding the current row
#
# Returns a new array reference with the same elements.
sub _initializeReturnRow {
    my ($self, $curRow) = @_;

    return [ @{$curRow} ];
}
# Constructor hook: lets the parent class initialise the object, then
# declares this dataset's attributes with their default values.
#
# Parameters:
#   @param - passed through unchanged to the parent's _new
#
# No local $i is declared here; such a lexical would be unused and would
# only shadow the file-scoped $i that _processSequence increments.
sub _new {
    my ($self, @param) = @_;

    $self->SUPER::_new(@param);

    # DNA adaptors and their configuration
    $self->attr('dna', undef);
    $self->attr('dnaparams', undef);

    # processing recipe and sequence options
    $self->attr('recipe', 'raw_sequence');
    $self->attr('ignore', undef);
    $self->attr('ignore_row', undef);
    $self->attr('seq_edits', undef);
    $self->attr('seq_name', undef);
    $self->attr('translate', 0);

    # importable and row bookkeeping
    $self->attr('importable', undef);
    $self->attr('lastPkey', undef);
    $self->attr('importable_names', undef);
    $self->attr('importable_indices', undef);
    $self->attr('returnRow_indices', undef);
    $self->attr('returnRow', undef);
    $self->attr('batchIndex', 0);
    $self->attr('seq_species', undef);

    # per-query working state
    $self->attr('locations', {});
    $self->attr('outRow', undef);
    $self->attr('calc_location', undef);
    $self->attr('sequence', undef);
}
# Processes one incoming row with the method named by 'recipe'.
#
# While 'importable_indices' is still unset, the column lookups are first
# initialised from the width of this row - unless 'exhausted' is set, in
# which case a "No Sequence Returned" row is added to the table instead.
# The recipe is invoked in either case.
#
# Parameters:
#   $atable - table receiving the output rows
#   $curRow - array reference holding the current row
sub _processRow {
    my ($self, $atable, $curRow) = @_;

    if (!$self->get('importable_indices')) {
        if (!$self->get('exhausted')) {
            $self->_initializeIndices(scalar @{$curRow});
        }
        else {
            $atable->addRow(["No Sequence Returned"]);
        }
    }

    my $recipe = $self->get('recipe');

    return $self->$recipe($atable, $curRow);
}
# Fetches the sequence for one location from the DNA adaptor registered
# under $attribute_name.
#
# Parameters:
#   $location       - hash reference; the keys 'chr', 'start', 'end' and
#                     'strand', each suffixed with $count, are read from it
#   $attribute_name - key of the adaptor inside the 'dna' hash
#   $count          - suffix appended to the location keys
#
# Returns the sequence, reverse-complemented through _rc when the strand is
# negative, or undef when the sequence is empty.
sub _processSequence {
    # The argument order follows the call made from _rawSequences.
    my ($self, $location, $attribute_name, $count) = @_;

    my $seq = '';
    my $dna = $self->get('dna')->{$attribute_name};

    my $chr    = $location->{'chr'.$count};
    my $start  = $location->{'start'.$count};
    my $end    = $location->{'end'.$count};
    my $strand = $location->{'strand'.$count};

    if ($strand < 0) {
        $seq .= $self->_rc( $dna->getSequence( $chr, $start, $end ) );
    }
    else {
        $seq .= $dna->getSequence( $chr, $start, $end );
    }

    # $i is not local to this sub; it is the file-scoped counter listed
    # among the package's private variables.
    $i++;

    if (length($seq)) {
        return $seq;
    }
    return undef;
}
# Recipe for raw sequences: builds one output row from the current input
# row, with one sequence appended per configured species attribute.
#
# The species attribute names are the first ','-separated field of each
# ';'-separated entry of the configuration tree's optional parameters.
# For every species, the next four importable names are taken as the
# name/start/end/strand fields of that species' location.
#
# Parameters:
#   $atable - table receiving the finished row
#   $curRow - array reference holding the current input row
sub _rawSequences {
    my ($self, $atable, $curRow) = @_;

    # Work on a copy: names are consumed from the front as species are
    # processed.
    my @importable_names = @{ $self->get('importable_names') };

    my $dna_params = $self->getConfigurationTree()->optionalParameters;
    my @species_attribute_names;
    foreach my $species_spec (split(/\;/, $dna_params)) {
        my ($attribute_name) = split(/\,/, $species_spec);
        push @species_attribute_names, $attribute_name;
    }

    my $outRow = $self->_initializeReturnRow($curRow);

    # $count numbers the species from 1; _processSequence receives it as
    # its third argument.
    my $count = 1;

    # The loop ends at the first false attribute name.
    while (my $attribute_name = shift @species_attribute_names) {
        my ($name, $start, $end, $strand) = splice(@importable_names, 0, 4);

        my $location = $self->_getLocationFrom($curRow, ($name, $start, $end, $strand));
        my $sequence = $self->_processSequence($location, $attribute_name, $count);
        if ($sequence) {
            push @{$outRow}, $sequence;
        }

        # One further importable name is skipped after each species.
        # NOTE(review): presumably a fifth field per species that is not a
        # location field - confirm against the importable definition.
        shift @importable_names;
        $count++;
    }

    $self->_addRow($atable, $outRow);
}

# NOTE(review): this true value trailed the sub in the original listing;
# presumably it is the module's closing "1;".
1;
# Earlier single-location variant of the raw-sequence recipe: reads one
# chr/start/end(/strand) location from the row, fetches and edits its
# sequence, and adds a row with the sequence appended.
#
# Parameters:
#   $atable - table receiving the finished row
#   $curRow - array reference holding the current input row; nothing
#             happens when it is false
sub _rawSequencesOriginal {
    my ($self, $atable, $curRow) = @_;

    my $rank = 1;
    if ($curRow) {
        my $importable_indices = $self->get('importable_indices');
        my $locations = {};

        my $location = $self->_getLocationFrom($curRow, "chr", "start", "end");
        # The strand defaults to 1 when the importable has no strand field.
        $location->{"strand"} = ( exists( $importable_indices->{"strand"} ) )
            ? $curRow->[ $importable_indices->{"strand"} ]
            : 1;
        $locations->{$rank} = $location if ($location->{"start"});

        # NOTE(review): _rawSequences calls _processSequence with
        # (location, attribute name, count); this one-argument call with a
        # rank-keyed hash does not match that form - confirm before
        # reviving this recipe.
        my $sequence = $self->_processSequence($locations);
        $self->_editSequence(\$sequence);

        if ($sequence) {
            # _addRow takes only the table and the row, so the sequence has
            # to be part of the row itself.
            my $outRow = $self->_initializeReturnRow($curRow);
            push @{$outRow}, $sequence;
            $self->_addRow($atable, $outRow);
        }
    }
}
# Reverse-complements a nucleotide sequence.
#
# The string is reversed and each of the letters
# A B C D G H K M R S T U V Y (upper or lower case) is replaced by its
# partner from the translation table; any other character is kept as is.
#
# Parameters:
#   $seq - the sequence string
#
# Returns the reverse-complemented string.
sub _rc {
    my ($self, $seq) = @_;

    (my $revcomp = reverse($seq))
        =~ tr/YABCDGHKMRSTUVyabcdghkmrstuv/RTVGHCDMKYSAABrtvghcdmkysaab/;

    return $revcomp;
}
General documentation
AUTHOR - Arek Kasprzyk, Darin London | Top |
The peptide translation algorithm is taken directly
from the CodonTable module that is part of the
BioPerl project. For more information about the
BioPerl project, visit:
http://www.bioperl.org