BioMart::Dataset
GenomicMAlign
Toolbar
Summary
BioMart::Dataset::GenomicMAlign
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
Description
Methods
BEGIN | | Code |
__processNewQuery | No description | Code |
_addRow | No description | Code |
_addRow_Original | No description | Code |
_calcSeqOverLocations | No description | Code |
_codingCdnaPeptideSequences | No description | Code |
_continueWithBatch | No description | Code |
_editSequence | No description | Code |
_exonIntronFlankSequences | No description | Code |
_exonSequences | No description | Code |
_getConfigurationTree | No description | Code |
_getLocationFrom | No description | Code |
_getResultTable | No description | Code |
_ignoreRow | No description | Code |
_incrementBatch | No description | Code |
_initializeDNAAdaptor | No description | Code |
_initializeIndices | No description | Code |
_initializeReturnRow | No description | Code |
_modFlanks | No description | Code |
_new | No description | Code |
_nonOrientedRawSequences | No description | Code |
_processRow | No description | Code |
_processSequence | No description | Code |
_processSequenceNonOriented | No description | Code |
_processSequenceOriginal | No description | Code |
_rawSequences | No description | Code |
_rawSequencesOriginal | No description | Code |
_rc | No description | Code |
_translate | No description | Code |
_translate_ambiguous_codon | No description | Code |
_unambiquous_codons | No description | Code |
Methods description
None available.
Methods code
# Compile-time setup of the lookup data used by the translation helpers:
#   @NAMES     - display name of each codon table ('' where no table exists)
#   @TABLES    - one 64-character amino-acid string per codon table
#   $CODONS    - lower-case codon => column index into a @TABLES string
#   $TRCOL     - column index => lower-case codon (reverse of $CODONS)
#   %IUPAC_DNA - IUPAC nucleotide code => list of bases it can stand for
BEGIN { @NAMES = (
'Standard', 'Vertebrate Mitochondrial', 'Yeast Mitochondrial', 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma', 'Invertebrate Mitochondrial', 'Ciliate, Dasycladacean and Hexamita Nuclear', '', '',
'Echinoderm Mitochondrial', 'Euplotid Nuclear', '"Bacterial"', 'Alternative Yeast Nuclear', 'Ascidian Mitochondrial', 'Flatworm Mitochondrial', 'Blepharisma Nuclear', 'Chlorophycean Mitochondrial', '', '', '', '',
'Trematode Mitochondrial', 'Scenedesmus obliquus Mitochondrial', 'Thraustochytrium Mitochondrial' );
# NOTE: inside qw() the '' tokens below are literal two-character words, not
# empty strings. They occupy the slots of the undefined tables so that every
# later table keeps the same index as its entry in @NAMES.
@TABLES =
qw(
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
'' ''
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
'' '' '' ''
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
);
# Enumerate all 64 codons in t, c, a, g order. $x is the position of the
# codon's amino acid inside every @TABLES string.
my @nucs = qw(t c a g);
my $x = 0;
($CODONS, $TRCOL) = ({}, {});
for my $i (@nucs) {
for my $j (@nucs) {
for my $k (@nucs) {
my $codon = "$i$j$k";
$CODONS->{$codon} = $x;
$TRCOL->{$x} = $codon;
$x++;
}
}
}
# IUPAC nucleotide codes expanded to the concrete bases they represent.
%IUPAC_DNA = ( A => [qw(A)],
C => [qw(C)],
G => [qw(G)],
T => [qw(T)],
U => [qw(U)],
M => [qw(A C)],
R => [qw(A G)],
W => [qw(A T)],
S => [qw(C G)],
Y => [qw(C T)],
K => [qw(G T)],
V => [qw(A C G)],
H => [qw(A C T)],
D => [qw(A G T)],
B => [qw(C G T)],
X => [qw(G A T C)],
N => [qw(G A T C)]
); } |
__processNewQuery | description | prev | next | Top |
# Prepares this dataset for a new query.
#
# Works out which sequence attribute was requested, selects the matching
# per-row handler ("recipe"), resets all per-query state and reads the
# filters attached to the query.
#
# Parameters:
#   $query - query object; must answer getAllAttributes($name) and
#            getAllFilters($name).
#
# Throws BioMart::Exception::Configuration when
#   - the sequence name is not supported,
#   - more than one importable FilterList is supplied,
#   - a FilterList has no linkName,
#   - a filter is neither a FilterList nor a ValueFilter,
#   - the same value filter is supplied twice,
#   - no importable is supplied at all.
sub __processNewQuery {
    my ($self, $query) = @_;

    # The first attribute names the sequence type, unless it is the 'pkey'
    # attribute, in which case the last attribute is used instead.
    my $attribute = $query->getAllAttributes($self->name)->[0];
    my $seq_name  = $attribute->name;
    if ($seq_name eq 'pkey') {
        $attribute = $query->getAllAttributes($self->name)->[-1];
        $seq_name  = $attribute->name;
    }
    $self->set('seq_name', $seq_name);

    # Pick the method that _processRow dispatches every row to.
    if ($seq_name =~ m/oriented_raw_sequence/i) {
        $self->set('recipe', '_rawSequences');
    }
    elsif ($seq_name =~ m/nonOrientedRawSequence/i) {
        $self->set('recipe', '_nonOrientedRawSequences');
    }
    else {
        BioMart::Exception::Configuration->throw("Unsupported sequence name $seq_name recieved by GenomicMAlign\n");
    }

    # Reset everything that is carried over between rows of one query.
    $self->set('downstream_flank', 0);
    $self->set('upstream_flank', 0);
    $self->set('importable', undef);
    $self->set('lastPkey', undef);
    $self->set('importable_indices', undef);
    $self->set('returnRow_indices', undef);
    $self->set('locations', {});
    $self->set('outRow', undef);
    $self->set('calc_location', undef);
    $self->set('sequence', undef);

    my $filters = $query->getAllFilters($self->name);
    foreach my $filt (@{$filters}) {
        if ($filt->isa("BioMart::Configuration::FilterList")) {
            # A FilterList with a linkName is the importable; only one is
            # allowed per query.
            if ($filt->linkName) {
                if ($self->get('importable')) {
                    BioMart::Exception::Configuration->throw("Recieved two importables, can only work with one\n");
                }
                else {
                    $self->set('importable', $filt);
                }
            }
            else {
                BioMart::Exception::Configuration->throw("Recieved invalid linkName ".
                    $filt->linkName."\n");
            }
        }
        else {
            unless ($filt->isa("BioMart::Configuration::ValueFilter")) {
                BioMart::Exception::Configuration->throw("Recieved unknown filter ".$filt->name." in GenomicMAlign Dataset!\n");
            }
            if ($self->get($filt->name)) {
                BioMart::Exception::Configuration->throw("Recieved two ".$filt->name." flanking filters in GenomicMAlign Dataset\n");
            }

            # The first cell of the filter's first row is stored under the
            # filter's own name; false values are not stored.
            my $table = $filt->getTable;
            my $row   = $table->nextRow;
            my $value = $row->[0];
            if ($value) {
                $self->set($filt->name, $value);
            }
        }
    }

    unless ($self->get('importable')) {
        BioMart::Exception::Configuration->throw("No Importable Recieved in GenomicMAlign\n");
    }
}
# Appends a finished output row to the result table and counts it against
# the current batch.
#
# Parameters:
#   $atable - table object answering addRow($row)
#   $outRow - array reference to append
sub _addRow {
    my $self = shift;
    my ($atable, $outRow) = @_;

    $atable->addRow($outRow);
    return $self->_incrementBatch;
}
# Variant of _addRow that first appends $sequence as the last column of
# $outRow, then adds the row to $atable and counts it against the batch.
#
# Parameters:
#   $atable   - table object answering addRow($row)
#   $outRow   - array reference; modified in place
#   $sequence - value pushed onto $outRow
sub _addRow_Original {
    my ($self, $atable, $outRow, $sequence) = @_;

    push @{$outRow}, $sequence;
    $atable->addRow($outRow);
    return $self->_incrementBatch;
}
# Widens the stored 'calc_location' so that it also covers $this_location.
#
# The first usable location is copied (shallow) and stored; later ones only
# lower its start and/or raise its end. Locations with a false start or end
# are ignored.
sub _calcSeqOverLocations {
    my ($self, $this_location) = @_;
    return unless $this_location->{start};
    return unless $this_location->{end};

    my $span = $self->get('calc_location');
    if (!$span) {
        # Nothing stored yet: start from a copy of this location.
        $span = { %{$this_location} };
    }
    else {
        if ($this_location->{"start"} < $span->{"start"}) {
            $span->{"start"} = $this_location->{"start"};
        }
        if ($this_location->{"end"} > $span->{"end"}) {
            $span->{"end"} = $this_location->{"end"};
        }
    }
    return $self->set('calc_location', $span);
}
# Row handler that groups input rows by primary key.
#
# Each call with a row stores that row's location under its rank. When the
# primary key differs from the one seen on the previous call, or when the
# method is called without a row (pkey is then undef), the locations
# collected so far are turned into one sequence and one output row is
# emitted. Running state is kept in the 'locations', 'lastPkey' and
# 'outRow' attributes between calls.
#
# NOTE(review): _addRow as defined in this file unpacks only
# ($atable, $outRow), so the third argument passed below appears to be
# dropped; and _processSequence is defined here with
# ($location, $attribute_name, $count) but is called with a single hash of
# locations. This looks like code carried over from another dataset class -
# confirm before relying on it.
sub _codingCdnaPeptideSequences
{ my ($self, $atable, $curRow) = @_;
my $importable_indices = $self->get('importable_indices');
# A row whose pkey column is false is grouped under the placeholder 'DUMMY'.
my $pkey = $curRow ?
($curRow->[$importable_indices->{"pkey"}] || 'DUMMY') : undef;
my $lastPkey = $self->get('lastPkey') || $pkey;
my $locations = $self->get('locations');
my $outRow = $self->get('outRow');
# Flush the previous group once the key changes or input has ended.
if( ( ! defined $pkey ) or ( $pkey ne $lastPkey ) ){
my $sequence;
# Only build a sequence if at least one stored location has a start.
if( grep{ $locations->{$_}->{"start"} } keys %$locations ) {
$sequence = $self->_processSequence($locations);
$sequence = $self->_translate($sequence)
if ($self->get('translate'));
$self->_editSequence(\$sequence);
}
if ($sequence) {
$self->_addRow($atable, $outRow, $sequence);
}
else {
$self->_addRow($atable, $outRow, "Sequence unavailable");
}
$locations = {};
$outRow = undef;
}
# Record this row's location (flanks applied, no shift) under its rank.
if ($curRow) {
my $rank = $curRow->[ $importable_indices->{"rank"} ];
my $location = $self->_getLocationFrom($curRow, "chr", "start", "end",
"strand", "phase");
$location = $self->_modFlanks($location, 0);
$locations->{$rank} = $location if ($location->{"start"});
}
# Persist the running state for the next call.
$outRow ||= $self->_initializeReturnRow($curRow);
$self->set('locations', $locations);
$self->set('lastPkey', $pkey);
$self->set('outRow', $outRow); } |
# Tells the caller whether another row may be processed in this batch.
#
# The table must still have something to offer (inCurrentBatch for a
# BioMart::ResultTable, hasMoreRows for anything else) and, when a batch
# size is given, fewer than $batchSize rows must have been emitted so far.
sub _continueWithBatch {
    my ($self, $batchSize, $rtable) = @_;

    my $continue;
    if ($rtable->isa("BioMart::ResultTable")) {
        $continue = $rtable->inCurrentBatch();
    }
    else {
        $continue = $rtable->hasMoreRows;
    }

    # Without rows, or without a batch limit, the table's answer is final.
    return $continue unless $continue && $batchSize;

    my $emitted = $self->get('batchIndex');
    return ($emitted < $batchSize);
}
# Applies the 'seq_edits' attribute to the sequence behind $seqref, in place.
#
# 'seq_edits' is a ';'-separated list of "start,end,replacement" entries
# with 1-based inclusive coordinates. Edits are applied in the order listed.
# Nothing happens when either the sequence or the edit list is false.
sub _editSequence {
    my ($self, $seqref) = @_;
    my $seq_edits = $self->get('seq_edits');
    return unless $$seqref && $seq_edits;

    for my $edit (split /\;/, $seq_edits) {
        my ($from, $to, $replacement) = split /\,/, $edit;
        my $span = $to - $from + 1;
        substr($$seqref, $from - 1, $span) = $replacement;
    }
    return;
}
# Row handler that emits one sequence per primary key, covering the span
# from the smallest start to the largest end of all rows of that key.
#
# Each call with a row widens the stored 'calc_location' through
# _calcSeqOverLocations. When the primary key differs from the one seen on
# the previous call, or the method is called without a row (pkey is then
# undef), the accumulated span is passed through _modFlanks, fetched and
# emitted as one output row. When the 'seq_name' attribute contains "flank",
# _modFlanks is asked to shift to the flank region rather than extend the
# span.
#
# NOTE(review): _addRow as defined in this file unpacks only
# ($atable, $outRow), so the third argument passed below appears to be
# dropped; and _processSequence is defined here with
# ($location, $attribute_name, $count) but is called with a single hash of
# locations - confirm before relying on this method.
sub _exonIntronFlankSequences
{ my ($self, $atable, $curRow) = @_;
my $rank = 1;
my $importable_indices = $self->get('importable_indices');
# A row whose pkey column is false is grouped under the placeholder 'DUMMY'.
my $pkey = $curRow ?
($curRow->[$importable_indices->{"pkey"}] || 'DUMMY') : undef;
my $lastPkey = $self->get('lastPkey') || $pkey;
my $outRow = $self->get('outRow');
# Flush the previous group once the key changes or input has ended.
if( ( ! defined $pkey ) or ( $pkey ne $lastPkey ) ){
my $shift = ($self->get('seq_name') =~ m/flank/); my $location = $self->_modFlanks( $self->get('calc_location'),
$shift );
# The span belongs to the finished group; start the next one empty.
$self->set('calc_location', undef);
my $sequence;
if ($location->{"start"}) {
my $locations = { $rank => $location };
$sequence = $self->_processSequence($locations);
$self->_editSequence(\$sequence);
}
if ($sequence) {
$self->_addRow($atable, $outRow, $sequence);
}
else {
$self->_addRow($atable, $outRow, "Sequence unavailable");
}
$outRow = undef;
}
# Fold this row's coordinates into the running span.
if ($curRow) {
my $location = $self->_getLocationFrom($curRow, "chr", "start",
"end", "strand");
$self->_calcSeqOverLocations( $location );
}
# Persist the running state for the next call.
$outRow ||= $self->_initializeReturnRow($curRow);
$self->set('lastPkey', $pkey);
$self->set('outRow', $outRow); } |
# Row handler that emits one output row per input row.
#
# The row's location (with flanks applied, not shifted) is fetched as a
# sequence; rows without a usable sequence get the text
# "Sequence unavailable". Rows rejected by _ignoreRow are skipped, and when
# the 'ignore' attribute is set the value of this row's 'ignore_row' field
# is recorded in it after the row has been emitted.
sub _exonSequences {
    my ($self, $atable, $curRow) = @_;
    return unless $curRow;
    return if $self->_ignoreRow($curRow);

    my $exon = $self->_modFlanks(
        $self->_getLocationFrom($curRow, "chr", "start", "end", "strand"), 0
    );
    my $locations = { 1 => $exon };

    my $sequence;
    if ($exon->{"start"}) {
        $sequence = $self->_processSequence($locations);
        $self->_editSequence(\$sequence);
    }

    $self->_addRow(
        $atable,
        $self->_initializeReturnRow($curRow),
        $sequence ? $sequence : "Sequence unavailable"
    );

    # Remember the value identifying this row so _ignoreRow can reject it
    # the next time it shows up.
    if (my $ignore = $self->get('ignore')) {
        my $ignore_row = $self->get('ignore_row');
        my $seen = $self->_getLocationFrom($curRow, $ignore_row);
        $ignore->{ $seen->{$ignore_row} } = 1;
        $self->set('ignore', $ignore);
    }
}
# Asks the 'configurator' parameter object for the configuration tree of
# this dataset (identified by its virtual schema and name) for the given
# interface and dataset counter, and returns whatever it answers.
sub _getConfigurationTree {
    my ($self, $interface, $dsCounter) = @_;

    my $configurator = $self->getParam('configurator');
    return $configurator->getConfigurationTree(
        $self->virtualSchema, $self->name, $interface, $dsCounter
    );
}
# Picks the requested fields out of an input row.
#
# Returns a hash reference with one key per name in @expectedFields. The
# value is the row cell at the index recorded for that name in the
# 'importable_indices' attribute, or undef when no index is recorded.
sub _getLocationFrom {
    my ($self, $curRow, @expectedFields) = @_;
    my $index_of = $self->get('importable_indices');

    my %location;
    for my $field (@expectedFields) {
        my $known = exists( $index_of->{$field} );
        $location{$field} = $known ? $curRow->[ $index_of->{$field} ] : undef;
    }
    return \%location;
}
# Fills $param{table} with the sequence rows for one batch and returns it.
#
# Named parameters (passed as a flat key/value list):
#   query       - query object being answered
#   table       - table object that receives the rows
#   batch_size  - maximum number of rows for this call (false = no limit)
#   batch_start - first row to request; only used for web servers
#
# For a server of type "web" the query is sent as XML to the configured
# location and the tab-separated lines that come back are added as rows; a
# line containing "No Sequence Returned" marks the dataset as exhausted.
#
# Otherwise the DNA adaptors are initialised, rows are pulled from the
# importable's table and handed to _processRow until the batch is full or
# the table has nothing more to offer, and the adaptors are closed again.
sub _getResultTable {
    my ($self, @param) = @_;
    $self->set('batchIndex', 0);

    # Warnings stay off for the rest of this call, as in the original code,
    # which disabled them before turning the argument list into a hash.
    local($^W) = 0;
    my (%param) = @param;
    my $query      = $param{'query'};
    my $atable     = $param{'table'};
    my $batch_size = $param{'batch_size'};

    if ($self->serverType eq "web") {
        my $batch_start = $param{'batch_start'} || 0;
        my $location = $self->getParam('configurator')->get('location');
        my $xml = $query->toXML($batch_start, $batch_size, 0);
        foreach my $el ($location->getResultSet("", "POST", $xml)) {
            if ($el =~ /No Sequence Returned/) {
                $self->_setExhausted(1);
                last;
            }
            my @clean = split(/\t/, $el);
            $atable->addRow([@clean]);
        }
        return $atable;
    }
    else {
        $self->_initializeDNAAdaptor(
            $query->getInterfaceForDataset($self->name));
    }

    my $importable = $self->get('importable');
    my $rtable = $importable->getTable();

    # Input columns are kept in the output when the results are hashed or
    # when more than one attribute was requested.
    my $attribute_count = @{ $query->getAllAttributes };
    if ($rtable->hashedResults || $attribute_count > 1) {
        $self->set('attribute_merge_required', '1');
    }

    # $has_rows is sampled once, before the loop; the loop itself is bounded
    # by _continueWithBatch.
    my $has_rows = $rtable->hasMoreRows;
    while ($has_rows && $self->_continueWithBatch($batch_size, $rtable)) {
        $self->_processRow($atable, $rtable->nextRow);
    }

    # A call that starts with an empty table marks the end of the data; the
    # row handler is called once more without a row.
    unless ($has_rows) {
        $self->_setExhausted(1);
        $self->_processRow($atable);
    }

    $importable->setTable($rtable);
    $self->set('importable', $importable);

    # Release every per-attribute DNA adaptor.
    my $dna = $self->get('dna');
    foreach my $attribute_name (keys %$dna) {
        $dna->{$attribute_name}->close;
    }
    return $atable;
}
# Decides whether $curRow has to be skipped.
#
# Returns 0 when no 'ignore' hash is set. Otherwise the row's value for the
# field named by the 'ignore_row' attribute is looked up; the result is true
# only when that value is true and is recorded in the 'ignore' hash.
sub _ignoreRow {
    my ($self, $curRow) = @_;

    my $ignore = $self->get('ignore');
    return 0 if !$ignore;

    my $ignore_row = $self->get('ignore_row');
    my $value = $self->_getLocationFrom($curRow, $ignore_row)->{$ignore_row};
    return $value && $ignore->{$value};
}
# Adds one to the 'batchIndex' attribute, which counts the rows emitted in
# the current batch.
sub _incrementBatch {
    my ($self) = @_;

    my $emitted = $self->get('batchIndex');
    return $self->set('batchIndex', ++$emitted);
}
# Creates one DNAAdaptor per entry of the dataset's optional parameters and
# stores them, keyed by attribute name, in the 'dna' attribute.
#
# The optional-parameters string is a ';'-separated list of entries, each a
# ','-separated list of: attribute name, DNA table name, chunk-name field,
# chunk-start field, sequence field, chunk size.
#
# Throws BioMart::Exception::Configuration when the optional parameters are
# missing or an adaptor cannot be created.
sub _initializeDNAAdaptor {
    my ($self, $interface) = @_;

    my $dna_params = $self->getConfigurationTree($interface)->optionalParameters;
    BioMart::Exception::Configuration->throw("GenomicMAlign Dataset requires optional_parameters to be set in the DatasetConfig\n")
        unless $dna_params;

    my %adaptor_for;
    for my $species_params (split /\;/, $dna_params) {
        my ($attribute_name, $dnatablename, $chunk_name_fieldname,
            $chunk_start_fieldname, $seqfieldname, $chunk_size)
            = split /\,/, $species_params;

        my $adaptor = BioMart::Dataset::GenomicSequence::DNAAdaptor->new(
            'seq_name'              => $attribute_name,
            'dna_tablename'         => $dnatablename,
            'seq_fieldname'         => $seqfieldname,
            'chunk_name_fieldname'  => $chunk_name_fieldname,
            'chunk_start_fieldname' => $chunk_start_fieldname,
            'chunk_size'            => $chunk_size,
            'configurator'          => $self->getParam('configurator'),
        );
        if (!$adaptor) {
            BioMart::Exception::Configuration->throw("Couldnt connect to DNAAdaptor\n");
        }
        $adaptor_for{$attribute_name} = $adaptor;
    }
    return $self->set('dna', \%adaptor_for);
}
# Works out which columns of an input row belong to the importable and
# which are passed through.
#
# The importable's filters claim the leading columns, one each, in filter
# order ('importable_indices': filter name => column). Every remaining
# column up to $numFields is numbered from zero in 'returnRow_indices'
# (input column => output position).
sub _initializeIndices {
    my ($self, $numFields) = @_;

    my @filters = @{ $self->get('importable')->getAllFilters };

    my %importable_indices;
    my $next_column = 0;
    $importable_indices{ $_->name } = $next_column++ for @filters;

    my %returnRow_indices;
    for my $column ($next_column .. $numFields - 1) {
        $returnRow_indices{$column} = $column - $next_column;
    }

    $self->set('importable_indices', \%importable_indices);
    $self->set('returnRow_indices', \%returnRow_indices);
}
# Returns the array that sequences are appended to for one output row:
# the input row itself when 'attribute_merge_required' is set (so its
# columns are kept in the output), otherwise a new empty array.
sub _initializeReturnRow {
    my ($self, $curRow) = @_;

    return $curRow if $self->get('attribute_merge_required');
    return [];
}
# Applies the 'upstream_flank' / 'downstream_flank' attributes to $location
# (modified in place and returned).
#
# With a false $shift the location is extended: on the forward strand the
# start moves back by the upstream flank and the end forward by the
# downstream flank; on the reverse strand (strand < 0) the two are swapped.
#
# With a true $shift the location is replaced by the flank region alone,
# immediately next to the original location. The upstream flank wins when
# both are set.
#
# Locations with a false start or end are returned untouched. A start that
# ends up below 1 is clamped to 1.
#
# Throws BioMart::Exception::Configuration when a shift is requested but
# neither flank is set.
sub _modFlanks {
    my ($self, $location, $shift) = @_;
    return $location unless $location->{start};
    return $location unless $location->{end};

    if (!$shift) {
        # Extend: which flank goes on which side depends on the strand.
        my ($before, $after) = ($location->{"strand"} < 0)
            ? ('downstream_flank', 'upstream_flank')
            : ('upstream_flank',   'downstream_flank');
        $location->{"start"} -= $self->get($before);
        $location->{"end"}   += $self->get($after);
    }
    elsif (my $upstream = $self->get('upstream_flank')) {
        if ($location->{"strand"} < 0) {
            $location->{"start"} = $location->{"end"} + 1;
            $location->{"end"}  += $upstream;
        }
        else {
            $location->{"end"}    = $location->{"start"} - 1;
            $location->{"start"} -= $upstream;
        }
    }
    elsif (my $downstream = $self->get('downstream_flank')) {
        if ($location->{"strand"} < 0) {
            $location->{"end"}    = $location->{"start"} - 1;
            $location->{"start"} -= $downstream;
        }
        else {
            $location->{"start"} = $location->{"end"} + 1;
            $location->{"end"}  += $downstream;
        }
    }
    else {
        BioMart::Exception::Configuration->throw("Requests for flank sequence must be accompanied by an upstream_flank or downstream_flank request\n");
    }

    $location->{"start"} = 1 if $location->{"start"} < 1;
    return $location;
}
# Object initialiser: runs the parent class's _new with the same arguments,
# then declares every attribute this dataset uses together with its initial
# value.
sub _new {
    my ($self, @param) = @_;
    $self->SUPER::_new(@param);

    # DNA access and recipe selection.
    $self->attr('dna', undef);
    $self->attr('dnaparams', undef);
    $self->attr('recipe', undef);

    # Row filtering and sequence post-processing options.
    $self->attr('ignore', undef);
    $self->attr('ignore_row', undef);
    $self->attr('seq_edits', undef);
    $self->attr('codon_table_id', 1);
    $self->attr('seq_name', undef);
    $self->attr('translate', 0);
    $self->attr('downstream_flank', 0);
    $self->attr('upstream_flank', 0);

    # Per-query state carried between rows.
    $self->attr('importable', undef);
    $self->attr('lastPkey', undef);
    $self->attr('importable_indices', undef);
    $self->attr('returnRow_indices', undef);
    $self->attr('returnRow', undef);
    $self->attr('batchIndex', 0);
    $self->attr('locations', {});
    $self->attr('outRow', undef);
    $self->attr('calc_location', undef);
    $self->attr('sequence', undef);
    $self->attr('attribute_merge_required', 0);
}
# Row handler for the non-oriented raw sequence recipe.
#
# For every species listed in the dataset's optional parameters (first
# ','-separated field of each ';'-separated entry) the next four importable
# filter names are taken as that species' location fields, the location is
# read from $curRow and its sequence is fetched with
# _processSequenceNonOriented. All sequences are appended to one output row,
# which is then added to $atable.
#
# Nothing happens when called without a row. A species whose sequence is
# empty or undefined contributes no column to the output row. The species
# loop stops at the first attribute name that is false.
sub _nonOrientedRawSequences {
    my ($self, $atable, $curRow) = @_;
    return unless $curRow;

    my $interface  = "default";
    my $dna_params = $self->getConfigurationTree($interface)->optionalParameters;
    my @species_attribute_names = map {
        my ($attribute_name) = split(/\,/, $_);
        $attribute_name;
    } split(/\;/, $dna_params);

    my $initRow = $self->_initializeReturnRow($curRow);
    my @importable_names =
        map { $_->name } @{ $self->get('importable')->getAllFilters };

    # $count is the 1-based species number; _processSequenceNonOriented uses
    # it as the suffix of the location keys.
    my $count = 1;
    while (my $attribute_name = shift @species_attribute_names) {
        # Always four names per species; missing ones come out as undef.
        my @fields   = map { shift @importable_names } 1 .. 4;
        my $location = $self->_getLocationFrom($curRow, @fields);
        my $sequence = $self->_processSequenceNonOriented(
            $location, $attribute_name, $count);
        push @{$initRow}, $sequence if $sequence;
        $count++;
    }
    $self->_addRow($atable, $initRow);
}
# Hands one input row to the recipe method selected for the current query.
#
# On the first row of a query (no 'importable_indices' yet) the column
# indices are initialised from the row's width. If instead the dataset is
# already exhausted at that point, the single row "No Sequence Returned" is
# added to $atable. The recipe is called in either case.
sub _processRow {
    my ($self, $atable, $curRow) = @_;

    if (!$self->get('importable_indices')) {
        if ($self->get('exhausted')) {
            $atable->addRow(["No Sequence Returned"]);
        }
        else {
            $self->_initializeIndices(scalar @{$curRow});
        }
    }

    my $recipe = $self->get('recipe');
    return $self->$recipe($atable, $curRow);
}
# Fetches the sequence for one species' location, oriented by strand.
#
# Parameters:
#   $location       - hash reference holding the keys "chr$count",
#                     "start$count", "end$count" and "strand$count"
#   $attribute_name - key of the adaptor to use within the 'dna' attribute
#   $count          - suffix appended to the location key names
#
# Returns the sequence, reverse-complemented through _rc when the strand is
# negative, or undef when nothing was fetched.
sub _processSequence {
    my ($self, $location, $attribute_name, $count) = @_;

    my $dna = $self->get('dna')->{$attribute_name};
    my ($chr, $start, $end, $strand) =
        map { $location->{ $_ . $count } } qw(chr start end strand);

    # An undefined or empty fetch means "no sequence"; bail out before it
    # can trigger uninitialized-value warnings further down.
    my $seq = $dna->getSequence($chr, $start, $end);
    return undef unless defined $seq && length $seq;

    $seq = $self->_rc($seq) if defined $strand && $strand < 0;

    # Callers assign the result to a scalar, so an explicit undef is kept.
    return length($seq) ? $seq : undef;
}
# Fetches the sequence for one species' location exactly as stored, without
# looking at the strand.
#
# Parameters:
#   $location       - hash reference holding the keys "chr$count",
#                     "start$count" and "end$count"
#   $attribute_name - key of the adaptor to use within the 'dna' attribute
#   $count          - suffix appended to the location key names
#
# Returns the sequence, or undef when nothing was fetched.
sub _processSequenceNonOriented {
    # The listing lacked this unpacking line, leaving every variable below
    # undeclared; the argument order matches the call in
    # _nonOrientedRawSequences.
    my ($self, $location, $attribute_name, $count) = @_;

    my $dna = $self->get('dna')->{$attribute_name};
    my ($chr, $start, $end) =
        map { $location->{ $_ . $count } } qw(chr start end);

    my $seq = $dna->getSequence($chr, $start, $end);

    # Callers assign the result to a scalar, so an explicit undef is kept.
    return (defined $seq && length $seq) ? $seq : undef;
}
# Concatenates the sequences of several locations in ascending rank order.
#
# Parameters:
#   $locations - hash reference: rank => location hash with the keys chr,
#                start, end and optionally strand (default 1) and phase
#                (default 0)
#
# Each piece is reverse-complemented through _rc when its strand is
# negative. The first piece that yields a true sequence is prefixed with
# one 'N' per unit of its phase; pieces before it that yield nothing are
# skipped.
#
# Returns the combined sequence, or undef when it is empty.
#
# NOTE(review): the 'dna' attribute is used here directly as an adaptor
# object, whereas _initializeDNAAdaptor in this file stores a hash of
# adaptors under that name - confirm before calling this method.
sub _processSequenceOriginal {
    my ($self, $locations) = @_;

    my $dna = $self->get('dna');
    my $seq = '';
    my $found_first_piece = 0;

    foreach my $rank (sort { $a <=> $b } keys %{$locations}) {
        my $location = $locations->{$rank};
        my $strand = exists( $location->{'strand'} ) ? $location->{'strand'} : 1;
        my $phase  = $location->{'phase'} || 0;

        my $piece = $dna->getSequence(
            $location->{'chr'}, $location->{'start'}, $location->{'end'});
        $piece = $self->_rc($piece) if $strand < 0;

        if ($found_first_piece) {
            $seq .= $piece;
            next;
        }

        # Still looking for the first piece that carries sequence.
        if ($piece) {
            $seq = 'N' x $phase if $phase > 0;
            $seq .= $piece;
            $found_first_piece = 1;
        }
    }

    # Callers assign the result to a scalar, so an explicit undef is kept.
    return length($seq) ? $seq : undef;
}
# Row handler for the oriented raw sequence recipe.
#
# For every species listed in the dataset's optional parameters (first
# ','-separated field of each ';'-separated entry) the next four importable
# filter names are taken as that species' location fields, the location is
# read from $curRow and its strand-oriented sequence is fetched with
# _processSequence. All sequences are appended to one output row, which is
# then added to $atable.
#
# Nothing happens when called without a row. A species whose sequence is
# empty or undefined contributes no column to the output row. The species
# loop stops at the first attribute name that is false.
sub _rawSequences {
    my ($self, $atable, $curRow) = @_;
    return unless $curRow;

    my $interface  = "default";
    my $dna_params = $self->getConfigurationTree($interface)->optionalParameters;
    my @species_attribute_names = map {
        my ($attribute_name) = split(/\,/, $_);
        $attribute_name;
    } split(/\;/, $dna_params);

    my $initRow = $self->_initializeReturnRow($curRow);
    my @importable_names =
        map { $_->name } @{ $self->get('importable')->getAllFilters };

    # $count is the 1-based species number; _processSequence uses it as the
    # suffix of the location keys.
    my $count = 1;
    while (my $attribute_name = shift @species_attribute_names) {
        # Always four names per species; missing ones come out as undef.
        my @fields   = map { shift @importable_names } 1 .. 4;
        my $location = $self->_getLocationFrom($curRow, @fields);
        my $sequence = $self->_processSequence(
            $location, $attribute_name, $count);
        push @{$initRow}, $sequence if $sequence;
        $count++;
    }
    $self->_addRow($atable, $initRow);
}
# Row handler that turns a single chr/start/end location into one sequence
# row.
#
# The strand is taken from the row when the importable defines a "strand"
# field and defaults to 1 otherwise. The location is used only when it has
# a true start. The fetched sequence is passed through _editSequence and a
# row is added only when the sequence is true. Nothing happens when called
# without a row.
#
# NOTE(review): _processSequence is called here with a single hash of
# locations, whereas its definition in this file expects
# ($location, $attribute_name, $count) - confirm before relying on it.
sub _rawSequencesOriginal {
    my ($self, $atable, $curRow) = @_;
    return unless $curRow;

    my $importable_indices = $self->get('importable_indices');
    my $location = $self->_getLocationFrom($curRow, "chr", "start", "end");

    if (exists( $importable_indices->{"strand"} )) {
        $location->{"strand"} = $curRow->[ $importable_indices->{"strand"} ];
    }
    else {
        $location->{"strand"} = 1;
    }

    my $locations = {};
    $locations->{1} = $location if $location->{"start"};

    my $sequence = $self->_processSequence($locations);
    $self->_editSequence(\$sequence);
    if ($sequence) {
        $self->_addRow($atable, $self->_initializeReturnRow($curRow), $sequence);
    }
}
# Returns the reverse complement of a nucleotide sequence.
#
# The string is reversed and each base, including the IUPAC ambiguity codes
# listed in the translation table below, is swapped for its complement in
# matching case. Characters not listed are left as they are.
sub _rc {
    my ($self, $seq) = @_;

    my $revcomp = scalar reverse($seq);
    $revcomp =~ tr/YABCDGHKMRSTUVyabcdghkmrstuv/RTVGHCDMKYSAABrtvghcdmkysaab/;
    return $revcomp;
}
# Translates a nucleotide sequence into a protein string using the codon
# table selected by the 'codon_table_id' attribute (DEFAULTCODONTABLEID
# when that is false).
#
# The sequence is lower-cased and 'u' is read as 't'. Complete codons are
# looked up in the table; codons that are not plain a/c/g/t go through
# _translate_ambiguous_codon. When two bases are left over at the end they
# are completed with 'n' and translated as a partial codon.
#
# Returns '' for a false sequence. Throws
# BioMart::Exception::Configuration when $seq is undefined.
sub _translate {
    my ($self, $seq) = @_;
    BioMart::Exception::Configuration->throw("Calling translate without a seq argument!")
        unless defined $seq;
    return '' unless $seq;

    my $id = $self->get('codon_table_id') || DEFAULTCODONTABLEID;
    my $two_bases_left = (length($seq) % 3 == 2);

    $seq = lc $seq;
    $seq =~ tr/u/t/;
    my $has_ambiguity = ($seq =~ /[^actg]/);

    my $protein = "";
    for (my $offset = 0; $offset < (length($seq) - 2); $offset += 3) {
        my $triplet = substr($seq, $offset, 3);
        if (exists $CODONS->{$triplet}) {
            $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1);
        }
        elsif ($has_ambiguity) {
            $protein .= $self->_translate_ambiguous_codon($triplet);
        }
        else {
            $protein .= 'X';
        }
    }

    if ($two_bases_left) {
        # Last two bases plus a wildcard third position.
        my $triplet = substr($seq, -2) . "n";
        if (exists $CODONS->{$triplet}) {
            $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1);
        }
        else {
            $protein .= $self->_translate_ambiguous_codon($triplet, 2);
        }
    }
    return $protein;
}
# Translates a codon that contains ambiguity codes.
#
# The codon is expanded into every concrete codon it can stand for and each
# of those is translated with the table selected by 'codon_table_id'
# (DEFAULTCODONTABLEID when that is false).
#
# Returns
#   - the amino acid, when all expansions agree on one;
#   - 'B' when the only outcomes are D and N, 'Z' when they are E and Q;
#   - otherwise 'X', or '' when $partial is true.
sub _translate_ambiguous_codon {
    my ($self, $triplet, $partial) = @_;
    $partial ||= 0;
    my $id = $self->get('codon_table_id') || DEFAULTCODONTABLEID;

    # Set of distinct amino acids over all expansions.
    my %seen = map { (substr($TABLES[$id-1], $CODONS->{$_}, 1) => 1) }
        _unambiquous_codons($triplet);
    my @residues = keys %seen;

    return $residues[0] if @residues == 1;
    if (@residues == 2) {
        return 'B' if $seen{'D'} and $seen{'N'};
        return 'Z' if $seen{'E'} and $seen{'Q'};
    }
    return $partial ? '' : 'X';
}
_unambiquous_codons | description | prev | next | Top |
# Expands a codon containing IUPAC ambiguity codes into the list of all
# concrete codons it can stand for, in lower case.
#
# Plain function (no $self). Only the first three characters of $value are
# used; each is looked up, upper-cased, in %IUPAC_DNA.
sub _unambiquous_codons {
    my ($value) = @_;

    my ($first, $second, $third) = map { $IUPAC_DNA{uc $_} } split(//, $value);

    my @codons;
    for my $base1 (@{$first}) {
        for my $base2 (@{$second}) {
            for my $base3 (@{$third}) {
                push @codons, lc "$base1$base2$base3";
            }
        }
    }
    return @codons;
}
General documentation
AUTHOR - Darin London, Damian Smedley | Top |