BioMart::Dataset
GenomicSequenceMod
Toolbar
Summary
BioMart::Dataset::GenomicSequenceMod
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
A hidden Dataset containing sequence attributes that can be imported to other
visible Datasets which are compatible with its required data input, based
on the presence of one or more importable-exportable relationships.
Description
Dataset providing Genomic Sequence attributes, which can be imported into
other Datasets. GenomicSequence is itself not a visible Dataset.
Methods
BEGIN | | Code |
__processNewQuery | No description | Code |
_addRow | No description | Code |
_calcSeqOverLocations | No description | Code |
_codingCdnaPeptideSequences | No description | Code |
_continueWithBatch | No description | Code |
_editSequence | No description | Code |
_exonIntronFlankSequences | No description | Code |
_exonSequences | No description | Code |
_getConfigurationTree | No description | Code |
_getLocationFrom | No description | Code |
_getResultTable | No description | Code |
_ignoreRow | No description | Code |
_incrementBatch | No description | Code |
_initializeDNAAdaptor | No description | Code |
_initializeIndices | No description | Code |
_initializeReturnRow | No description | Code |
_modFlanks | No description | Code |
_new | No description | Code |
_processRow | No description | Code |
_processSequence | No description | Code |
_rawSequences | No description | Code |
_rc | No description | Code |
_snpSequences | No description | Code |
_translate | No description | Code |
_translate_ambiguous_codon | No description | Code |
_unambiquous_codons | No description | Code |
_utrSequences | No description | Code |
Methods description
None available.
Methods code
BEGIN { @NAMES = (
'Standard', 'Vertebrate Mitochondrial', 'Yeast Mitochondrial', 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma', 'Invertebrate Mitochondrial', 'Ciliate, Dasycladacean and Hexamita Nuclear', '', '',
'Echinoderm Mitochondrial', 'Euplotid Nuclear', '"Bacterial"', 'Alternative Yeast Nuclear', 'Ascidian Mitochondrial', 'Flatworm Mitochondrial', 'Blepharisma Nuclear', 'Chlorophycean Mitochondrial', '', '', '', '',
'Trematode Mitochondrial', 'Scenedesmus obliquus Mitochondrial', 'Thraustochytrium Mitochondrial' );
@TABLES =
qw(
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
'' ''
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
'' '' '' ''
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
);
my @nucs = qw(t c a g);
my $x = 0;
($CODONS, $TRCOL) = ({}, {});
for my $i (@nucs) {
for my $j (@nucs) {
for my $k (@nucs) {
my $codon = "$i$j$k";
$CODONS->{$codon} = $x;
$TRCOL->{$x} = $codon;
$x++;
}
}
}
%IUPAC_DNA = ( A => [qw(A)],
C => [qw(C)],
G => [qw(G)],
T => [qw(T)],
U => [qw(U)],
M => [qw(A C)],
R => [qw(A G)],
W => [qw(A T)],
S => [qw(C G)],
Y => [qw(C T)],
K => [qw(G T)],
V => [qw(A C G)],
H => [qw(A C T)],
D => [qw(A G T)],
B => [qw(C G T)],
X => [qw(G A T C)],
N => [qw(G A T C)]
); } |
__processNewQuery | description | prev | next | Top |
sub __processNewQuery
{ my ($self, $query) = @_;
my $attribute = $query->getAllAttributes($self->name)->[0];
my $seq_name = $attribute->name;
if ($seq_name eq 'pkey'){
$attribute = $query->getAllAttributes($self->name)->[-1];
$seq_name = $attribute->name;
}
$self->set('seq_name', $seq_name);
$self->set('translate', ($seq_name =~ m/peptide$/));
my $ignore = IGNORE;
if ($seq_name =~ m/(coding|cdna|peptide)$/) { # this is not actually going to ignore anything, but is # simply used to determine translation table for each gene # without creating a second instance variable $self->set('ignore_row', "type"); $self->set('recipe', '_codingCdnaPeptideSequences');
}
elsif ($seq_name =~ m/(exon_intron|flank)$/) { $self->set('recipe', '_exonIntronFlankSequences'); }
elsif ($seq_name =~ m/raw/){ $self->set('recipe', '_rawSequences'); }
elsif ($seq_name =~ m/(gene_exon|transcript_exon|transcript_intron)$/) { # set the system to ignore rows with duplicate pkeys # for gene_exon $self->set('ignore', $ignore->{$1}); #undef for transcript_exon $self->set('ignore_row', "pkey"); $self->set('recipe', '_exonSequences');
}
elsif ($seq_name =~ m/utr$/) { $self->set('recipe', '_utrSequences'); }
elsif ($seq_name =~ m/snp$/) { $self->set('recipe', '_snpSequences'); }
else {
BioMart::Exception::Configuration->throw("Unsupported sequence name $seq_name recieved by GenomicSequence\n");
}
$self->set('downstream_flank', 0);
$self->set('upstream_flank', 0);
$self->set('importable', undef);
$self->set('lastPkey', undef);
$self->set('importable_indices', undef);
$self->set('returnRow_indices', undef);
$self->set('locations', {});
$self->set('outRow', undef);
$self->set('calc_location', undef);
$self->set('sequence', undef);
my $filters = $query->getAllFilters($self->name);
foreach my $filt (@{$filters}) {
if ($filt->isa("BioMart::Configuration::FilterList")) {
if ($filt->linkName) {
if ($self->get('importable') ) {
BioMart::Exception::Configuration->throw("Recieved two importables, can only work with one\n");
}
else {
$self->set('importable', $filt);
}
}
else {
BioMart::Exception::Configuration->throw("Recieved invalid linkName ".
$filt->linkName."\n");
}
}
else {
unless ($filt->isa("BioMart::Configuration::ValueFilter")) {
BioMart::Exception::Configuration->throw("Recieved unknown filter ".$filt->name." in GenomicSequence Dataset!\n");
}
if ($self->get($filt->name)) {
BioMart::Exception::Configuration->throw("Recieved two ".$filt->name." flanking filters in GenomicSequence Dataset\n");
}
my $table = $filt->getTable;
my $row = $table->nextRow;
my $value = $row->[0];
if ($value) {
$self->set($filt->name, $value);
}
}
}
unless ($self->get('importable')) {
BioMart::Exception::Configuration->throw("No Importable Recieved in GenomicSequence\n");
} } |
sub _addRow
{ my ($self, $atable, $outRow, $sequence) = @_;
push @{$outRow}, $sequence;
$atable->addRow($outRow);
$self->_incrementBatch;
}
} |
sub _calcSeqOverLocations
{ my ($self, $this_location) = @_;
$this_location->{start} || return; $this_location->{end} || return;
my $calc_location = $self->get('calc_location');
if ($calc_location) {
$calc_location->{"start"} = $this_location->{"start"}
if ($this_location->{"start"} < $calc_location->{"start"});
$calc_location->{"end"} = $this_location->{"end"}
if ($this_location->{"end"} > $calc_location->{"end"});
}
else {
$calc_location = {};
foreach my $key (keys %{$this_location}) {
$calc_location->{$key} = $this_location->{$key};
}
}
$self->set('calc_location', $calc_location); } |
sub _codingCdnaPeptideSequences
{ my ($self, $atable, $curRow) = @_;
my $importable_indices = $self->get('importable_indices');
my $pkey = $curRow ?
($curRow->[$importable_indices->{"pkey"}] || 'DUMMY') : undef;
my $lastPkey = $self->get('lastPkey') || $pkey;
my $locations = $self->get('locations');
my $outRow = $self->get('outRow');
if( ( ! defined $pkey ) or ( $pkey ne $lastPkey ) ){
my $sequence;
if( grep{ $locations->{$_}->{"start"} } keys %$locations ) {
$sequence = $self->_processSequence($locations);
$sequence = $self->_translate($sequence)
if ($self->get('translate'));
$self->_editSequence(\$sequence);
}
if ($sequence) {
$self->_addRow($atable, $outRow, $sequence);
}
else {
$self->_addRow($atable, $outRow, "Sequence unavailable");
}
$locations = {};
$outRow = undef;
}
if ($curRow) {
my $rank = $curRow->[ $importable_indices->{"rank"} ];
my $location = $self->_getLocationFrom($curRow, "chr", "start", "end",
"strand", "phase");
$location = $self->_modFlanks($location, 0);
$locations->{$rank} = $location if ($location->{"start"});
}
$outRow ||= $self->_initializeReturnRow($curRow);
$self->set('locations', $locations);
$self->set('lastPkey', $pkey);
$self->set('outRow', $outRow); } |
sub _continueWithBatch
{ my ($self, $batchSize, $rtable) = @_;
my $continue = ($rtable->isa("BioMart::ResultTable"))
? $rtable->inCurrentBatch()
: $rtable->hasMoreRows;
if ($continue && $batchSize) {
my $batchIndex = $self->get('batchIndex');
$continue = ($batchIndex < $batchSize);
}
return $continue; } |
sub _editSequence
{ my ($self, $seqref) = @_;
my $seq_edits = $self->get('seq_edits');
if ($$seqref && $seq_edits) {
foreach my $seq_edit (split /\;/, $seq_edits) {
my ($start, $end, $alt_seq) = split /\,/, $seq_edit;
my $len = $end - $start + 1;
substr($$seqref, $start - 1, $len) = $alt_seq;
}
} } |
sub _exonIntronFlankSequences
{ my ($self, $atable, $curRow) = @_;
my $rank = 1;
my $importable_indices = $self->get('importable_indices');
my $pkey = $curRow ?
($curRow->[$importable_indices->{"pkey"}] || 'DUMMY') : undef;
my $lastPkey = $self->get('lastPkey') || $pkey;
my $outRow = $self->get('outRow');
if( ( ! defined $pkey ) or ( $pkey ne $lastPkey ) ){
my $shift = ($self->get('seq_name') =~ m/flank/); my $location = $self->_modFlanks( $self->get('calc_location'),
$shift );
$self->set('calc_location', undef);
my $sequence;
if ($location->{"start"}) {
my $locations = { $rank => $location };
$sequence = $self->_processSequence($locations);
$self->_editSequence(\$sequence);
}
if ($sequence) {
$self->_addRow($atable, $outRow, $sequence);
}
else {
$self->_addRow($atable, $outRow, "Sequence unavailable");
}
$outRow = undef;
}
if ($curRow) {
my $location = $self->_getLocationFrom($curRow, "chr", "start",
"end", "strand");
$self->_calcSeqOverLocations( $location );
}
$outRow ||= $self->_initializeReturnRow($curRow);
$self->set('lastPkey', $pkey);
$self->set('outRow', $outRow); } |
sub _exonSequences
{ my ($self, $atable, $curRow) = @_;
$curRow || return;
return if ($self->_ignoreRow($curRow)); my $rank = 1;
my $locations = {};
$locations->{$rank} = $self->_modFlanks( $self->_getLocationFrom($curRow,
"chr", "start", "end", "strand"), 0 );
my $sequence;
if ($locations->{1}->{"start"}) {
$sequence = $self->_processSequence($locations);
$self->_editSequence(\$sequence);
}
if ($sequence) {
$self->_addRow($atable, $self->_initializeReturnRow($curRow),
$sequence);
}
else {
$self->_addRow($atable, $self->_initializeReturnRow($curRow),
"Sequence unavailable");
}
if ($self->get('ignore')) {
my $ignore = $self->get('ignore');
my $ignore_row = $self->get('ignore_row');
my $ref = $self->_getLocationFrom($curRow, $ignore_row);
$ignore->{ $ref->{ $ignore_row } } = 1; $self->set('ignore', $ignore);
}
} |
sub _getConfigurationTree
{ my ($self,$interface,$dsCounter)=@_;;
return $self->getParam('configurator')->getConfigurationTree(
$self->virtualSchema,
$self->name,
$interface,
$dsCounter); } |
sub _getLocationFrom
{ my ($self, $curRow, @expectedFields) = @_;
my $importable_indices = $self->get('importable_indices');
my $location = {};
foreach my $expectedField (@expectedFields) {
$location->{$expectedField} =
( exists( $importable_indices->{$expectedField} ) ) ?
$curRow->[ $importable_indices->{$expectedField} ] : undef;
}
return $location; } |
sub _getResultTable
{ my ($self, @param) = @_;
$self->set('batchIndex', 0);
local($^W) = 0; my(%param) = @param;
my $query = $param{'query'};
my $atable = $param{'table'};
my $batch_size = $param{'batch_size'};
if ($self->serverType eq "web"){
my $batch_start = $param{'batch_start'} || 0;
my $location = $self->getParam('configurator')->get('location');
my $xml = $query->toXML($batch_start,$batch_size,0);
foreach my $el($location->getResultSet("","POST",$xml)){
if ($el =~ /No Sequence Returned/) {
$self->_setExhausted(1);
last;
}
my @clean=split(/\t/,$el);
$atable->addRow([@clean]);
}
return $atable;
} else {
$self->_initializeDNAAdaptor($query->
getInterfaceForDataset($self->name));
}
my $importable = $self->get('importable');
my $rtable = $importable->getTable();
my $attribute_count = @{$query->getAllAttributes};
if ($rtable->hashedResults || $attribute_count > 1){
$self->set('attribute_merge_required','1');
}
my $has_rows = $rtable->hasMoreRows;
while ($has_rows && $self->_continueWithBatch($batch_size, $rtable)) {
$self->_processRow( $atable, $rtable->nextRow);
}
unless ($has_rows) {
$self->_setExhausted(1);
$self->_processRow($atable);
}
$importable->setTable($rtable);
$self->set('importable', $importable);
$self->get('dna')->close;
return $atable;
}
} |
sub _ignoreRow
{ my ($self, $curRow) = @_;
my $ignore = $self->get('ignore');
return 0 unless ($ignore);
my $ignore_row = $self->get('ignore_row');
my $test = $self->_getLocationFrom($curRow, $ignore_row);
return $test->{ $ignore_row } && $ignore->{ $test->{ $ignore_row } }; } |
sub _incrementBatch
{ my $self = shift;
my $batchIndex = $self->get('batchIndex');
$batchIndex++;
$self->set('batchIndex', $batchIndex); } |
sub _initializeDNAAdaptor
{ my ($self,$interface) = @_;
my $dna_params = $self->getConfigurationTree($interface)->
optionalParameters;
unless ($dna_params) {
BioMart::Exception::Configuration->throw("GenomicSequence Dataset requires optional_parameters to be set in the DatasetConfig\n");
}
my ($dnatablename, $chunk_name_fieldname, $chunk_start_fieldname,
$seqfieldname,$chunk_size) = split /\,/, $dna_params;
my $dna = BioMart::Dataset::GenomicSequence::DNAAdaptor->new(
'seq_name' => $self->name,
'dna_tablename' => $dnatablename,
'seq_fieldname' => $seqfieldname,
'chunk_name_fieldname' => $chunk_name_fieldname,
'chunk_start_fieldname' => $chunk_start_fieldname,
'chunk_size' => $chunk_size,
'configurator' => $self->getParam('configurator'),
);
unless ($dna) {
BioMart::Exception::Configuration->throw("Couldnt connect to DNAAdaptor\n");
}
$self->set('dna', $dna); } |
sub _initializeIndices
{ my ($self, $numFields) = @_;
my $returnRow_indices = {};
my $importable_indices = {};
my $filts = $self->get('importable')->getAllFilters;
my $index = 0;
foreach my $filt (@{$filts}) {
$importable_indices->{$filt->name} = $index;
$index++;
}
my $resultIndex = 0;
while ($index < $numFields) {
$returnRow_indices->{$index} = $resultIndex;
$index++;
$resultIndex++;
}
$self->set('importable_indices', $importable_indices);
$self->set('returnRow_indices', $returnRow_indices); } |
sub _initializeReturnRow
{ my ($self, $curRow) = @_;
return $self->get('attribute_merge_required') ? $curRow : []; } |
sub _modFlanks
{ my ($self, $location, $shift) = @_;
$location->{start} || return $location; $location->{end} || return $location;
if ($shift) {
if ($self->get('upstream_flank')) {
if ($location->{"strand"} < 0) {
$location->{"start"} = $location->{"end"} + 1;
$location->{"end"} += $self->get('upstream_flank');
}
else {
$location->{"end"} = $location->{"start"} - 1;
$location->{"start"} -= $self->get('upstream_flank');
}
}
elsif ($self->get('downstream_flank')) {
if ($location->{"strand"} < 0) {
$location->{"end"} = $location->{"start"} - 1;
$location->{"start"} -= $self->get('downstream_flank');
}
else {
$location->{"start"} = $location->{"end"} + 1;
$location->{"end"} += $self->get('downstream_flank');
}
}
else {
BioMart::Exception::Configuration->throw("Requests for flank sequence must be accompanied by an upstream_flank or downstream_flank request\n");
}
}
else {
if ($location->{"strand"} < 0) {
$location->{"start"} -= $self->get('downstream_flank');
$location->{"end"} += $self->get('upstream_flank');
} else {
$location->{"start"} -= $self->get('upstream_flank');
$location->{"end"} += $self->get('downstream_flank');
}
}
$location->{"start"} = 1 if ($location->{"start"} < 1);
return $location; } |
sub _new
{ my ($self, @param) = @_;
$self->SUPER::_new(@param);
$self->attr('dna', undef);
$self->attr('dnaparams', undef);
$self->attr('recipe', undef); $self->attr('ignore', undef);
$self->attr('ignore_row', undef);
$self->attr('seq_edits', undef);
$self->attr('codon_table_id', 1); $self->attr('seq_name', undef); $self->attr('translate', 0); $self->attr('downstream_flank', 0);
$self->attr('upstream_flank', 0);
$self->attr('importable', undef);
$self->attr('lastPkey', undef);
$self->attr('importable_indices', undef); $self->attr('returnRow_indices', undef); $self->attr('returnRow', undef);
$self->attr('batchIndex', 0);
$self->attr('locations', {}); $self->attr('outRow', undef);
$self->attr('calc_location', undef);
$self->attr('sequence', undef);
$self->attr('attribute_merge_required', 0);
}
} |
sub _processRow
{ my ($self, $atable, $curRow) = @_;
unless ($self->get('importable_indices')) {
if ($self->get('exhausted')) {
$atable->addRow(["No Sequence Returned"]);
}
else {
my $numFields = @{$curRow};
$self->_initializeIndices($numFields);
}
}
my $method = $self->get('recipe');
$self->$method($atable, $curRow); } |
sub _processSequence
{ my ($self, $locations) = @_;
my $seq = '';
my $temp_Seq = '';
my $first_coding_exon_flag = 0;
my $dna = $self->get('dna');
foreach my $rank (sort { $a <=> $b } keys %{$locations}) {
my $location = $locations->{$rank}; my $chr = $location->{'chr'}; my $start = $location->{'start'}; my $end = $location->{'end'}; my $strand = exists( $location->{'strand'}) ? $location->{'strand'} : 1; my $phase = $location->{'phase'} || 0;
if ($first_coding_exon_flag == 0) {
if ($strand < 0) {
$temp_Seq = $self->_rc( $dna->
getSequence( $chr, $start, $end ) );
}
else {
$temp_Seq = $dna->getSequence( $chr, $start, $end );
}
if($temp_Seq) { if ($phase > 0) { $seq = 'N'x$phase;
}
$seq .= $temp_Seq;
$first_coding_exon_flag = 1;
}
}
else {
if ($strand < 0) {
$seq .= $self->_rc( $dna->getSequence( $chr, $start, $end ) );
}
else {
$seq .= $dna->getSequence( $chr, $start, $end );
}
}
}
if (length($seq)) {
return $seq;
}
return undef } |
sub _rawSequences
{ my ($self, $atable, $curRow) = @_;
my $rank = 1;
if ($curRow) {
my $importable_indices = $self->get('importable_indices');
my $locations = {};
my $location = $self->_getLocationFrom($curRow, "chr", "start", "end");
$location->{"strand"} = ( exists( $importable_indices->{"strand"} ) ) ?
$curRow->[ $importable_indices->{"strand"} ] : 1;
$locations->{$rank} = $location if ($location->{"start"});
my $sequence = $self->_processSequence($locations);
$self->_editSequence(\$sequence);
if ($sequence) {
$self->_addRow($atable, $self->_initializeReturnRow($curRow),
$sequence);
}
} } |
sub _rc
{ my ($self, $seq) = @_;
$seq = reverse($seq);
$seq =~ tr/YABCDGHKMRSTUVyabcdghkmrstuv/RTVGHCDMKYSAABrtvghcdmkysaab/;
return $seq; } |
sub _snpSequences
{ my ($self, $atable, $curRow) = @_;
my $rank = 1;
if ($curRow) {
my $location = $self->_getLocationFrom($curRow, "chr", "pos",
"strand", "allele");
if ($location->{"strand"} < 0) {
$location->{"start"} = $location->{"pos"} -
$self->get('downstream_flank');
$location->{"start"} = 1
if ($location->{"start"} < 1);
$location->{"end"} = $location->{"pos"} +
$self->get('upstream_flank');
$location->{"off"} = $self->get('upstream_flank');
}
else {
$location->{"start"} = $location->{"pos"} -
$self->get('upstream_flank');
$location->{"end"} = $location->{"pos"} +
$self->get('downstream_flank');
$location->{"off"} = $self->get('upstream_flank');
if ($location->{"start"} < 1) {
$location->{"off"} = $self->get('upstream_flank') +
$location->{"start"} - 1;
$location->{"start"} = 1;
}
}
my $locations = {};
$locations->{$rank} = $location;
my $sequence = $self->_processSequence($locations);
$self->_editSequence(\$sequence);
if ($sequence) {
substr($sequence, $location->{"off"}, 1) = "\n".
$location->{"allele"}."\n";
$self->_addRow($atable, $self->_initializeReturnRow($curRow),
$sequence);
}
}
}
1; } |
sub _translate
{ my ($self, $seq) = @_;
BioMart::Exception::Configuration->throw("Calling translate without a seq argument!")
unless defined $seq;
return '' unless $seq;
my $id = $self->get('codon_table_id') || DEFAULTCODONTABLEID;
my ($partial) = 0;
$partial = 2 if length($seq) % 3 == 2;
$seq = lc $seq;
$seq =~ tr/u/t/;
my $protein = "";
if ($seq =~ /[^actg]/ ) { for (my $i = 0; $i < (length($seq) - 2 ); $i+=3) {
my $triplet = substr($seq, $i, 3);
if (exists $CODONS->{$triplet}) {
$protein .= substr($TABLES[$id-1],
$CODONS->{$triplet},1);
}
else {
$protein .= $self->_translate_ambiguous_codon($triplet);
}
}
}
else { for (my $i = 0; $i < (length($seq) - 2 ); $i+=3) {
my $triplet = substr($seq, $i, 3);
if (exists $CODONS->{$triplet}) {
$protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1);
}
else {
$protein .= 'X';
}
}
}
if ($partial == 2) { my $triplet = substr($seq, ($partial -4)). "n";
if (exists $CODONS->{$triplet}) {
my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1);
$protein .= $aa;
} else {
$protein .= $self->_translate_ambiguous_codon($triplet, $partial);
}
}
return $protein; } |
sub _translate_ambiguous_codon
{ my ($self, $triplet, $partial) = @_;
$partial ||= 0;
my $id = $self->get('codon_table_id') || DEFAULTCODONTABLEID;
my $aa;
my @codons = _unambiquous_codons($triplet);
my %aas =();
foreach my $codon (@codons) {
$aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1;
}
my $count = scalar keys %aas;
if ( $count == 1 ) {
$aa = (keys %aas)[0];
}
elsif ( $count == 2 ) {
if ($aas{'D'} and $aas{'N'}) {
$aa = 'B';
}
elsif ($aas{'E'} and $aas{'Q'}) {
$aa = 'Z';
}
else {
$partial ? ($aa = '') : ($aa = 'X');
}
}
else {
$partial ? ($aa = '') : ($aa = 'X');
}
return $aa; } |
sub _unambiquous_codons
{ my ($value) = @_;
my @nts = ();
my @codons = ();
my ($i, $j, $k);
@nts = map { $IUPAC_DNA{uc $_} } split(//, $value);
for my $i (@{$nts[0]}) {
for my $j (@{$nts[1]}) {
for my $k (@{$nts[2]}) {
push @codons, lc "$i$j$k";
}
}
}
return @codons; } |
_utrSequences | description | prev | next | Top |
sub _utrSequences
{ my ($self, $atable, $curRow) = @_;
my $locations = $self->get('locations');
my $outRow = $self->get('outRow');
if ($curRow) {
my $importable_indices = $self->get('importable_indices');
my $pkey = $curRow->[ $importable_indices->{"pkey"} ];
my $lastPkey = $self->get('lastPkey');
if ( $lastPkey && ($pkey ne $lastPkey) ) {
my $hasUtr = keys %{$locations};
if ($hasUtr) {
my @ranks = sort { $a <=> $b } keys %{$locations};
my $low_rank = shift @ranks;
my $hi_rank = ($hasUtr > 1) ? pop @ranks : $low_rank;
my $calc_location = $self->get('calc_location');
my $low_loc_strand = $locations->{$low_rank}->{"strand"};
if ($self->get('upstream_flank')) {
if ($low_loc_strand < 0) {
$calc_location->{"start"} =
$calc_location->{"end"} + 1;
$calc_location->{"end"} =
$calc_location->{"start"} +
$self->get('upstream_flank') - 1;
}
else {
$calc_location->{"end"} =
$calc_location->{"start"} - 1;
$calc_location->{"start"} =
$calc_location->{"start"} -
$self->get('upstream_flank') + 1;
$calc_location->{"start"} = 1
if ($calc_location->{"start"} < 1);
}
$locations->{$low_rank - 1} = $calc_location;
}
elsif ($self->get('downstream_flank')) {
if ($low_loc_strand < 0) {
$calc_location->{"end"} =
$calc_location->{"start"} - 1;
$calc_location->{"start"} =
$calc_location->{"end"} -
$self->get('downstream_flank') + 1;
}
else {
$calc_location->{"start"} =
$calc_location->{"end"} + 1;
$calc_location->{"end"} =
$calc_location->{"start"} +
$self->get('downstream_flank') - 1;
}
$locations->{$hi_rank + 1} = $calc_location;
}
my $sequence = $self->_processSequence($locations);
$self->_editSequence(\$sequence);
if ($sequence) {
$locations = {};
$hasUtr = 0;
$self->_addRow($atable, $outRow, $sequence);
$outRow = undef;
$self->set('calc_location', undef);
}
}
else {
$self->_addRow($atable, $outRow,
"No UTR is annotated for this transcript");
$outRow = undef;
}
}
unless ($outRow) {
$outRow = $self->_initializeReturnRow($curRow);
}
my $rank = $curRow->[ $importable_indices->{"rank"} ];
my $location = $self->_getLocationFrom($curRow, "chr", "start",
"end", "strand");
if ($location->{"start"}) {
$locations->{$rank} = $location;
$self->_calcSeqOverLocations($location);
}
$self->set('lastPkey', $pkey);
}
else {
my $hasUtr = keys %{$locations};
if ($hasUtr) {
my @ranks = sort { $a <=> $b } keys %{$locations};
my $low_rank = shift @ranks;
my $hi_rank = ($hasUtr > 1) ? pop @ranks : $low_rank;
my $calc_location = $self->get('calc_location');
my $low_loc_strand = $locations->{$low_rank}->{"strand"};
if ($self->get('upstream_flank')) {
if ($low_loc_strand < 0) {
$calc_location->{"start"} = $calc_location->{"end"} + 1;
$calc_location->{"end"} =
$calc_location->{"start"} +
$self->get('upstream_flank') - 1;
}
else {
$calc_location->{"end"} = $calc_location->{"start"} - 1;
$calc_location->{"start"} =
$calc_location->{"start"} -
$self->get('upstream_flank') + 1;
$calc_location->{"start"} = 1
if ($calc_location->{"start"} < 1);
}
$locations->{$low_rank - 1} = $calc_location;
}
elsif ($self->get('downstream_flank')) {
if ($low_loc_strand < 0) {
$calc_location->{"end"} = $calc_location->{"start"} - 1;
$calc_location->{"start"} = $calc_location->{"end"} -
$self->get('downstream_flank') + 1;
}
else {
$calc_location->{"start"} = $calc_location->{"end"} + 1;
$calc_location->{"end"} = $calc_location->{"start"} +
$self->get('downstream_flank') - 1;
}
$locations->{$hi_rank + 1} = $calc_location;
}
my $sequence = $self->_processSequence($locations);
$self->_editSequence(\$sequence);
if ($sequence) {
$locations = {};
$hasUtr = 0;
$self->_addRow($atable, $outRow, $sequence);
$outRow = undef;
$self->set('calc_location', undef);
}
}
else {
$self->_addRow($atable, $outRow,
"No UTR is annotated for this transcript");
$outRow = undef;
}
}
$self->set('locations', $locations);
$self->set('outRow', $outRow); } |
General documentation
AUTHOR - Darin London, Damian Smedley | Top |
The peptide translation algorithm is taken directly
from the CodonTable module that is part of the
BioPerl project. For more information about the
BioPerl project, visit:
http://www.bioperl.org