BioMart::Formatter
FASTAH
Toolbar
Summary
BioMart::Formatter::FASTAH
Package variables
No package variables defined.
Inherit
Synopsis
FASTA formatter specific for Homologues
The FASTA Formatter returns whitespace separated tabular data
for a BioMart query's ResultTable
This is a FASTA formatter for Compara_homology which is very specific
for the compara_mart project
Description
When given a BioMart::ResultTable containing the results of
a BioMart::Query the FASTA Formatter will return tabular output
with one line for each row of data in the ResultTable and single spaces
separating the individual entries in each row. The getDisplayNames
and getFooterText can be used to return appropiately formatted
headers and footers respectively
Methods
BEGIN | | Code |
_new | No description | Code |
_translate_ambiguous_codon | No description | Code |
_unambiquous_codons | No description | Code |
apply_edit | No description | Code |
formatseq | No description | Code |
formatseqPep | No description | Code |
getDisplayNames | No description | Code |
isSpecial | No description | Code |
nextRow | No description | Code |
processQuery | No description | Code |
rc | No description | Code |
translate | No description | Code |
Methods description
None available.
Methods code
BEGIN { use constant CODONSIZE => 3;
$CODONGAP = $GAP x CODONSIZE;
@NAMES = (
'Standard', 'Vertebrate Mitochondrial', 'Yeast Mitochondrial', 'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma', 'Invertebrate Mitochondrial', 'Ciliate, Dasycladacean and Hexamita Nuclear', '', '',
'Echinoderm Mitochondrial', 'Euplotid Nuclear', '"Bacterial"', 'Alternative Yeast Nuclear', 'Ascidian Mitochondrial', 'Flatworm Mitochondrial', 'Blepharisma Nuclear', 'Chlorophycean Mitochondrial', '', '', '', '',
'Trematode Mitochondrial', 'Scenedesmus obliquus Mitochondrial', 'Thraustochytrium Mitochondrial' );
@TABLES =
qw(
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG
FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
'' ''
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG
FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG
FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
'' '' '' ''
FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG
FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG
);
my @nucs = qw(t c a g);
my $x = 0;
($CODONS, $TRCOL) = ({}, {});
for my $i (@nucs) {
for my $j (@nucs) {
for my $k (@nucs) {
my $codon = "$i$j$k";
$CODONS->{$codon} = $x;
$TRCOL->{$x} = $codon;
$x++;
}
}
}
%IUPAC_DNA = ( A => [qw(A)],
C => [qw(C)],
G => [qw(G)],
T => [qw(T)],
U => [qw(U)],
M => [qw(A C)],
R => [qw(A G)],
W => [qw(A T)],
S => [qw(C G)],
Y => [qw(C T)],
K => [qw(G T)],
V => [qw(A C G)],
H => [qw(A C T)],
D => [qw(A G T)],
B => [qw(C G T)],
X => [qw(G A T C)],
N => [qw(G A T C)]
);
} |
sub _new
{ my ($self) = @_;
$self->SUPER::_new(); } |
sub _translate_ambiguous_codon
{ my ($triplet, $id, $partial) = @_;
$partial ||= 0;
my $aa;
my @codons = _unambiquous_codons($triplet);
my %aas =();
foreach my $codon (@codons) {
$aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1;
}
my $count = scalar keys %aas;
if ( $count == 1 ) {
$aa = (keys %aas)[0];
}
elsif ( $count == 2 ) {
if ($aas{'D'} and $aas{'N'}) {
$aa = 'B';
}
elsif ($aas{'E'} and $aas{'Q'}) {
$aa = 'Z';
} else {
$partial ? ($aa = '') : ($aa = 'X');
}
} else {
$partial ? ($aa = '') : ($aa = 'X');
}
return $aa;
}
} |
sub _unambiquous_codons
{ my ($value) = @_;
my @nts = ();
my @codons = ();
my ($i, $j, $k);
@nts = map { $IUPAC_DNA{uc $_} } split(//, $value);
for my $i (@{$nts[0]}) {
for my $j (@{$nts[1]}) {
for my $k (@{$nts[2]}) {
push @codons, lc "$i$j$k";
}
}
}
return @codons;
}
1; } |
sub apply_edit
{ my $seqeds = shift;
my $seqref = shift;
my @seqedits = split (/:/,$seqeds);
if(!defined($seqedits[0]) || !defined($seqedits[1])) {
return $seqref;
}
my $len = $seqedits[1] - $seqedits[0] + 1;
substr($$seqref, $seqedits[0] - 1, $len) = $seqedits[2];
return $seqref;
}
} |
sub formatseq
{ my ($row)= shift;
my ($finalseq1, $finalseq2, $header_atts1, $header_atts2,
$strand1, $strand2, $tag1, $tag2) ;
my @atts1 = ();
my @atts2 = ();
my @coorarray = ();
my @coorarray1;
my @coorarray2;
my $seq1 = shift @$row;
my $seq2 = shift @$row;
my $i = 0 ;
my $k = 0;
my $j = 0;
foreach my $attribute (@$row){
if (!$attribute){$attribute = "NULL"}
if ($attribute eq "tagHeader"){
push (@atts1, $row->[$i+1]);
push (@atts2, $row->[$i+2]);
}
$i++;
if ($attribute =~ /^(tagExonseq|tagCdna|tagCodingseq|tagUnsp|tag3utr|tag5utr)/){
$j++;
if ($j == 1){
@coorarray1 = split(/\|/, $attribute);
$strand1 = $row->[$k+2];
}
if ($j == 2){
@coorarray2 = split(/\|/, $attribute);
$tag1 = shift (@coorarray1); $tag2 = shift (@coorarray2);
$strand2 = $row->[$k+2];
}
}
$k++;
}
foreach my $coors1 (@coorarray1){
if ($coors1 eq "No UTR"){
$finalseq1 = "No UTR";
}else{
my @coor1 = split(/:/,$coors1);
$finalseq1 .= substr ($seq1, $coor1[0], $coor1[1]);
}
}
foreach my $coors2 (@coorarray2){
if ($coors2 =~ m/No UTR/gi){ $finalseq2 = "No UTR"; }else{
my @coor2 = split(/:/,$coors2);
$finalseq2 .= substr ($seq2, $coor2[0], $coor2[1]);
}
}
if (($strand1 < 0)&&($finalseq1 ne "No UTR")) {$finalseq1 = &rc($finalseq1);}
if (($strand2 < 0)&&($finalseq2 ne "No UTR")) {$finalseq2 = &rc($finalseq2);}
$finalseq1 =~ s/(\w{60})/$1\n/g;
$finalseq2 =~ s/(\w{60})/$1\n/g;
$header_atts1 = join ("|", @atts1);
$header_atts2 = join ("|", @atts2);
return ">" . $header_atts1 . "\n"
. $finalseq1 ."\n"
. ">" . $header_atts2 . "\n"
. $finalseq2 ."\n"
. "#\n";
}
} |
sub formatseqPep
{ my ($row)= shift;
my ($finalseq1, $finalseq2, $header_atts1, $header_atts2,
$strand1, $strand2, $tag1, $tag2, $chro1, $chro2,
$seqedits1, $seqedits2) ;
my @atts1 = ();
my @atts2 = ();
my @coorarray = ();
my @coorarray1;
my @coorarray2;
my $seq1 = shift @$row;
my $seq2 = shift @$row;
my $i = 0 ;
my $k = 0;
my $j = 0;
foreach my $attribute (@$row){
if (!$attribute){$attribute = "NULL"}
if ($attribute eq "tagHeader"){
push (@atts1, $row->[$i+1]);
push (@atts2, $row->[$i+2]);
}
$i++;
if ($attribute =~ /^(tagPeptide)/){
$j++;
if ($j == 1){
@coorarray1 = split(/\|/, $attribute);
$strand1 = $row->[$k+2];
$seqedits1 = $row->[$k+4];
$chro1 = $row->[$k+6];
}
if ($j == 2){
@coorarray2 = split(/\|/, $attribute);
$tag1 = shift (@coorarray1); $tag2 = shift (@coorarray2);
$strand2 = $row->[$k+2];
$seqedits2 = $row->[$k+4];
$chro2 = $row->[$k+6];
}
}
$k++;
}
foreach my $coors1 (@coorarray1){
my @coor1 = split(/:/,$coors1);
$finalseq1 .= substr ($seq1, $coor1[0], $coor1[1]);
}
foreach my $coors2 (@coorarray2){
my @coor2 = split(/:/,$coors2);
$finalseq2 .= substr ($seq2, $coor2[0], $coor2[1]);
}
if ($strand1 < 0) {$finalseq1 = &rc($finalseq1);}
if ($strand2 < 0) {$finalseq2 = &rc($finalseq2);}
$finalseq1 = &translate($finalseq1,$chro1);
$finalseq2 = &translate($finalseq2,$chro2);
if ($finalseq1 =~ /\*$/){ chop $finalseq1 }
if ($finalseq2 =~ /\*$/){ chop $finalseq2 }
if ($seqedits1 ne 'NULL'){
my @seqed = split (/\|/,$seqedits1);
shift @seqed; foreach my $se (@seqed) {
&apply_edit($se,\$ finalseq1);
}
}
if ($seqedits2 ne 'NULL'){
my @seqed = split (/\|/,$seqedits2);
shift @seqed; foreach my $se (@seqed) {
&apply_edit($se,\$ finalseq2);
}
}
$finalseq1 =~ s/(\w{60})/$1\n/g;
$finalseq2 =~ s/(\w{60})/$1\n/g;
$header_atts1 = join ("|", @atts1);
$header_atts2 = join ("|", @atts2);
return ">" . $header_atts1 . "\n"
. $finalseq1 ."\n"
. ">" . $header_atts2 . "\n"
. $finalseq2 ."\n"
. "#\n";
}
} |
sub getDisplayNames
{ my $self = shift;
return ''; } |
sub isSpecial
{ return 1;
}
} |
sub nextRow
{ my $self = shift;
my $rtable = $self->get('result_table');
my $row = $rtable->nextRow;
if (!$row){
return;
}
my ($tag1, $tag2);
my (@coorarray1, @coorarray2);
my $k = 0;
my $j = 0;
foreach my $attribute (@$row){
if (not defined $attribute){$attribute = "NULL"}
if ($attribute =~ m/tagExonseq|tagCdna|tagCodingseq|tagUnsp|tag3utr|tag5utr|tagPeptide/){ $j++; if ($j == 1){
@coorarray1 = split(/\|/, $attribute);
$tag1 = shift (@coorarray1); }
if ($j == 2){
@coorarray2 = split(/\|/, $attribute);
$tag2 = shift (@coorarray2);
}
}
$k++;
}
if ($tag1 ne $tag2){return "Tag are not the same:".$tag1." - ".$tag2."\n\n";}
if ($tag1 =~ m/tagExonseq|tagCdna|tagCodingseq|tagUnsp|tag3utr|tag5utr/){
&formatseq($row);
}elsif ($tag1 =~ m/tagPeptide/){ &formatseqPep($row);
}else {
return "tags not supported by the formater FASTAH.pm tags(tag1:$tag1 tag2:$tag2)\n";
}
}
} |
sub processQuery
{ my ($self, $query) = @_;
$self->set('original_attributes',[@{$query->getAllAttributes()}])
if ($query->getAllAttributes());
$self->set('query',$query);
return $query; } |
sub rc
{ my ($seq) = @_;
$seq = reverse($seq);
$seq =~ tr/YABCDGHKMRSTUVyabcdghkmrstuv/RTVGHCDMKYSAABrtvghcdmkysaab/;
return $seq;
}
} |
sub translate
{ my ($seq, $chro) = @_;
return '' unless $seq;
my $id; if ($chro =~ /MT/g){$id = '2'}else{$id = '1'}
my ($partial) = 0;
$partial = 2 if length($seq) % CODONSIZE == 2;
$seq = lc $seq;
$seq =~ tr/u/t/;
my $protein = "";
if ($seq =~ /[^actg]/ ) { for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) {
my $triplet = substr($seq, $i, CODONSIZE);
if (exists $CODONS->{$triplet}) {
$protein .= substr($TABLES[$id-1],
$CODONS->{$triplet},1);
} else {
$protein .= _translate_ambiguous_codon($triplet, $id);
}
}
} else { for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) {
my $triplet = substr($seq, $i, CODONSIZE);
if (exists $CODONS->{$triplet}) {
$protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1);
} else {
$protein .= 'X';
}
}
}
if ($partial == 2) { my $triplet = substr($seq, ($partial -4)). "n";
if (exists $CODONS->{$triplet}) {
my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1);
$protein .= $aa;
} else {
$protein .= _translate_ambiguous_codon($triplet, $id, $partial);
}
}
return $protein;
}
} |
General documentation