BioMart::Formatter FASTAH
SummaryIncluded librariesPackage variablesSynopsisDescriptionGeneral documentationMethods
Toolbar
WebCvsRaw content
Summary
BioMart::Formatter::FASTAH
Package variables
No package variables defined.
Inherit
BioMart::FormatterI
Synopsis
FASTA formatter specific for Homologues
The FASTA Formatter returns whitespace separated tabular data
for a BioMart query's ResultTable
This is a FASTA formatter for Compara_homology which is very specific
for the compara_mart project
Description
When given a BioMart::ResultTable containing the results of
a BioMart::Query the FASTA Formatter will return tabular output
with one line for each row of data in the ResultTable and single spaces
separating the individual entries in each row. The getDisplayNames
and getFooterText can be used to return appropiately formatted
headers and footers respectively
Methods
BEGIN Code
_new
No description
Code
_translate_ambiguous_codon
No description
Code
_unambiquous_codons
No description
Code
apply_edit
No description
Code
formatseq
No description
Code
formatseqPep
No description
Code
getDisplayNames
No description
Code
isSpecial
No description
Code
nextRow
No description
Code
processQuery
No description
Code
rc
No description
Code
translate
No description
Code
Methods description
None available.
Methods code
BEGINTop
BEGIN {
     use constant CODONSIZE => 3;
    #$GAP = '-';
$CODONGAP = $GAP x CODONSIZE; @NAMES = #id
( 'Standard', #1
'Vertebrate Mitochondrial',#2
'Yeast Mitochondrial',# 3
'Mold, Protozoan, and CoelenterateMitochondrial and Mycoplasma/Spiroplasma',#4
'Invertebrate Mitochondrial',#5
'Ciliate, Dasycladacean and Hexamita Nuclear',# 6
'', '', 'Echinoderm Mitochondrial',#9
'Euplotid Nuclear',#10
'"Bacterial"',# 11
'Alternative Yeast Nuclear',# 12
'Ascidian Mitochondrial',# 13
'Flatworm Mitochondrial',# 14
'Blepharisma Nuclear',# 15
'Chlorophycean Mitochondrial',# 16
'', '', '', '', 'Trematode Mitochondrial',# 21
'Scenedesmus obliquus Mitochondrial', #22
'Thraustochytrium Mitochondrial' #23
); @TABLES = qw( FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG '' '' FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG '' '' '' '' FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG ); my @nucs = qw(t c a g); my $x = 0; ($CODONS, $TRCOL) = ({}, {}); for my $i (@nucs) { for my $j (@nucs) { for my $k (@nucs) { my $codon = "$i$j$k"; $CODONS->{$codon} = $x; $TRCOL->{$x} = $codon; $x++; } } } %IUPAC_DNA = ( A => [qw(A)], C => [qw(C)], G => [qw(G)], T => [qw(T)], U => [qw(U)], M => [qw(A C)], R => [qw(A G)], W => [qw(A T)], S => [qw(C G)], Y => [qw(C T)], K => [qw(G T)], V => [qw(A C G)], H => [qw(A C T)], D => [qw(A G T)], B => [qw(C G T)], X => [qw(G A T C)], N => [qw(G A T C)] ); #$TERMINATOR = '*';
}
_newdescriptionprevnextTop
sub _new {
    my ($self) = @_;

    $self->SUPER::_new();
}
_translate_ambiguous_codondescriptionprevnextTop
sub _translate_ambiguous_codon {
    my ($triplet, $id, $partial) = @_;
    $partial ||= 0;
    #my $id = '1';
my $aa; my @codons = _unambiquous_codons($triplet); my %aas =(); foreach my $codon (@codons) { $aas{substr($TABLES[$id-1],$CODONS->{$codon},1)} = 1; } my $count = scalar keys %aas; if ( $count == 1 ) { $aa = (keys %aas)[0]; } elsif ( $count == 2 ) { if ($aas{'D'} and $aas{'N'}) { $aa = 'B'; } elsif ($aas{'E'} and $aas{'Q'}) { $aa = 'Z'; } else { $partial ? ($aa = '') : ($aa = 'X'); } } else { $partial ? ($aa = '') : ($aa = 'X'); } return $aa; } #----------------------
}
_unambiquous_codonsdescriptionprevnextTop
sub _unambiquous_codons {
    my ($value) = @_;
    my @nts = ();
    my @codons = ();
    my ($i, $j, $k);
    @nts = map { $IUPAC_DNA{uc $_} }  split(//, $value);
    for my $i (@{$nts[0]}) {
	for my $j (@{$nts[1]}) {
	    for my $k (@{$nts[2]}) {
		push @codons, lc "$i$j$k";
	    }
	}
    }
    return @codons;
}

1;
}
apply_editdescriptionprevnextTop
sub apply_edit {
    my $seqeds   = shift;
    my $seqref = shift;
    my @seqedits = split (/:/,$seqeds);
    
    #if(ref($seqref) ne 'SCALAR') {
# throw("Reference to scalar argument expected");
#}
if(!defined($seqedits[0]) || !defined($seqedits[1])) { return $seqref; } my $len = $seqedits[1] - $seqedits[0] + 1; substr($$seqref, $seqedits[0] - 1, $len) = $seqedits[2]; return $seqref; } #-----------------------------------------
#-------From Bioperl CodonTable.pm -------
#-------Modified like GenomicSequence ----
}
formatseqdescriptionprevnextTop
sub formatseq {
    my ($row)= shift;
    my ($finalseq1, $finalseq2, $header_atts1, $header_atts2, 
	$strand1, $strand2, $tag1, $tag2) ;
    my @atts1 = ();
    my @atts2 = ();
    my @coorarray = ();
    my @coorarray1;
    my @coorarray2;

    
    #--  get sequences
my $seq1 = shift @$row; my $seq2 = shift @$row; # my $i = 0 ;
# foreach my $attribute (@$row){
# print "attribute: $attribute\n";
# if (!$attribute){$attribute = "NULL"}
# elsif ($attribute eq "tagHeader"){
# push (@atts1, $row->[$i+1]);
# push (@atts2, $row->[$i+2]);
# }
# $i++;
# }
my $i = 0 ; my $k = 0; my $j = 0; foreach my $attribute (@$row){ #print "attribute: $attribute\n";
#-- Get the attributes
if (!$attribute){$attribute = "NULL"} if ($attribute eq "tagHeader"){ push (@atts1, $row->[$i+1]); push (@atts2, $row->[$i+2]); } $i++; #-- Get the coordinates
if ($attribute =~ /^(tagExonseq|tagCdna|tagCodingseq|tagUnsp|tag3utr|tag5utr)/){ $j++; if ($j == 1){ # print " attribute1:: $attribute\n";
@coorarray1 = split(/\|/, $attribute); $strand1 = $row->[$k+2]; } if ($j == 2){ # print " attribute2:: $attribute\n";
@coorarray2 = split(/\|/, $attribute); $tag1 = shift (@coorarray1); # remove the tags
$tag2 = shift (@coorarray2); $strand2 = $row->[$k+2]; } } $k++; } #-- Chop the seq according to the given coordinate
foreach my $coors1 (@coorarray1){ if ($coors1 eq "No UTR"){ $finalseq1 = "No UTR"; }else{ my @coor1 = split(/:/,$coors1); $finalseq1 .= substr ($seq1, $coor1[0], $coor1[1]); } } foreach my $coors2 (@coorarray2){ if ($coors2 =~ m/No UTR/gi){
$finalseq2 = "No UTR";
}else{ my @coor2 = split(/:/,$coors2); $finalseq2 .= substr ($seq2, $coor2[0], $coor2[1]); } } #-- RC the seq if needed !
if (($strand1 < 0)&&($finalseq1 ne "No UTR")) {$finalseq1 = &rc($finalseq1);} if (($strand2 < 0)&&($finalseq2 ne "No UTR")) {$finalseq2 = &rc($finalseq2);} #-- format the seq whith a 60bp width
$finalseq1 =~ s/(\w{60})/$1\n/g; $finalseq2 =~ s/(\w{60})/$1\n/g; #-- Join the attributes together
$header_atts1 = join ("|", @atts1); $header_atts2 = join ("|", @atts2); #-- Then return the whole thing
return ">" . $header_atts1 . "\n" . $finalseq1 ."\n" . ">" . $header_atts2 . "\n" . $finalseq2 ."\n" . "#\n"; } #------------------
}
formatseqPepdescriptionprevnextTop
sub formatseqPep {
    my ($row)= shift;
    my ($finalseq1, $finalseq2, $header_atts1, $header_atts2, 
	$strand1, $strand2, $tag1, $tag2, $chro1, $chro2,
	$seqedits1, $seqedits2) ;
    my @atts1 = ();
    my @atts2 = ();
    my @coorarray = ();
    my @coorarray1;
    my @coorarray2;
    
    #--  get sequences
my $seq1 = shift @$row; my $seq2 = shift @$row; my $i = 0 ; my $k = 0; my $j = 0; foreach my $attribute (@$row){ #print "attribute: $attribute\n";
#-- Get the attributes
if (!$attribute){$attribute = "NULL"} if ($attribute eq "tagHeader"){ push (@atts1, $row->[$i+1]); push (@atts2, $row->[$i+2]); } $i++; #-- Get the coordinates
if ($attribute =~ /^(tagPeptide)/){ $j++; if ($j == 1){ # print " attribute1:: $attribute\n";
@coorarray1 = split(/\|/, $attribute); $strand1 = $row->[$k+2]; $seqedits1 = $row->[$k+4]; $chro1 = $row->[$k+6]; } if ($j == 2){ # print " attribute2:: $attribute\n";
@coorarray2 = split(/\|/, $attribute); $tag1 = shift (@coorarray1); # remove the tags
$tag2 = shift (@coorarray2); $strand2 = $row->[$k+2]; $seqedits2 = $row->[$k+4]; $chro2 = $row->[$k+6]; } } $k++; } # print "tag1:$tag1 strand1:$strand1 seqedits1:$seqedits1 chro1:$chro1\n";
# print "tag2:$tag2 strand2:$strand2 seqedits2:$seqedits2 chro2:$chro2\n";
#-- Chop the seq according to the given coordinate
foreach my $coors1 (@coorarray1){ my @coor1 = split(/:/,$coors1); $finalseq1 .= substr ($seq1, $coor1[0], $coor1[1]); } foreach my $coors2 (@coorarray2){ my @coor2 = split(/:/,$coors2); $finalseq2 .= substr ($seq2, $coor2[0], $coor2[1]); } #-- RC the seq if needed !
if ($strand1 < 0) {$finalseq1 = &rc($finalseq1);} if ($strand2 < 0) {$finalseq2 = &rc($finalseq2);} #-- Translated it
$finalseq1 = &translate($finalseq1,$chro1); $finalseq2 = &translate($finalseq2,$chro2); #-- Post process the seq with seqedits
if ($finalseq1 =~ /\*$/){ chop $finalseq1 } if ($finalseq2 =~ /\*$/){ chop $finalseq2 } #-- Apply seq edits if exists
if ($seqedits1 ne 'NULL'){ my @seqed = split (/\|/,$seqedits1); shift @seqed; #remove tag
foreach my $se (@seqed) { # $finalseq1 =
&apply_edit($se,\$ finalseq1); } } if ($seqedits2 ne 'NULL'){ my @seqed = split (/\|/,$seqedits2); shift @seqed; #remove tag
foreach my $se (@seqed) { #$finalseq2 =
&apply_edit($se,\$ finalseq2); } } #-- format the seq whith a 60bp width
$finalseq1 =~ s/(\w{60})/$1\n/g; $finalseq2 =~ s/(\w{60})/$1\n/g; #-- Join the attributes together
$header_atts1 = join ("|", @atts1); $header_atts2 = join ("|", @atts2); #-- Then return the whole thing
return ">" . $header_atts1 . "\n" . $finalseq1 ."\n" . ">" . $header_atts2 . "\n" . $finalseq2 ."\n" . "#\n"; } #----------------------
}
getDisplayNamesdescriptionprevnextTop
sub getDisplayNames {
    my $self = shift;
    return '';
}
isSpecialdescriptionprevnextTop
sub isSpecial {
    return 1;
}
#------------------
}
nextRowdescriptionprevnextTop
sub nextRow {
    my $self = shift;

    my $rtable = $self->get('result_table');
    my $row = $rtable->nextRow;
    if (!$row){
        return;
    }
#    my $array_length = @{$row};
#-- determine which coordinate we are using
my ($tag1, $tag2); my (@coorarray1, @coorarray2); my $k = 0; my $j = 0; foreach my $attribute (@$row){ #print "attribute: $attribute\n";
if (not defined $attribute){$attribute = "NULL"} if ($attribute =~ m/tagExonseq|tagCdna|tagCodingseq|tagUnsp|tag3utr|tag5utr|tagPeptide/){
$j++;
if ($j == 1){ #print " attribute1:: $attribute\n";
@coorarray1 = split(/\|/, $attribute); $tag1 = shift (@coorarray1); # remove the tags
} if ($j == 2){ #print " attribute2:: $attribute\n";
@coorarray2 = split(/\|/, $attribute); $tag2 = shift (@coorarray2); } } $k++; } # print "TAGS : $tag1 $tag2\n";
if ($tag1 ne $tag2){return "Tag are not the same:".$tag1." - ".$tag2."\n\n";} #-- then format depending of the tag (cdna, peptide, etc.)
if ($tag1 =~ m/tagExonseq|tagCdna|tagCodingseq|tagUnsp|tag3utr|tag5utr/){

&formatseq(
$row);
}elsif ($tag1 =~ m/tagPeptide/){

&formatseqPep(
$row);
# print "tagPeptide : $tag1 $tag2\n";
}else { return "tags not supported by the formater FASTAH.pm tags(tag1:$tag1 tag2:$tag2)\n"; } } #------------------
}
processQuerydescriptionprevnextTop
sub processQuery {
    my ($self, $query) = @_;

    $self->set('original_attributes',[@{$query->getAllAttributes()}]) 
	if ($query->getAllAttributes());
    $self->set('query',$query);
    return $query;
}
rcdescriptionprevnextTop
sub rc {
    my ($seq) = @_;

    $seq = reverse($seq);
    $seq =~ tr/YABCDGHKMRSTUVyabcdghkmrstuv/RTVGHCDMKYSAABrtvghcdmkysaab/;

    return $seq;
}
#----------------------
#----------------------
}
translatedescriptionprevnextTop
sub translate {
    my ($seq, $chro) = @_;
    #warn("Calling translate without a seq argument!") unless defined $seq;
return '' unless $seq; #my $id = $self->id;
# 1 = vertebrate 2= Vertebrate Mitochondrial
my $id;#my $id = '1'; # by default it's1
if ($chro =~ /MT/g){$id = '2'}else{$id = '1'} my ($partial) = 0; $partial = 2 if length($seq) % CODONSIZE == 2; $seq = lc $seq; $seq =~ tr/u/t/; my $protein = ""; if ($seq =~ /[^actg]/ ) { #ambiguous chars
for (my $i = 0; $i < (length($seq) - (CODONSIZE-1)); $i+= CODONSIZE) { my $triplet = substr($seq, $i, CODONSIZE); # if( $triplet eq $CODONGAP ) {
# $protein .= $GAP;
# } els
if (exists $CODONS->{$triplet}) { $protein .= substr($TABLES[$id-1], $CODONS->{$triplet},1); } else { $protein .= _translate_ambiguous_codon($triplet, $id); } } } else { # simple, strict translation
for (my $i = 0; $i < (length($seq) - (CODONSIZE -1)); $i+=CODONSIZE) { my $triplet = substr($seq, $i, CODONSIZE); # if( $triplet eq $CODONGAP ) {
# $protein .= $GAP;
# }
if (exists $CODONS->{$triplet}) { $protein .= substr($TABLES[$id-1], $CODONS->{$triplet}, 1); } else { $protein .= 'X'; } } } if ($partial == 2) { # 2 overhanging nucleotides
my $triplet = substr($seq, ($partial -4)). "n"; # if( $triplet eq $CODONGAP ) {
# $protein .= $GAP;
# } elsif (exists $CODONS->{$triplet}) {
if (exists $CODONS->{$triplet}) { my $aa = substr($TABLES[$id-1], $CODONS->{$triplet},1); $protein .= $aa; } else { $protein .= _translate_ambiguous_codon($triplet, $id, $partial); } } return $protein; } #----------------------
}
General documentation
AUTHORSTop
    *
CONTACTTop
This module is part of the BioMart project
http://www.biomart.org
Questions can be posted to the mart-dev mailing list: mart-dev@ebi.ac.uk