Summary | Included libraries | Package variables | Synopsis | Description | General documentation | Methods |
WebCvs | Raw content |
AXT Formatter
AXT alignment files are produced from Blastz
For more documentation see :
http://bioperl.org/wiki/AXT_format
http://genome.ucsc.edu/goldenPath/help/axt.html
_get_aligned_sequence_from_original_sequence_and_cigar_line | No description | Code |
_new | No description | Code |
_rc | No description | Code |
_rcCigarLine | No description | Code |
getDisplayNames | No description | Code |
isSpecial | No description | Code |
nextRow | No description | Code |
preProcessRowMlagan | No description | Code |
processQuery | No description | Code |
returnAXTline | No description | Code |
_get_aligned_sequence_from_original_sequence_and_cigar_line | description | prev | next | Top |
my ($original_sequence, $cigar_line) = @_; my $aligned_sequence = ""; return undef if (!$original_sequence or !$cigar_line); my $seq_pos = 0; my @cig = ( $cigar_line =~ /(\d*[GMD])/g ); for my $cigElem ( @cig ) { my $cigType = substr( $cigElem, -1, 1 ); my $cigCount = substr( $cigElem, 0 ,-1 ); $cigCount = 1 unless ($cigCount =~ /^\d+$/); #print "-- $cigElem $cigCount $cigType\n";}
if( $cigType eq "M" ) { $aligned_sequence .= substr($original_sequence, $seq_pos, $cigCount); $seq_pos += $cigCount; } elsif( $cigType eq "G" or $cigType eq "D") { $aligned_sequence .= "-" x $cigCount; } } warn ("Cigar line ($seq_pos) does not match sequence lenght (".length($original_sequence).")") if ($seq_pos != length($original_sequence)); return $aligned_sequence; } #--------------------------------------------
_new | description | prev | next | Top |
my ($self) = @_; $self->SUPER::_new(); $aln_nb = 0 ;}
_rc | description | prev | next | Top |
my ($seq) = @_; $seq = reverse($seq); $seq =~ tr/YABCDGHKMRSTUVyabcdghkmrstuv/RTVGHCDMKYSAABrtvghcdmkysaab/; return $seq; } #--------------------------------------------}
_rcCigarLine | description | prev | next | Top |
my ($cigar_line) = @_; #print STDERR "###cigar_line $cigar_line\n";}
my @cig = ( $cigar_line =~ /(\d*[GMD])/g ); my @rev_cigar = reverse(@cig); my $rev_cigar; for my $cigElem ( @rev_cigar ) { $rev_cigar.=$cigElem; } #print STDERR "###rev_cigar $rev_cigar\n";
return $rev_cigar; } #--------------------------------------------
getDisplayNames | description | prev | next | Top |
my $self = shift; return '' ; } # subroutines from AXT.pm <alpha version>}
#--------------------------------------------
isSpecial | description | prev | next | Top |
return 1; } 1;}
nextRow | description | prev | next | Top |
my $self = shift; my @data ; my @array; #my $aln_nb = 0 ;}
my $PROCESSED_SEQS ; my $rtable = $self->get('result_table'); my $row = $rtable->nextRow; if (!$row){ return; } if ( ( ($$row[0]=~/^(A|C|G|T|N)/) && ($$row[0]!~/^(Chr)/) ) && ( ($$row[1]=~/^(A|C|G|T|N)/) && ($$row[1]!~/^(Chr)/) ) ){ # 15/08/06 removed /i
# added a hack for 'Ch'
@data = &preProcessRowMlagan(\@{$row}); foreach my $foo (@data){ my $seq = $foo->[0] ; my $chr = $foo->[1] ; my $start = $foo->[2] ; my $end = $foo->[3] ; my $strand = $foo->[4] ; my $length = $foo->[5] ; my $genome = $foo->[6] ; my $cigar = $foo->[7] ; my $score = $foo->[8] ; my @prearray = ($seq,$chr,$start,$end,$strand,$length,$cigar,$score); ## Can be better coded ## need to change that like, add another for ($j=0..$j<=7){ push (@array, $data[$i][$j] )
push (@array, @prearray); # if ($seq ne 'N'){
# # do something
# # in pairwise seq alignment you should always have a seq
# }
} push (@array, $aln_nb); $PROCESSED_SEQS = &returnAXTline(@array); $aln_nb++; return $PROCESSED_SEQS ; } } #------------------------
preProcessRowMlagan | description | prev | next | Top |
my $row = shift ; my @want ; my $score; my $k = 0; my $size_row = @{$row}; #-- Get all the seq in $want[$k][0]}
while ( ($$row[0]=~/^(A|C|G|T|N)/) && ($$row[0]!~/^Chr/i) && ($$row[0]!~/\_/) ){ # get all seq out
$want[$k][0] = shift (@{$row}); $k++; } #-- then put the rest of it into $want[$j][??]
for (my $j=0;$j<=$k-1;$j++){ for (my $i=1;$i<=8;$i++){ #IMPORTANT changed from 7 to 8, as I have now a score for all species
$want[$j][$i] = shift (@{$row}); } } return (@want); } #--------------------------------------------
#sub getDisplayNames {
#// WARNING
#// This return the number of attribute in the attribute list. (eg: 17)
# my $self = shift;
# return $self->getTextDisplayNames("\t");
#}
#--------------------------------------------
processQuery | description | prev | next | Top |
my ($self, $query) = @_; $self->set('original_attributes',[@{$query->getAllAttributes()}]) if ($query->getAllAttributes()); $self->set('query',$query); return $query;}
returnAXTline | description | prev | next | Top |
my ($seq1,$chr1,$start1,$end1,$strand1,$length1,$cigar1,$score1,$seq2,$chr2,$start2,$end2,$strand2,$length2,$cigar2,$score2,$aln_nb) = @_; if ($strand1 > 0){ ## If the qy specie is forward}
my ($hstart2, $hend2, $hstrand2); if ($strand2 < 0) { $hstrand2 = "-"; $hstart2 = $length2 - $end2 + 1; $hend2 = $length2 - $start2 + 1; } else { $hstrand2 = "+"; $hstart2 = $start2; $hend2 = $end2; } my $line1 = sprintf("%d %5s %10d %10d %5s %10d %10d %-1s %s", $aln_nb,$chr1,$start1,$end1,$chr2,$hstart2,$hend2,$hstrand2,$score1); my $line2 = sprintf( _get_aligned_sequence_from_original_sequence_and_cigar_line($seq1, $cigar1)); my $line3 = sprintf( _get_aligned_sequence_from_original_sequence_and_cigar_line($seq2, $cigar2)); return ("$line1\n$line2\n$line3\n\n"); #-------
} elsif ($strand1 < 0){ ## If the qy specie is reverse
my $length_seq1 = length ($seq1); my $length_seq2 = length ($seq2); my ($hstart1, $hend1, $hstrand1); #$hstrand1 = "-"; #the strand1 is always forward in AXT format
$hstart1 = $start1 ; $hend1 = $end1 ; my ($hstart2, $hend2, $hstrand2); #recalculate coordinate if strand +
if ($strand2 > 0) { # if strand + make it - !
$hstrand2 = "-"; $hstart2 = $length2 - $end2 + 1; $hend2 = $length2 - $start2 + 1; } else { # if strand - make it + !
$hstrand2 = "+"; $hstart2 = $start2; $hend2 = $end2; } my $line1 = sprintf ("%d %5s %10d %10d %5s %10d %10d %-1s %s ", $aln_nb,$chr1,$start1,$end1,$chr2,$hstart2,$hend2,$hstrand2,$score1); my $line2 = sprintf ( _get_aligned_sequence_from_original_sequence_and_cigar_line( _rc($seq1), _rcCigarLine($cigar1))); my $line3 = sprintf ( _get_aligned_sequence_from_original_sequence_and_cigar_line( _rc($seq2), _rcCigarLine($cigar2))); return ("$line1\n$line2\n$line3\n\n"); } else { warn "\n\n Problem with 1st species strand\n\n "; } } #--------------------------------------------
EXAMPLE | Top |
0 chr19 3001012 3001075 chr11 70568380 70568443 - 3500
TCAGCTCATAAATCACCTCCTGCCACAAGCCTGGCCTGGTCCCAGGAGAGTGTCCAGGCTCAGA
TCTGTTCATAAACCACCTGCCATGACAAGCCTGGCCTGTTCCCAAGACAATGTCCAGGCTCAGA
1 chr19 3008279 3008357 chr11 70573976 70574054 - 3900 CACAATCTTCACATTGAGATCCTGAGTTGCTGATCAGAATGGAAGGCTGAGCTAAGATGAGCGACGAGGCAATGTCACA CACAGTCTTCACATTGAGGTACCAAGTTGTGGATCAGAATGGAAAGCTAGGCTATGATGAGGGACAGTGCGCTGTCACA
AUTHORS | Top |
CONTACT | Top |