sub _read_taxlabels
{ my ($self) = @_;
my ($name, @names);
while (my $entry = $self->_readline) {
($name) = $entry =~ /\s*(\S+)\s+/;
$name =~ s/\[[^\[]+\]//g;
$name =~ s/\W/_/g;
push @names, $name;
last if /^\s*;/;
}
return @names; } |
sub next_aln
{ my $self = shift;
my $entry;
my ($aln_name, $seqcount, $residuecount, %hash, $alphabet,
$match, $gap, $missing, $equate, $interleave,
$name,$str,@names,$seqname,$start,$end,$count,$seq);
my $aln = Bio::SimpleAlign->new(-source => 'nexus');
$entry = $self->_readline;
$entry = $self->_readline while $entry =~ /^\s+$/;
return unless $entry;
$self->throw("Not a valid interleaved NEXUS file! [#NEXUS] not starting the file\n$entry")
unless $entry =~ /^#NEXUS/i;
while (defined($entry = $self->_readline)) {
local ($_) = $entry;
/\[TITLE. *([^\]]+)]\s+/i and $aln_name = $1;
last if /^begin +data/i || /^begin +taxa/i;
}
$aln_name =~ s/\s/_/g and $aln->id($aln_name) if $aln_name;
my $taxlabels;
while ($entry = $self->_readline) {
local ($_) = $entry;
$taxlabels = 1 if /taxlabels/i;
if ($taxlabels) {
@names = $self->_read_taxlabels;
$taxlabels = 0;
}
/ntax ?= ?(\d+)/i and $seqcount = $1;
/nchar ?= ?(\d+)/i and $residuecount = $1;
/matchchar ?= ?(.)/i and $match = $1;
/gap ?= ?(.)/i and $gap = $1;
/missing ?= ?(.)/i and $missing = $1;
/equate ?= ?"([^\"]+)/i and $equate = $1; /datatype ?= ?(\w+)/i and $alphabet = lc $1;
/interleave/i and $interleave = 1 ;
last if /matrix/i;
}
$self->throw("Not a valid NEXUS sequence file. Datatype not specified")
unless $alphabet;
$self->throw("Not a valid NEXUS sequence file. Datatype should not be [$alphabet]")
unless $valid_type{$alphabet};
$aln->gap_char($gap);
$aln->missing_char($missing);
while ($entry = $self->_readline) {
unless ($entry =~ /^\s+$/) {
$self->_pushback($entry);
last;
}
}
if (@names == 0) { while ($entry = $self->_readline) {
local ($_) = $entry;
s/\[[^[]+\]//g; if ($interleave) {
/^\s+$/ and last;
} else {
/^\s+$/ and next;
}
/^\s*;\s*$/ and last;
if (/^\s*('([^']*?)'|([^']\S*))\s+(.*)\s$/) { $name = ($2 || $3);
$str = $4;
$name =~ s/ /_/g;
push @names, $name;
$str =~ s/\s//g;
$count = @names;
$hash{$count} = $str;
};
$self->throw("Not a valid interleaved NEXUS file!
seqcount [$count] > predeclared [$seqcount] in the first section") if $count > $seqcount;
}
}
$count = 0;
while( $entry = $self->_readline) {
local ($_) = $entry;
s/\[[^[]+\]//g; last if /^\s*;/;
$count = 0, next if $entry =~ /^\s*$/;
if (/^\s*('([^']*?)'|([^']\S*))\s+(.*)\s$/) { $str = $4;
$str =~ s/\s//g;
$count++;
$hash{$count} .= $str;
};
$self->throw("Not a valid interleaved NEXUS file!
seqcount [$count] > predeclared [$seqcount] ") if $count > $seqcount;
}
return 0 if @names < 1;
$count = 0;
foreach $name ( @names ) {
$count++;
if( $name =~ /(\S+)\/(\d+)-(\d+)/ ) {
$seqname = $1;
$start = $2;
$end = $3;
} else {
$seqname=$name;
$start = 1;
$str = $hash{$count};
$str =~ s/[^A-Za-z]//g;
$end = length($str);
}
$self->throw("Length of sequence [$seqname] is not [$residuecount]! ")
unless CORE::length($hash{$count}) == $residuecount;
$seq = new Bio::LocatableSeq('-seq'=>$hash{$count},
'-id'=>$seqname,
'-start'=>$start,
'-end'=>$end,
'alphabet'=>$alphabet
);
$aln->add_seq($seq);
}
$aln->unmatch($match) if $match;
if ($equate) {
$aln->map_chars($1, $2) while $equate =~ /(\S)=(\S)/g;
}
while ($entry !~ /endblock/i) {
$entry = $self->_readline;
}
return $aln; } |
sub write_aln
{ my ($self,@aln) = @_;
my $count = 0;
my $wrapped = 0;
my $maxname;
my ($length,$date,$name,$seq,$miss,$pad,%hash,@arr,$tempcount,$index );
my ($match, $missing, $gap,$symbols) = ('', '', '','');
foreach my $aln (@aln) {
if( ! $aln || ! $aln->isa('Bio::Align::AlignI') ) {
$self->warn("Must provide a Bio::Align::AlignI object when calling write_aln");
next;
}
$self->throw("All sequences in the alignment must be the same length")
unless $aln->is_flush($self->verbose);
$length = $aln->length();
$self->_print (sprintf("#NEXUS\n[TITLE: %s]\n\nbegin data;\ndimensions ntax=%s nchar=%s;\n",
$aln->id, $aln->no_sequences, $length));
$match = "match=". $aln->match_char if $aln->match_char;
$missing = "missing=". $aln->missing_char if $aln->missing_char;
$gap = "gap=". $aln->gap_char if $aln->gap_char;
$symbols = 'symbols="'.join('',$aln->symbol_chars). '"' if( $aln->symbol_chars);
$self->_print (sprintf("format interleave datatype=%s %s %s %s %s;\n\nmatrix\n",
$aln->get_seq_by_pos(1)->alphabet, $match, $missing, $gap, $symbols));
my $indent = $aln->maxdisplayname_length;
$aln->set_displayname_flat();
foreach $seq ( $aln->each_seq() ) {
$name = $aln->displayname($seq->get_nse());
$name = sprintf("%-${indent}s", $name);
$hash{$name} = $seq->seq();
push(@arr,$name);
}
while( $count < $length ) {
foreach $name ( @arr ) {
my $dispname = $name;
$self->_print (sprintf("%${indent}s ",$dispname));
$tempcount = $count;
$index = 0;
while( ($tempcount + 10 < $length) && ($index < 5) ) {
$self->_print (sprintf("%s ",substr($hash{$name},$tempcount,10)));
$tempcount += 10;
$index++;
}
if( $index < 5) {
$self->_print (sprintf("%s ",substr($hash{$name},$tempcount)));
$tempcount += 10;
}
$self->_print ("\n");
}
$self->_print ("\n\n");
$count = $tempcount;
$wrapped = 1;
}
$self->_print (";\n\nendblock;\n");
}
$self->flush if $self->_flush_on_write && defined $self->_fh;
return 1;
}
1; } |
Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution.
Bug reports can be submitted via email or the web:
bioperl-bugs@bio.perl.org
http://bugzilla.bioperl.org/