# next_aln
#
# Reads lines from $self->_readline() and builds one pairwise alignment.
#
# Args    : none (invoked as a method on the parser object)
# Returns : a Bio::SimpleAlign holding two Bio::LocatableSeq objects
#           (seq1 and seq2), or undef when no alignment rows were seen
#           before the input ran out.
# Effects : updates $self->{'_type'} whenever a header line announces the
#           alignment type / program name.
sub next_aln {
    my ($self) = @_;

    my $seenbegin = 0;    # set once the first alignment row has been parsed
    my %data = (
        'seq1' => {
            'start' => undef,
            'end'   => undef,
            'name'  => '',
            'data'  => '',
        },
        'seq2' => {
            'start' => undef,
            'end'   => undef,
            'name'  => '',
            'data'  => '',
        },
        'align' => '',
        'type'  => $self->{'_type'},
    );
    my %names;            # how often each (truncated) name has been seen

    # A lexical line buffer is used instead of the global $_ so that the
    # caller's $_ is not overwritten while parsing.
    while ( defined( my $line = $self->_readline ) ) {

        # Skip blank lines and comment lines that carry no content.
        next if ( $line =~ /^\#?\s+$/ || $line =~ /^\#+\s*$/ );

        if ( $line =~ /^\#(\=|\-)+\s*$/ ) {

            # A '#====' / '#----' ruler after alignment rows have been
            # read ends the current record.
            last if ($seenbegin);
        }
        elsif (   $line =~ /(Local|Global):\s*(\S+)\s+vs\s+(\S+)/
               || $line =~ /^\#\s+Program:\s+(\S+)/ )
        {
            # When the 'Program:' pattern is the one that matched, it has
            # a single capture group, so $2 and $3 are undefined here.
            my ( $name1, $name2 ) = ( $2, $3 );
            if ( !defined $name1 ) {
                $data{'type'} = $1;
                $name1 = $name2 = '';
            }
            else {
                $data{'type'} = $1 eq 'Local' ? 'water' : 'needle';
            }
            $data{'seq1'}->{'name'} = $name1;
            $data{'seq2'}->{'name'} = $name2;
            $self->{'_type'}        = $data{'type'};
        }
        elsif ( $line =~ /Score:\s+(\S+)/ ) {

            # Recorded in %data only; nothing below reads it back.
            $data{'score'} = $1;
        }
        elsif ( $line =~ /^\#\s+(1|2):\s+(\S+)/
            && !$data{"seq$1"}->{'name'} )
        {
            my ( $which, $nm ) = ( $1, $2 );

            # Names are cut to $EMBOSSTitleLen characters (presumably the
            # width EMBOSS uses for names in alignment rows -- confirm).
            $nm = substr( $nm, 0, $EMBOSSTitleLen );

            # Disambiguate a repeated name with a '-<count>' suffix.
            if ( $names{$nm} ) {
                $nm .= "-" . $names{$nm};
            }
            $names{$nm}++;
            $data{"seq$which"}->{'name'} = $nm;
        }
        elsif ( $data{'seq1'}->{'name'}
            && $line =~ /^\Q$data{'seq1'}->{'name'}\E/ )
        {
            # \Q..\E: the name is data, not a pattern. Without quoting,
            # identifiers containing regex metacharacters ('|', '+', '(',
            # '.') would mis-match or fail to compile.
            my $count = 0;
            $seenbegin = 1;

            # Per-block [start, residues] for row 0 (seq1) and row 2 (seq2).
            my @current;

            # One alignment block is three consecutive lines:
            #   0: seq1 row, 1: match line, 2: seq2 row.
            while ( defined $line ) {
                if ( $count == 0 || $count == 2 ) {
                    my @l = split ' ', $line;
                    my ( $seq, $align, $start, $end );
                    if ( $count == 2 && $data{'seq2'}->{'name'} eq '' ) {

                        # seq2 has no name, so its row has no name column.
                        ( $start, $align, $end ) = @l;
                    }
                    elsif ( @l == 3 ) {

                        # Name, start and end only: no residues in this row.
                        $align = '';
                        ( $seq, $start, $end ) = @l;
                    }
                    else {
                        ( $seq, $start, $align, $end ) = @l;
                    }

                    my $seqname = ( $count == 0 ) ? 'seq1' : 'seq2';
                    $data{$seqname}->{'data'} .= $align;
                    $data{$seqname}->{'start'} ||= $start;
                    $data{$seqname}->{'end'} = $end;
                    $current[$count] = [ $start, $align || '' ];
                }
                else {
                    # Match line: strip surrounding whitespace and append.
                    $line =~ s/^\s+//;
                    $line =~ s/\s+$//;
                    $data{'align'} .= $line;
                }

                last if ( $count++ == 2 );
                $line = $self->_readline();
            }

            if ( $data{'type'} eq 'needle' ) {

                # In 'needle' output the two rows of a block may differ in
                # length; pad the shorter sequence with '-' and the match
                # line with spaces so all three stay the same length.
                my $d =
                  length( $current[0]->[1] ) - length( $current[2]->[1] );

                if ( $d != 0 ) {
                    my $pad = abs($d);

                    # $d < 0: seq1 (row 0) is shorter; $d > 0: seq2 (row 2).
                    my ( $short, $short_row, $long_row ) =
                      ( $d < 0 )
                      ? ( $data{'seq1'}, 0, 2 )
                      : ( $data{'seq2'}, 2, 0 );

                    if (   $current[$short_row]->[0] <= 1
                        && $current[$long_row]->[0] > 1 )
                    {
                        # Shorter row is still at its first position while
                        # the other has advanced: pad on the left.
                        $short->{'data'} = ( '-' x $pad ) . $short->{'data'};
                        $data{'align'}   = ( ' ' x $pad ) . $data{'align'};
                    }
                    else {
                        # Otherwise pad on the right.
                        $short->{'data'} .= '-' x $pad;
                        $data{'align'}   .= ' ' x $pad;
                    }
                }
            }
        }
    }

    # Explicit undef (not a bare return) is kept so that list-context
    # callers still receive a single undef element, as before.
    return undef unless $seenbegin;

    my $aln = Bio::SimpleAlign->new(
        -verbose => $self->verbose(),
        -source  => "EMBOSS-" . $data{'type'},
    );

    foreach my $seqname (qw(seq1 seq2)) {
        return undef unless ( defined $data{$seqname} );

        # Fall back to 'seq1' / 'seq2' when the input gave no name.
        $data{$seqname}->{'name'} ||= $seqname;

        my $seq = Bio::LocatableSeq->new(
            '-seq'   => $data{$seqname}->{'data'},
            '-id'    => $data{$seqname}->{'name'},
            '-start' => $data{$seqname}->{'start'},
            '-end'   => $data{$seqname}->{'end'},
        );
        $aln->add_seq($seq);
    }
    return $aln;
}
User feedback is an integral part of the evolution of this and other
Bioperl modules. Send your comments and suggestions preferably to
the Bioperl mailing list. Your participation is much appreciated.
bioperl-l@bioperl.org - General discussion
http://bioperl.org/MailList.shtml - About the mailing lists
The rest of the documentation details each of the object methods.
Internal methods are usually prefixed with an underscore (_).