sub next_assembly
{ my $self = shift; local $/="\n";
my $assembly = Bio::Assembly::Scaffold->new(-source=>'phrap');
my ($contigOBJ,$read_name);
my $read_data = {}; while ($_ = $self->_readline) { chomp;
(/^CO Contig(\d+) (\d+) (\d+) (\d+) (\w+)/) && do { my $contigID = $1;
$contigOBJ = Bio::Assembly::Contig->new(-source=>'phrap', -id=>$contigID);
my $ori = ($5 eq 'U' ? 1 : -1); $contigOBJ->strand($ori);
my $consensus_sequence = undef;
while ($_ = $self->_readline) { chomp; last if (/^$/); s/\*/-/g; $consensus_sequence .= $_;
}
my $consensus_length = length($consensus_sequence);
$consensus_sequence = Bio::LocatableSeq->new(-seq=>$consensus_sequence,
-start=>1,
-end=>$consensus_length,
-id=>$contigID);
$contigOBJ->set_consensus_sequence($consensus_sequence);
$assembly->add_contig($contigOBJ);
};
/^BQ/ && do {
my $consensus = $contigOBJ->get_consensus_sequence()->seq();
my ($i,$j,@tmp);
my @quality = ();
$j = 0;
while ($_ = $self->_readline) {
chomp;
last if (/^$/);
@tmp = grep { /^\d+$/ } split(/\s+/);
$i = 0;
my $previous = 0;
my $next = 0;
while ($i<=$#tmp) {
if (substr($consensus,$j,1) eq '-') {
$previous = $tmp[$i-1] unless ($i == 0);
if ($i < $#tmp) {
$next = $tmp[$i+1];
} else {
$next = 0;
}
push(@quality,int(($previous+$next)/2)); } else {
push(@quality,$tmp[$i]);
$i++;
}
$j++;
}
}
my $qual = Bio::Seq::PrimaryQual->new(-qual=>join(" ",@quality),
-id=>$contigOBJ->id());
$contigOBJ->set_consensus_quality($qual);
};
/^AF (\S+) (C|U) (-*\d+)/ && do {
$read_name = $1; my $ori = $2;
$read_data->{$read_name}{'padded_start'} = $3; $ori = ( $ori eq 'U' ? 1 : -1);
$read_data->{$read_name}{'strand'} = $ori;
};
/^RD (\S+) (-*\d+) (\d+) (\d+)/ && do {
$read_name = $1;
$read_data->{$read_name}{'length'} = $2; $read_data->{$read_name}{'contig'} = $contigOBJ;
my $read_sequence;
while ($_ = $self->_readline) {
chomp;
last if (/^$/);
s/\*/-/g; $read_sequence .= $_; }
my $read = Bio::LocatableSeq->new(-seq=>$read_sequence,
-start=>1,
-end=>$read_data->{$read_name}{'length'},
-strand=>$read_data->{$read_name}{'strand'},
-id=>$read_name,
-primary_id=>$read_name,
-alphabet=>'dna');
my $padded_start = $read_data->{$read_name}{'padded_start'};
my $padded_end = $padded_start + $read_data->{$read_name}{'length'} - 1;
my $coord = Bio::SeqFeature::Generic->new(-start=>$padded_start,
-end=>$padded_end,
-strand=>$read_data->{$read_name}{'strand'},
-tag => { 'contig' => $contigOBJ->id }
);
$contigOBJ->set_seq_coord($coord,$read);
};
/^QA (-?\d+) (-?\d+) (-?\d+) (-?\d+)/ && do {
my $qual_start = $1; my $qual_end = $2;
my $align_start = $3; my $align_end = $4;
unless ($align_start == -1 && $align_end == -1) {
$align_start = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$align_start);
$align_end = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$align_end);
my $align_feat = Bio::SeqFeature::Generic->new(-start=>$align_start,
-end=>$align_end,
-strand=>$read_data->{$read_name}{'strand'},
-primary=>"_align_clipping:$read_name");
$align_feat->attach_seq( $contigOBJ->get_seq_by_name($read_name) );
$contigOBJ->add_features([ $align_feat ], 0);
}
unless ($qual_start == -1 && $qual_end == -1) {
$qual_start = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$qual_start);
$qual_end = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$qual_end);
my $qual_feat = Bio::SeqFeature::Generic->new(-start=>$qual_start,
-end=>$qual_end,
-strand=>$read_data->{$read_name}{'strand'},
-primary=>"_quality_clipping:$read_name");
$qual_feat->attach_seq( $contigOBJ->get_seq_by_name($read_name) );
$contigOBJ->add_features([ $qual_feat ], 0);
}
};
/^CT\s*\{/ && do {
my ($contigID,$type,$source,$start,$end,$date) = split(' ',$self->_readline);
$contigID =~ s/^Contig//i;
my $extra_info = undef;
while ($_ = $self->_readline) {
last if (/\}/);
$extra_info .= $_;
}
my $contig_tag = Bio::SeqFeature::Generic->new(-start=>$start,
-end=>$end,
-primary=>$type,
-tag=>{ 'source' => $source,
'creation_date' => $date,
'extra_info' => $extra_info
});
$assembly->get_contig_by_id($contigID)->add_features([ $contig_tag ],1);
};
/^RT\s*\{/ && do {
my ($readID,$type,$source,$start,$end,$date) = split(' ',$self->_readline);
my $extra_info = undef;
while ($_ = $self->_readline) {
last if (/\}/);
$extra_info .= $_;
}
$start = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$start);
$end = $contigOBJ->change_coord("aligned $read_name",'gapped consensus',$end);
my $read_tag = Bio::SeqFeature::Generic->new(-start=>$start,
-end=>$end,
-primary=>$type,
-tag=>{ 'source' => $source,
'creation_date' => $date,
'extra_info' => $extra_info
});
my $contig = $read_data->{$readID}{'contig'};
my $coord = $contig->get_seq_coord( $contig->get_seq_by_name($readID) );
$coord->add_sub_SeqFeature($read_tag);
};
/^WA\s*\{/ && do {
my ($type,$source,$date) = split(' ',$self->_readline);
my $extra_info = undef;
while ($_ = $self->_readline) {
last if (/\}/);
$extra_info = $_;
}
my $assembly_tag = join(" ","TYPE: ",$type,"PROGRAM:",$source,
"DATE:",$date,"DATA:",$extra_info);
$assembly_tag = Bio::Annotation::SimpleValue->new(-value=>$assembly_tag);
$assembly->annotation->add_Annotation('phrap',$assembly_tag);
};
}
$assembly->update_seq_list();
return $assembly; } |