use Bio::EnsEMBL::Pipeline::GeneDuplication::CodonBasedAlignment;
# Create our object, specifying the genetic code needed to
# translate our sequences. The commonest codes are:
# universal => 1
# vertebrate mitochondrial => 2
# These numbers are the same as those you need to specify to
# translate any Bio::Seq object. Hence if you need a truly
# oddball genetic code, check the Bio::Seq module documentation.
my $cba =
Bio::EnsEMBL::Pipeline::GeneDuplication::CodonBasedAlignment->new(
-genetic_code => 1,
-seqs => \@seqs);
# The sequences passed to the module should be a reference to
# an array of Bio::Seq objects. Everything is going to fall apart
# if you pass amino acid sequences here, so make sure they are
# nucleotide sequences.
# To run the the actual alignment process do the following.
# The return value is an array of Bio::Seq objects, that
# when dumped to a multiple fasta file or the like, will
# display a multiple alignment.
my $align = $cba->run_alignment;
foreach my $seq (@$align){
my $string = $seq->seq;
$string =~ s/(.{60})/$1\n/g;
print STDOUT ">" . $seq->display_id . "\n$string";
}
None available.
sub _alignment
{ my ($self, $action, $seq) = @_;
if (defined $action && $action eq 'add') {
push (@{$self->{_alignment}}, $seq);
} elsif (defined $action && $action eq 'clear') {
$self->{_alignment} = [];
} elsif (defined $action) {
warning("CodonBasedAlignment::_alignment - unknown action specified ".
"[$action]. Must be\' add\' or\' clear\'");
}
return $self->{_alignment}; } |
sub new
{ my ($class, @args) = @_;
my $self = bless {},$class;
my ($genetic_code,
$seqs,
) = rearrange([qw(GENETIC_CODE
SEQS
)], @args);
unless (defined $genetic_code){
warning("Genetic code not specified. Defaulting " .
"to the so-called universal code.");
$genetic_code = 1;
}
$self->genetic_code($genetic_code);
$self->sequences($seqs) if defined $seqs;
return $self; } |
sub run_alignment
{ my ($self) = @_;
my $clustalw = Bio::Tools::Run::Alignment::Clustalw->new();
my $seqs = $self->sequences;
unless (scalar @$seqs > 1) {
die "It takes more than one sequence to clustalw";
return 0
}
my %nt_seqs;
my %desc_lookup;
foreach my $nt_seq (@$seqs){
$nt_seqs{$nt_seq->display_id} = $nt_seq;
$desc_lookup{$nt_seq->display_id} = $nt_seq->desc;
}
my @aa_seqs;
foreach my $seq (@$seqs) {
push (@aa_seqs, $seq->translate(undef, undef, undef, $self->genetic_code));
}
my $alignment = $clustalw->align(\@aa_seqs);
my @aligned_seqs;
my $longest_seq = 0;
foreach my $aligned_seq ($alignment->each_seq){
$aligned_seq->desc($desc_lookup{$aligned_seq->display_id});
push(@aligned_seqs, $aligned_seq);
$longest_seq = $aligned_seq->length if $aligned_seq->length > $longest_seq;
}
foreach my $aligned_seq (@aligned_seqs) {
my $nt_work_seq = $nt_seqs{$aligned_seq->display_id}->seq;
$nt_work_seq =~ s/(...)/$1:/g;
my @nt_seq_array = split /:/, $nt_work_seq;
my @aa_seq_array = split //, $aligned_seq->seq;
while (scalar @aa_seq_array < $longest_seq){
push(@aa_seq_array, '-');
print "Adding gap to make sequences same length.\n";
}
my $aligned_nt_string = '';
foreach my $aa (@aa_seq_array) {
if ($aa eq '.' || $aa eq '-'){
$aligned_nt_string .= '---';
} else {
my $codon = shift @nt_seq_array;
unless ($codon =~ /.../){$codon = '---';}
$aligned_nt_string .= $codon;
}
}
if (scalar @nt_seq_array == 1 && $nt_seq_array[-1] =~ /.../){
$aligned_nt_string .= join '', @nt_seq_array;
}
my $aligned_nt_bioseq = Bio::Seq->new(-display_id => $aligned_seq->display_id,
-desc => $aligned_seq->desc,
-seq => $aligned_nt_string);
$self->_alignment('add', $aligned_nt_bioseq);
}
return $self->_alignment; } |
sub sequences
{ my $self = shift;
if (@_) {
$self->{_sequences} = shift;
foreach my $seq (@{$self->{_sequences}}){
throw("Unexpected sequence type. Should be Bio::Seq, " .
"but was [$seq]")
unless $seq->isa("Bio::Seq");
}
}
return $self->{_sequences}
}
} |
sub sequences_from_file
{ my $self = shift;
my $seqio = Bio::SeqIO->new(-file => $self->filename,
-format => 'fasta');
my @seqs;
while (my $seq = $seqio->next_seq){
push @seqs, $seq;
}
$self->sequences(\@seqs);
return 1 } |