CodonBasedAlignment documentation.

use Bio::EnsEMBL::Pipeline::GeneDuplication::CodonBasedAlignment;
# Create our object, specifying the genetic code needed to
# translate our sequences. The commonest codes are:
# universal => 1
# vertebrate mitochondrial => 2
# These numbers are the same as those you need to specify to
# translate any Bio::Seq object. Hence if you need a truly
# oddball genetic code, check the Bio::Seq module documentation.
my $cba =
Bio::EnsEMBL::Pipeline::GeneDuplication::CodonBasedAlignment->new(
-genetic_code => 1,
-seqs => \@seqs);
# The sequences passed to the module should be a reference to
# an array of Bio::Seq objects. Everything is going to fall apart
# if you pass amino acid sequences here, so make sure they are
# nucleotide sequences.
# To run the the actual alignment process do the following.
# The return value is an array of Bio::Seq objects, that
# when dumped to a multiple fasta file or the like, will
# display a multiple alignment.
my $align = $cba->run_alignment;
foreach my $seq (@$align){
my $string = $seq->seq;
$string =~ s/(.{60})/$1\n/g;

  print STDOUT ">" . $seq->display_id . "\n$string";
}

sub _alignment {

  my ($self, $action, $seq) = @_;

  if (defined $action && $action eq 'add') {
    push (@{$self->{_alignment}}, $seq);
  } elsif (defined $action && $action eq 'clear') {
    $self->{_alignment} = [];
  } elsif (defined $action) {
    warning("CodonBasedAlignment::_alignment - unknown action specified ".
		"[$action].  Must be\' add\' or\' clear\'");
  }
  
  return $self->{_alignment};

}

sub new {

  my ($class, @args) = @_;

  my $self = bless {},$class;

  my ($genetic_code,
      $seqs,
     ) = rearrange([qw(GENETIC_CODE
		       SEQS
		      )], @args);

  unless (defined $genetic_code){
    warning("Genetic code not specified.  Defaulting " . 
	    "to the so-called universal code.");
    $genetic_code = 1;
  }

  $self->genetic_code($genetic_code);

  $self->sequences($seqs) if defined $seqs;

  return $self;

}

sub run_alignment {

  my ($self) = @_;

  my $clustalw = Bio::Tools::Run::Alignment::Clustalw->new();

  my $seqs = $self->sequences;

  unless (scalar @$seqs > 1) {
    die "It takes more than one sequence to clustalw";
    return 0
  }

  my %nt_seqs;
  my %desc_lookup;

  foreach my $nt_seq (@$seqs){
    $nt_seqs{$nt_seq->display_id}     = $nt_seq; 
    $desc_lookup{$nt_seq->display_id} = $nt_seq->desc;
  }

  my @aa_seqs;
  foreach my $seq (@$seqs) {
    push (@aa_seqs, $seq->translate(undef, undef, undef, $self->genetic_code));
  }

  my $alignment = $clustalw->align(\@aa_seqs);

  my @aligned_seqs;

  my $longest_seq = 0;
  foreach my $aligned_seq ($alignment->each_seq){
    # Tack description lines back onto our aligned sequences.
    $aligned_seq->desc($desc_lookup{$aligned_seq->display_id});

    push(@aligned_seqs, $aligned_seq);
    $longest_seq = $aligned_seq->length if $aligned_seq->length > $longest_seq;
  }

  foreach my $aligned_seq (@aligned_seqs) {
    my $nt_work_seq = $nt_seqs{$aligned_seq->display_id}->seq;
    $nt_work_seq =~ s/(...)/$1:/g;
    my @nt_seq_array = split /:/, $nt_work_seq;
    my @aa_seq_array = split //, $aligned_seq->seq;

    while (scalar @aa_seq_array < $longest_seq){
      push(@aa_seq_array, '-');
      print "Adding gap to make sequences same length.\n";
    }

#    throw("nt and aa sequences are not of corresponding lengths.") 
#      if scalar @nt_seq_array != scalar @aa_seq_array;

    my $aligned_nt_string = '';

    foreach my $aa (@aa_seq_array) {
      if ($aa eq '.' || $aa eq '-'){
	$aligned_nt_string .= '---';
      } else {
	my $codon = shift @nt_seq_array;

	# Pad out any partial codon (some do exist).
	unless ($codon =~ /.../){$codon = '---';}

	$aligned_nt_string .= $codon;
      }
    }

    # If the nt sequence included a stop codon, tack this
    # on here (the aa sequence will be missing this).

    if (scalar @nt_seq_array == 1 && $nt_seq_array[-1] =~ /.../){
      $aligned_nt_string .= join '', @nt_seq_array;
    }

    my $aligned_nt_bioseq = Bio::Seq->new(-display_id => $aligned_seq->display_id,
					  -desc       => $aligned_seq->desc,
					  -seq        => $aligned_nt_string);

    $self->_alignment('add', $aligned_nt_bioseq);
  }

  return $self->_alignment;

}

_alignment	No description	Code
filename	No description	Code
genetic_code	No description	Code
new	No description	Code
run_alignment	No description	Code
sequences	No description	Code
sequences_from_file	No description	Code