None available.
sub _alignments
{ my $self = shift;
if (@_) {
$self->{_aligns} = shift;
throw("Was expecting alignments to be " .
"Bio::EnsEMBL::Pipeline::Alignment objects.")
unless (scalar @{$self->{_aligns}} > 0 &&
defined $self->{_aligns}->[0] &&
$self->{_aligns}->[0]->isa("Bio::EnsEMBL::Pipeline::Alignment"));
$self->{_align_vs_id} = undef;
}
return $self->{_aligns} } |
sub _check_ids
{ my $self = shift;
my %alignment_names;
my $verdict = 1;
foreach my $align (@{$self->_alignments}){
$alignment_names{$align->alignment_name}++;
}
foreach my $seq (@{$self->_seqs}){
$verdict = 0
unless $alignment_names{$seq->display_id}
}
return $verdict } |
sub _derive_informative_site_alignment
{ my ($self, $seq, $informative_sites_string) = @_;
my $gene_string = $seq->seq;
my $gene_length = length($gene_string);
my $ext_align = $self->_retrieve_align_by_id($seq->display_id);
my %evidence_strings;
foreach my $align_seq (@{$ext_align->fetch_AlignmentSeqs}){
$evidence_strings{$align_seq->name} = $align_seq->seq;
}
my $align_length = length($evidence_strings{'genomic_sequence'});
my %aligned_informative_site_strings;
my $exon_string = $evidence_strings{'exon_sequence'};
$exon_string =~ s/intron-truncated/----------------/g;
my $align_coord = 0;
my $lost = 1;
for (my $gene_coord = 0; $gene_coord < $gene_length; $gene_coord++) {
if ($lost) {
my $gene_seed_string = substr($gene_string, $gene_coord, 10);
LOST:
while (1){
my $exon_seed_string = substr($exon_string, $align_coord, 10);
if ($exon_seed_string =~ /^(-+)/){
$align_coord += length($1);
next LOST;
}
throw("Have run off the end of the alignment without matching the gene seed. Bugger.")
if ($align_coord >= $align_length);
$exon_seed_string =~ s/-//g;
my $increment = 0;
EXTENTION:
while (length ($exon_seed_string) < 8){
$increment++;
$exon_seed_string = substr($exon_string, $align_coord, 10 + $increment);
if ($exon_seed_string =~ /-$/){
$increment++;
$exon_seed_string =~ s/-//g;
next EXTENTION
}
$exon_seed_string =~ s/-//g;
throw("Have run off the end of the alignment without matching the gene seed. Bugger.")
if ($align_coord + 10 + $increment >= $align_length);
}
if ($gene_seed_string =~ /^$exon_seed_string/) {
$lost = 0;
last LOST
}
$align_coord++;
}
}
throw("Gene and aligned exon sequence have unexpectedly become unaligned.")
unless substr($gene_string, $gene_coord, 1) eq substr($exon_string, $align_coord, 1);
if (substr($informative_sites_string, $gene_coord, 1) eq '*'){
foreach my $evidence_id (keys %evidence_strings) {
$aligned_informative_site_strings{$evidence_id} .=
substr($evidence_strings{$evidence_id}, $align_coord, 1)
}
}
else {
}
my $hop = 1;
if (($gene_coord + 1 <= $gene_length)&&
(substr($exon_string, $align_coord + $hop, 1) ne '-')) {
$align_coord++;
next
} else {
while (substr($exon_string, $align_coord + $hop, 1) eq '-'){
$hop++;
while (substr($exon_string, $align_coord + $hop, 10) eq '-' x 10) {
$hop += 10;
while (substr($exon_string, $align_coord + $hop, 100) eq '-' x 100) {
$hop += 100;
}
}
}
$align_coord += $hop;
my $exon_neighbourhood = substr($exon_string, $align_coord, 10);
$exon_neighbourhood =~ s/-//g;
if ((! substr($gene_string, $gene_coord + 1, 10) =~ /^$exon_neighbourhood/)
&& ($gene_coord < (length($gene_string) - 1))) {
warning("Erk. Have managed to become lost.");
$lost = 1;
}
}
}
my $informative_sites_alignment =
Bio::EnsEMBL::Pipeline::Alignment->new(
-name => $seq->display_id);
foreach my $evidence_id (keys %aligned_informative_site_strings) {
my $new_is_seq =
Bio::EnsEMBL::Pipeline::Alignment::AlignmentSeq->new(
-name => $evidence_id,
-seq => $aligned_informative_site_strings{$evidence_id});
$informative_sites_alignment->add_sequence($new_is_seq);
}
return $informative_sites_alignment; } |
sub _informative_site_strings
{ my $self = shift;
my $cba =
Bio::EnsEMBL::Pipeline::GeneDuplication::CodonBasedAlignment->new(
-genetic_code => 1);
$cba->sequences($self->_seqs);
my $aligned_seqs = $cba->run_alignment;
my @seq_arrays;
my @seq_name;
foreach my $realigned_seq (@$aligned_seqs) {
my @seq_array = split //, $realigned_seq->seq;
push @seq_arrays,\@ seq_array;
push @seq_name, $realigned_seq->display_id;
}
my $alignment_length = length($aligned_seqs->[0]->seq);
my %informative_site_string;
for (my $i = 0; $i < $alignment_length; $i++) {
my $informative_site = 0;
my $reference_base = $seq_arrays[0]->[$i];
for (my $j = 1; $j < scalar @seq_arrays; $j++) {
if ($seq_arrays[$j]->[$i] ne $reference_base) {
$informative_site = 1;
last
}
}
for (my $j = 0; $j < scalar @seq_arrays; $j++) {
if ($informative_site && $seq_arrays[$j]->[$i] ne '-') {
$informative_site_string{$seq_name[$j]} .= '*';
} elsif ($seq_arrays[$j]->[$i] ne '-') {
$informative_site_string{$seq_name[$j]} .= '-';
}
}
}
@seq_arrays = undef;
return\% informative_site_string } |
sub _retrieve_align_by_id
{ my ($self, $id) = @_;
unless (defined $self->{_align_vs_id}) {
foreach my $align (@{$self->_alignments}){
$self->{_align_vs_id}->{$align->alignment_name} = $align
}
}
warning ("Alignment with identifier [$id] does not exist.")
unless $self->{_align_vs_id}->{$id};
return $self->{_align_vs_id}->{$id} } |
sub _seqs
{ my $self = shift;
if (@_) {
$self->{_seqs} = shift;
throw("Was expecting the translatable sequences to be Bio::Seq objects.")
unless (scalar @{$self->{_seqs}} > 0 &&
defined $self->{_seqs}->[0] &&
$self->{_seqs}->[0]->isa("Bio::Seq"));
foreach my $seq (@{$self->{_seqs}}){
throw("Gaps found in translatable sequence. These should be gap-free.")
if $seq->seq =~ /-/;
}
}
return $self->{_seqs} } |
sub informative_sites
{ my $self = shift;
my $gene_informative_sites = $self->_informative_site_strings;
my %inform_site_aligns;
foreach my $seq (@{$self->_seqs}) {
next
unless $gene_informative_sites->{$seq->display_id} =~ /\*/;
$inform_site_aligns{$seq->display_id} =
$self->_derive_informative_site_alignment(
$seq,
$gene_informative_sites->{$seq->display_id});
}
return\% inform_site_aligns } |
sub new
{ my ($class, @args) = @_;
my $self = bless {},$class;
my ($translatable_seqs,
$alignments
) = rearrange([qw(TRANSLATABLE_SEQS
ALIGNMENTS
)],@args);
throw("No sequences to align.")
unless $translatable_seqs;
$self->_seqs($translatable_seqs);
throw("No alignments to reconcile.")
unless (defined $alignments);
$self->_alignments($alignments);
throw("Ids of translatable sequences do not fully match with " .
"alignment ids. This sometimes happens because one of " .
"the clustered genes has zero EST matches and hence no " .
"alignment.")
unless $self->_check_ids;
return $self } |