Raw content of Bio::EnsEMBL::Compara::AlignSlice::Exon
#
# Ensembl module for Bio::EnsEMBL::Compara::AlignSlice::Exon
#
# Original author: Javier Herrero
#
# Copyright EnsEMBL Team
#
# You may distribute this module under the same terms as perl itself
# pod documentation - main docs before the code
=head1 NAME
Bio::EnsEMBL::Compara::AlignSlice::Exon - Description
=head1 INHERITANCE
This module inherits attributes and methods from Bio::EnsEMBL::Exon module
=head1 SYNOPSIS
use Bio::EnsEMBL::Compara::AlignSlice::Exon;
my $exon = new Bio::EnsEMBL::Compara::AlignSlice::Exon(
);
SET VALUES
GET VALUES
=head1 OBJECT ATTRIBUTES
=over
=item exon
original Bio::EnsEMBL::Exon object
=item slice
Bio::EnsEMBL::Slice object on which this Bio::EnsEMBL::Compara::AlignSlice::Exon is defined
=item cigar_line
A string describing the mapping of this exon on the slice
=item phase
This exon results from the mapping of a real exon. It may suffer indels and duplications
during the process which makes this mapped exon unreadable by a translation machinery.
The phase is set to -1 by default.
=item end_phase
This exon results from the mapping of a real exon. It may suffer indels and duplications
during the process which makes this mapped exon unreadable by a translation machinery.
The end_phase is set to -1 by default.
=back
=head1 AUTHORS
Javier Herrero (jherrero@ebi.ac.uk)
=head1 COPYRIGHT
Copyright (c) 2004. EnsEMBL Team
You may distribute this module under the same terms as perl itself
=head1 CONTACT
This modules is part of the EnsEMBL project ()
Questions can be posted to the ensembl-dev mailing list:
ensembl-dev@ebi.ac.uk
=head1 APPENDIX
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
=cut
# Let the code begin...
package Bio::EnsEMBL::Compara::AlignSlice::Exon;
use strict;
use Bio::EnsEMBL::Exon;
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Bio::EnsEMBL::Utils::Exception qw(throw warning info);
our @ISA = qw(Bio::EnsEMBL::Exon);
=head2 new (CONSTRUCTOR)
Arg[1] : a reference to a hash where keys can be:
-exon
-adaptor
-reference_slice
Example : my $align_slice =
new Bio::EnsEMBL::Compara::AlignSlice(
-exon => $original_exon
-reference_slice => $reference_slice,
);
Description: Creates a new Bio::EnsEMBL::AlignSlice::Exon object
Returntype : Bio::EnsEMBL::Compara::AlignSlice::Exon object
Exceptions : return an object with no start, end nor strand if the
exon cannot be mapped on the reference Slice.
=cut
sub new {
my ($class, @args) = @_;
my $self = {};
bless($self, $class);
my ($exon, $align_slice, $from_mapper, $to_mapper, $original_rank) =
rearrange([qw(
EXON ALIGN_SLICE FROM_MAPPER TO_MAPPER ORIGINAL_RANK
)], @args);
$self->exon($exon) if (defined ($exon));
# $self->genomic_align($genomic_align) if (defined($genomic_align));
# $self->from_genomic_align_id($from_genomic_align_id) if (defined($from_genomic_align_id));
# $self->to_genomic_align_id($to_genomic_align_id) if (defined($to_genomic_align_id));
$self->slice($align_slice) if (defined($align_slice));
$self->original_rank($original_rank) if (defined($original_rank));
$self->phase(-1);
$self->end_phase(-1);
return $self->map_Exon_on_Slice($from_mapper, $to_mapper);
}
=head2 copy (CONSTRUCTOR)
Arg[1] : none
Example : my $new_align_slice = $old_align_slice->copy()
Description: Creates a new Bio::EnsEMBL::AlignSlice::Exon object which
is an exact copy of the calling object
Returntype : Bio::EnsEMBL::Compara::AlignSlice::Exon object
Exceptions :
Caller : $obeject->methodname
=cut
sub copy {
my ($self) = @_;
my $copy;
while (my ($key, $value) = each %$self) {
$copy->{$key} = $value;
}
bless($copy, ref($self));
return $copy;
}
=head2 slice
Arg[1] : (optional) Bio::EnsEMBL::Slice $reference_slice
Example : $align_exon->slice($reference_slice);
Example : $reference_slice = $align_exon->slice();
Description: Get/set the attribute slice. This method is overloaded in order
to map original coordinates onto the reference Slice.
Returntype : Bio::EnsEMBL::Slice object
Exceptions :
=cut
sub slice {
my ($self, $slice) = @_;
if (defined($slice)) {
$self->{'slice'} = $slice;
}
return $self->{'slice'};
}
=head2 original_rank
Arg[1] : (optional) integer $original_rank
Example : $align_exon->original_rank(5);
Example : $original_rank = $align_exon->original_rank();
Description: Get/set the attribute original_rank. The orignal_rank
is the position of the orginal Exon in the original
Transcript
Returntype : integer
Exceptions :
=cut
sub original_rank {
my ($self, $original_rank) = @_;
if (defined($original_rank)) {
$self->{'original_rank'} = $original_rank;
}
return $self->{'original_rank'};
}
=head2 map_Exon_on_Slice
Arg[1] : Bio::EnsEMBL::Mapper $from_mapper
Arg[2] : Bio::EnsEMBL::Mapper $to_mapper
Example : $align_exon->map_Exon_on_Slice($from_mapper, $to_mapper);
Description: This function takes the original exon and maps it on the slice
using the mappers.
Returntype : Bio::EnsEMBL::Compara::AlignSlice::Exon object
Exceptions : returns undef if not enough information is provided
Exceptions : returns undef if no piece of the original exon can be mapped.
Caller : new
=cut
sub map_Exon_on_Slice {
my ($self, $from_mapper, $to_mapper) = @_;
my $original_exon = $self->exon;
my $slice = $self->slice;
if (!defined($slice) or !defined($original_exon) or !defined($from_mapper)) {
return $self;
}
my @alignment_coords = $from_mapper->map_coordinates(
"sequence", # $self->genomic_align->dbID,
$original_exon->slice->start + $original_exon->start - 1,
$original_exon->slice->start + $original_exon->end - 1,
$original_exon->strand,
"sequence" # $from_mapper->from
);
my $aligned_start;
my $aligned_end;
my $aligned_strand = 0;
my $aligned_sequence = "";
my $aligned_cigar = "";
my $global_alignment_coord_start;
my $global_alignment_coord_end;
my $last_alignment_coord_end;
my $last_alignment_coord_start;
foreach my $alignment_coord (@alignment_coords) {
## $alignment_coord refer to genomic_align_block: (1 to genomic_align_block->length) [+]
if ($alignment_coord->isa("Bio::EnsEMBL::Mapper::Coordinate")) {
if ($alignment_coord->strand == 1) {
if ($last_alignment_coord_end) {
# Consider gap between this piece and the previous as a deletion
my $length = $alignment_coord->start - $last_alignment_coord_end - 1;
$aligned_cigar .= $length if ($length>1);
$aligned_cigar .= "D" if ($length);
}
$last_alignment_coord_end = $alignment_coord->end;
$global_alignment_coord_start = $alignment_coord->start if (!$global_alignment_coord_start);
$global_alignment_coord_end = $alignment_coord->end;
} else {
if ($last_alignment_coord_start) {
# Consider gap between this piece and the previous as a deletion
my $length = $last_alignment_coord_start - $alignment_coord->end - 1;
$aligned_cigar .= $length if ($length>1);
$aligned_cigar .= "D" if ($length);
}
$last_alignment_coord_start = $alignment_coord->start;
$global_alignment_coord_end = $alignment_coord->end if (!$global_alignment_coord_end);
$global_alignment_coord_start = $alignment_coord->start;
}
} else {
# This piece is outside of the alignment -> consider as an insertion
my $length = $alignment_coord->length;
$aligned_cigar .= $length if ($length>1);
$aligned_cigar .= "I" if ($length);
next;
}
if (!defined($to_mapper)) {
## Mapping on the alignment (expanded mode)
if ($alignment_coord->strand == 1) {
$aligned_strand = 1;
$aligned_start = $alignment_coord->start if (!$aligned_start);
$aligned_end = $alignment_coord->end;
} else {
$aligned_strand = -1;
$aligned_start = $alignment_coord->start;
$aligned_end = $alignment_coord->end if (!$aligned_end);
}
my $num = $alignment_coord->end - $alignment_coord->start + 1;
$aligned_cigar .= $num if ($num > 1);
$aligned_cigar .= "M" if ($num);
} else {
## Mapping on the reference_Slice (collapsed mode)
my @mapped_coords = $to_mapper->map_coordinates(
"alignment", # $self->genomic_align->genomic_align_block->dbID,
$alignment_coord->start,
$alignment_coord->end,
$alignment_coord->strand,
"alignment" # $to_mapper->to
);
foreach my $mapped_coord (@mapped_coords) {
## $mapped_coord refer to reference_slice
if ($mapped_coord->isa("Bio::EnsEMBL::Mapper::Coordinate")) {
if ($alignment_coord->strand == 1) {
$aligned_strand = 1;
$aligned_start = $mapped_coord->start if (!$aligned_start);
$aligned_end = $mapped_coord->end;
} else {
$aligned_strand = -1;
$aligned_start = $mapped_coord->start;
$aligned_end = $mapped_coord->end if (!$aligned_end);
}
my $num = $mapped_coord->end - $mapped_coord->start + 1;
$aligned_cigar .= $num if ($num > 1);
$aligned_cigar .= "M" if ($num);
} else {
my $num = $mapped_coord->end - $mapped_coord->start + 1;
$aligned_cigar .= $num if ($num > 1);
$aligned_cigar .= "I" if ($num);
}
}
}
}
if ($aligned_strand == 0) {
## the whole sequence maps on a gap
$self->{start} = undef;
$self->{end} = undef;
$self->{strand} = undef;
return $self;
}
## Set coordinates on "slice" coordinates
$self->start($aligned_start - $slice->start + 1);
$self->end($aligned_end - $slice->start + 1);
$self->strand($aligned_strand);
$self->cigar_line($aligned_cigar);
if ($self->start > $slice->length or $self->end < 1) {
$self->{start} = undef;
$self->{end} = undef;
$self->{strand} = undef;
return $self;
}
return $self;
}
=head2 exon
Arg[1] : (optional) Bio::EnsEMBL::Exon $original_exon
Example : $align_exon->exon($original_exon);
Example : $start = $align_exon->start();
Description: Get/set the attribute start. This method is overloaded in order to
return the starting postion on the AlignSlice instead of the
original one. Original starting position may be retrieved using
the SUPER::start() method or the orginal_start() method
Returntype : Bio::EnsEMBL::Exon object
Exceptions :
=cut
sub exon {
my ($self, $exon) = @_;
if (defined($exon)) {
$self->{'exon'} = $exon;
$self->stable_id($exon->stable_id) if (defined($exon->stable_id));
}
return $self->{'exon'};
}
=head2 cigar_line
Arg[1] : (optional) string $cigar_line
Example : $align_exon->cigar_line($cigar_line);
Example : $cigar_line = $align_exon->cigar_line();
Description: Get/set the attribute cigar_line.
Returntype : string
Exceptions : none
Caller : object->methodname
=cut
sub cigar_line {
my ($self, $cigar_line) = @_;
if (defined($cigar_line)) {
$self->{'cigar_line'} = $cigar_line;
}
return $self->{'cigar_line'};
}
sub get_aligned_start {
my ($self) = @_;
my $cigar_line = $self->cigar_line;
if (defined($cigar_line)) {
my @cig = ( $cigar_line =~ /(\d*[GMDI])/g );
my $cigType = substr( $cig[0], -1, 1 );
my $cigCount = substr( $cig[0], 0 ,-1 );
$cigCount = 1 unless ($cigCount =~ /^\d+$/);
next if ($cigCount == 0);
if ($cigType eq "I") {
return (1 + $cigCount);
} else {
return 1;
}
}
return undef;
}
sub get_aligned_end {
my ($self) = @_;
my $cigar_line = $self->cigar_line;
if (defined($cigar_line)) {
my @cig = ( $cigar_line =~ /(\d*[GMDI])/g );
my $cigType = substr( $cig[-1], -1, 1 );
my $cigCount = substr( $cig[-1], 0 ,-1 );
$cigCount = 1 unless ($cigCount =~ /^\d+$/);
next if ($cigCount == 0);
if ($cigType eq "I") {
return ($self->exon->end - $self->exon->start + 1 - $cigCount);
} else {
return ($self->exon->end - $self->exon->start + 1);
}
}
return undef;
}
=head2 seq
Arg [1] : none
Example : my $seq_str = $exon->seq->seq;
Description: Retrieves the dna sequence of this Exon.
Returned in a Bio::Seq object. Note that the sequence may
include UTRs (or even be entirely UTR).
Returntype : Bio::Seq or undef
Exceptions : warning if argument passed,
warning if exon does not have attatched slice
warning if exon strand is not defined (or 0)
Caller : general
=cut
sub seq {
my ($self, $seq) = @_;
if (defined($seq)) {
$self->{'_seq_cache'} = $seq->seq();
}
## Use _template_seq if defined. It is a concatenation of several original
## exon sequences and is produced during the merging of align_exons.
if(!defined($self->{'_seq_cache'})) {
my $seq = &_get_aligned_sequence_from_original_sequence_and_cigar_line(
($self->{'_template_seq'} or $self->exon->seq->seq),
$self->cigar_line,
"ref"
);
$self->{'_seq_cache'} = $seq;
}
return Bio::Seq->new(
-seq => $self->{'_seq_cache'},
-id => $self->stable_id,
-moltype => 'dna',
-alphabet => 'dna',
);
}
=head2 append_Exon
Arg [1] :
Example :
Description:
Returntype :
Exceptions :
Caller :
=cut
sub append_Exon {
my ($self, $exon, $gap_length) = @_;
$self->seq(new Bio::Seq(-seq =>
$self->seq->seq.("-"x$gap_length).$exon->seq->seq));
## As it is possible to merge two partially repeated parts of an Exon,
## the merging is done by concatenating both cigar_lines with the right
## number of gaps in the middle. The underlaying sequence must be lengthen
## accordingly. This is stored in the _template_seq private attribute
if (defined($self->{'_template_seq'})) {
$self->{'_template_seq'} .= $self->exon->seq->seq
} else {
$self->{'_template_seq'} = $self->exon->seq->seq x 2;
}
if ($gap_length) {
$gap_length = "" if ($gap_length == 1);
$self->cigar_line(
$self->cigar_line.
$gap_length."D".
$exon->cigar_line);
} else {
$self->cigar_line(
$self->cigar_line.
$exon->cigar_line);
}
return $self;
}
=head2 prepend_Exon
Arg [1] :
Example :
Description:
Returntype :
Exceptions :
Caller :
=cut
sub prepend_Exon {
my ($self, $exon, $gap_length) = @_;
$self->seq(new Bio::Seq(-seq =>
$exon->seq->seq.("-"x$gap_length).$self->seq->seq));
## As it is possible to merge two partially repeated parts of an Exon,
## the merging is done by concatenating both cigar_lines with the right
## number of gaps in the middle. The underlaying sequence must be lengthen
## accordingly. This is stored in the _template_seq private attribute
if (defined($self->{'_template_seq'})) {
$self->{'_template_seq'} .= $self->exon->seq->seq
} else {
$self->{'_template_seq'} = $self->exon->seq->seq x 2;
}
if ($gap_length) {
$gap_length = "" if ($gap_length == 1);
$self->cigar_line(
$exon->cigar_line.
$gap_length."D".
$self->cigar_line);
} else {
$self->cigar_line(
$exon->cigar_line.
$self->cigar_line);
}
return $self;
}
=head2 _get_aligned_sequence_from_original_sequence_and_cigar_line
Arg [1] : string $original_sequence
Arg [1] : string $cigar_line
Example : $aligned_sequence = _get_aligned_sequence_from_original_sequence_and_cigar_line(
"CGTAACTGATGTTA", "3MD8M2D3M")
Description: get gapped sequence from original one and cigar line
Returntype : string $aligned_sequence
Exceptions : thrown if cigar_line does not match sequence length
Caller : methodname
=cut
sub _get_aligned_sequence_from_original_sequence_and_cigar_line {
my ($original_sequence, $cigar_line, $mode) = @_;
my $aligned_sequence = "";
$mode ||= "";
return undef if (!$original_sequence or !$cigar_line);
my $seq_pos = 0;
my @cig = ( $cigar_line =~ /(\d*[GMDI])/g );
for my $cigElem ( @cig ) {
my $cigType = substr( $cigElem, -1, 1 );
my $cigCount = substr( $cigElem, 0 ,-1 );
$cigCount = 1 unless ($cigCount =~ /^\d+$/);
if( $cigType eq "M" ) {
$aligned_sequence .= substr($original_sequence, $seq_pos, $cigCount);
$seq_pos += $cigCount;
} elsif( $cigType eq "G" || $cigType eq "D") {
$aligned_sequence .= "-" x $cigCount;
} elsif( $cigType eq "I") {
$aligned_sequence .= "-" x $cigCount if ($mode ne "ref");
$seq_pos += $cigCount;
}
}
throw("Cigar line ($seq_pos) does not match sequence lenght (".length($original_sequence).")") if ($seq_pos != length($original_sequence));
return $aligned_sequence;
}
1;