Raw content of Bio::EnsEMBL::Compara::GenomicAlign
#
# Ensembl module for Bio::EnsEMBL::Compara::GenomicAlign
#
# Cared for by Ewan Birney
#
# Copyright Ewan Birney
#
# You may distribute this module under the same terms as perl itself
# pod documentation - main docs before the code
=head1 NAME
Bio::EnsEMBL::Compara::GenomicAlign - Defines one of the sequences involved in a genomic alignment
=head1 SYNOPSIS
use Bio::EnsEMBL::Compara::GenomicAlign;
my $genomic_align = new Bio::EnsEMBL::Compara::GenomicAlign(
-adaptor => $genomic_align_adaptor,
-genomic_align_block => $genomic_align_block,
-method_link_species_set => $method_link_species_set,
-dnafrag => $dnafrag,
-dnafrag_start => 100001,
-dnafrag_end => 100050,
-dnafrag_strand => -1,
-aligned_sequence => "TTGCAGGTAGGCCATCTGCAAGC----TGAGGAGCAAGGACTCCAGTCGGAGTC"
-level_id => 1,
);
SET VALUES
$genomic_align->adaptor($adaptor);
$genomic_align->dbID(12);
$genomic_align->genomic_align_block($genomic_align_block);
$genomic_align->genomic_align_block_id(1032);
$genomic_align->method_link_species_set($method_link_species_set);
$genomic_align->method_link_species_set_id(3);
$genomic_align->dnafrag($dnafrag);
$genomic_align->dnafrag_id(134);
$genomic_align->dnafrag_start(100001);
$genomic_align->dnafrag_end(100050);
$genomic_align->dnafrag_strand(-1);
$genomic_align->aligned_sequence("TTGCAGGTAGGCCATCTGCAAGC----TGAGGAGCAAGGACTCCAGTCGGAGTC");
$genomic_align->original_sequence("TTGCAGGTAGGCCATCTGCAAGCTGAGGAGCAAGGACTCCAGTCGGAGTC");
$genomic_align->cigar_line("23M4D27M");
$genomic_align->level_id(1);
GET VALUES
$adaptor = $genomic_align->adaptor;
$dbID = $genomic_align->dbID;
$genomic_align_block = $genomic_align->genomic_block;
$genomic_align_block_id = $genomic_align->genomic_align_block_id;
$method_link_species_set = $genomic_align->method_link_species_set;
$method_link_species_set_id = $genomic_align->method_link_species_set_id;
$dnafrag = $genomic_align->dnafrag;
$dnafrag_id = $genomic_align->dnafrag_id;
$dnafrag_start = $genomic_align->dnafrag_start;
$dnafrag_end = $genomic_align->dnafrag_end;
$dnafrag_strand = $genomic_align->dnafrag_strand;
$aligned_sequence = $genomic_align->aligned_sequence;
$original_sequence = $genomic_align->original_sequence;
$cigar_line = $genomic_align->cigar_line;
$level_id = $genomic_align->level_id;
$slice = $genomic_align->get_Slice();
=head1 DESCRIPTION
The GenomicAlign object stores information about a single sequence within an alignment.
=head1 OBJECT ATTRIBUTES
=over
=item dbID
corresponds to genomic_align.genomic_align_id
=item adaptor
Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor object to access DB
=item genomic_align_block_id
corresponds to genomic_align_block.genomic_align_block_id (ext. reference)
=item genomic_align_block
Bio::EnsEMBL::Compara::DBSQL::GenomicAlignBlock object corresponding to genomic_align_block_id
=item method_link_species_set_id
corresponds to method_link_species_set.method_link_species_set_id (external ref.)
=item method_link_species_set
Bio::EnsEMBL::Compara::DBSQL::MethodLinkSpeciesSet object corresponding to method_link_species_set_id
=item dnafrag_id
corresponds to dnafrag.dnafrag_id (external ref.)
=item dnafrag
Bio::EnsEMBL::Compara::DnaFrag object corresponding to dnafrag_id
=item dnafrag_start
corresponds to genomic_align.dnafrag_start
=item dnafrag_end
corresponds to genomic_align.dnafrag_end
=item dnafrag_strand
corresponds to genomic_align.dnafrag_strand
=item cigar_line
corresponds to genomic_align.cigar_line
=item level_id
corresponds to genomic_align.level_id
=item aligned_sequence
corresponds to the sequence rebuilt using dnafrag and cigar_line
=item original_sequence
corresponds to the original sequence. It can be rebuilt from the aligned_sequence, the dnafrag object or can be used
in conjuction with cigar_line to get the aligned_sequence.
=back
=head1 AUTHOR
Javier Herrero (jherrero@ebi.ac.uk)
=head1 CONTACT
This modules is part of the EnsEMBL project ()
Questions can be posted to the ensembl-dev mailing list:
ensembl-dev@ebi.ac.uk
=head1 APPENDIX
The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
=cut
# Let the code begin...
package Bio::EnsEMBL::Compara::GenomicAlign;
use strict;
use Bio::EnsEMBL::Utils::Exception qw(throw warning deprecate verbose);
use Bio::EnsEMBL::Utils::Argument qw(rearrange);
use Scalar::Util qw(weaken);
use Bio::EnsEMBL::Compara::GenomicAlignBlock;
use Bio::EnsEMBL::Compara::MethodLinkSpeciesSet;
use Bio::EnsEMBL::Mapper;
=head2 new (CONSTRUCTOR)
Arg [-DBID] : (opt.) int $dbID (the database internal ID for this object)
Arg [-ADAPTOR]
: (opt.) Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor $adaptor
(the adaptor for connecting to the database)
Arg [-GENOMIC_ALIGN_BLOCK]
: (opt.) Bio::EnsEMBL::Compara::GenomicAlignBlock $genomic_align_block
(the block to which this Bio::EnsEMBL::Compara::GenomicAlign object
belongs to)
Arg [-GENOMIC_ALIGN_BLOCK_ID]
: (opt.) int $genomic_align_block_id (the database internal ID for the
$genomic_align_block)
Arg [-METHOD_LINK_SPECIES_SET]
: (opt.) Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $mlss
(this defines the type of alignment and the set of species used
to get this GenomicAlignBlock)
Arg [-METHOD_LINK_SPECIES_SET_ID]
: (opt.) int $mlss_id (the database internal ID for the $mlss)
Arg [-DNAFRAG]
: (opt.) Bio::EnsEMBL::Compara::DnaFrag $dnafrag (the genomic
sequence object to which this object refers to)
Arg [-DNAFRAG_ID]
: (opt.) int $dnafrag_id (the database internal ID for the $dnafrag)
Arg [-DNAFRAG_START]
: (opt.) int $dnafrag_start (the starting position of this
Bio::EnsEMBL::Compara::GenomicAling within its corresponding $dnafrag)
Arg [-DNAFRAG_END]
: (opt.) int $dnafrag_end (the ending position of this
Bio::EnsEMBL::Compara::GenomicAling within its corresponding $dnafrag)
Arg [-DNAFRAG_STRAND]
: (opt.) int $dnafrag_strand (1 or -1; defines in which strand of its
corresponding $dnafrag this Bio::EnsEMBL::Compara::GenomicAlign is)
Arg [-ALIGNED_SEQUENCE]
: (opt.) string $aligned_sequence (the sequence of this object, including
gaps and all)
Arg [-CIGAR_LINE]
: (opt.) string $cigar_line (a compressed way of representing the indels in
the $aligned_sequence of this object)
Arg [-LEVEL_ID]
: (opt.) int $level_id (level of orhologous layer. 1 corresponds to the first
layer of orthologous sequences found, 2 and over are addiotional layers)
Example : my $genomic_align = new Bio::EnsEMBL::Compara::GenomicAlign(
-adaptor => $genomic_align_adaptor,
-genomic_align_block => $genomic_align_block,
-method_link_species_set => $method_link_species_set,
-dnafrag => $dnafrag,
-dnafrag_start => 100001,
-dnafrag_end => 100050,
-dnafrag_strand => -1,
-aligned_sequence => "TTGCAGGTAGGCCATCTGCAAGC----TGAGGAGCAAGGACTCCAGTCGGAGTC"
-level_id => 1,
);
Description : Creates a new Bio::EnsEMBL::Compara::GenomicAlign object
Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
Exceptions : none
Caller : general
Status : Stable
=cut
sub new {
my($class, @args) = @_;
my $self = {};
bless $self,$class;
my ($cigar_line, $adaptor,
$dbID, $genomic_align_block, $genomic_align_block_id, $method_link_species_set,
$method_link_species_set_id, $dnafrag, $dnafrag_id,
$dnafrag_start, $dnafrag_end, $dnafrag_strand,
$aligned_sequence, $level_id ) =
rearrange([qw(
CIGAR_LINE ADAPTOR
DBID GENOMIC_ALIGN_BLOCK GENOMIC_ALIGN_BLOCK_ID METHOD_LINK_SPECIES_SET
METHOD_LINK_SPECIES_SET_ID DNAFRAG DNAFRAG_ID
DNAFRAG_START DNAFRAG_END DNAFRAG_STRAND
ALIGNED_SEQUENCE LEVEL_ID)], @args);
$self->adaptor( $adaptor ) if defined $adaptor;
$self->cigar_line( $cigar_line ) if defined $cigar_line;
$self->dbID($dbID) if (defined($dbID));
$self->genomic_align_block($genomic_align_block) if (defined($genomic_align_block));
$self->genomic_align_block_id($genomic_align_block_id) if (defined($genomic_align_block_id));
$self->method_link_species_set($method_link_species_set) if (defined($method_link_species_set));
$self->method_link_species_set_id($method_link_species_set_id) if (defined($method_link_species_set_id));
$self->dnafrag($dnafrag) if (defined($dnafrag));
$self->dnafrag_id($dnafrag_id) if (defined($dnafrag_id));
$self->dnafrag_start($dnafrag_start) if (defined($dnafrag_start));
$self->dnafrag_end($dnafrag_end) if (defined($dnafrag_end));
$self->dnafrag_strand($dnafrag_strand) if (defined($dnafrag_strand));
$self->aligned_sequence($aligned_sequence) if (defined($aligned_sequence));
$self->level_id($level_id) if (defined($level_id));
return $self;
}
=head2 new_fast
Arg [1] : hash reference $hashref
Example : none
Description: This is an ultra fast constructor which requires knowledge of
the objects internals to be used.
Returntype :
Exceptions : none
Caller :
Status : Stable
=cut
sub new_fast {
my $class = shift;
my $hashref = shift;
return bless $hashref, $class;
}
=head2 copy (CONSTRUCTOR)
Arg : -none-
Example : my $new_genomic_align = $genomic_align->copy();
Description : Create a new object with the same attributes
as this one.
Returntype : Bio::EnsEMBL::Compara::GenomicAlign (or subclassed) object
Exceptions :
Status : Stable
=cut
sub copy {
my ($self) = @_;
my $new_copy = {};
bless $new_copy, ref($self);
while (my ($key, $value) = each %$self) {
$new_copy->{$key} = $value;
}
return $new_copy;
}
=head2 adaptor
Arg [1] : Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor
Example : $adaptor = $genomic_align->adaptor;
Example : $genomic_align->adaptor($adaptor);
Description: Getter/Setter for the adaptor this object uses for database
interaction.
Returntype : Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor
Exceptions : thrown if $adaptor is not a
Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor object
Caller : object->methodname
Status : Stable
=cut
sub adaptor {
my $self = shift;
if (@_) {
$self->{'adaptor'} = shift;
throw($self->{'adaptor'}." is not a Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor object")
if ($self->{'adaptor'} and !UNIVERSAL::isa($self->{'adaptor'}, "Bio::EnsEMBL::Compara::DBSQL::GenomicAlignAdaptor"));
}
return $self->{'adaptor'};
}
=head2 dbID
Arg [1] : integer $dbID
Example : $dbID = $genomic_align->dbID;
Example : $genomic_align->dbID(12);
Description: Getter/Setter for the attribute dbID
Returntype : integer
Exceptions : none
Caller : object->methodname
Status : Stable
=cut
sub dbID {
my ($self, $dbID) = @_;
if (defined($dbID)) {
$self->{'dbID'} = $dbID;
}
return $self->{'dbID'};
}
=head2 genomic_align_block
Arg [1] : Bio::EnsEMBL::Compara::GenomicAlignBlock $genomic_align_block
Example : $genomic_align_block = $genomic_align->genomic_align_block;
Example : $genomic_align->genomic_align_block($genomic_align_block);
Description: Getter/Setter for the attribute genomic_align_block
Returntype : Bio::EnsEMBL::Compara::GenomicAlignBlock object. If no
argument is given, the genomic_align_block is not defined but
both the genomic_align_block_id and the adaptor are, it tries
to fetch the data using the genomic_align_block_id.
Exception : throws if $genomic_align_block is not a
Bio::EnsEMBL::Compara::GenomicAlignBlock object or if
$genomic_align_block does not match a previously defined
genomic_align_block_id
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub genomic_align_block {
my ($self, $genomic_align_block) = @_;
if (defined($genomic_align_block)) {
throw("$genomic_align_block is not a Bio::EnsEMBL::Compara::GenomicAlignBlock object")
if (!$genomic_align_block->isa("Bio::EnsEMBL::Compara::GenomicAlignBlock"));
weaken($self->{'genomic_align_block'} = $genomic_align_block);
## Add adaptor to genomic_align_block object if possible and needed
if (!defined($genomic_align_block->{'adaptor'}) and defined($self->{'adaptor'})) {
$genomic_align_block->adaptor($self->adaptor->db->get_GenomicAlignBlockAdaptor);
}
if ($self->{'genomic_align_block_id'}) {
if (!$self->{'genomic_align_block'}->{'dbID'}) {
$self->{'genomic_align_block'}->dbID($self->{'genomic_align_block_id'});
}
# warning("Defining both genomic_align_block_id and genomic_align_block");
throw("dbID of genomic_align_block object does not match previously defined".
" genomic_align_block_id. If you want to override a".
" Bio::EnsEMBL::Compara::GenomicAlign object, you can reset the ".
"genomic_align_block_id using \$genomic_align->genomic_align_block_id(0)")
if ($self->{'genomic_align_block'}->{'dbID'} != $self->{'genomic_align_block_id'});
} else {
$self->{'genomic_align_block_id'} = $genomic_align_block->{'dbID'};
}
} elsif (!defined($self->{'genomic_align_block'})) {
# Try to get the genomic_align_block from other sources...
if (defined($self->genomic_align_block_id) and defined($self->{'adaptor'})) {
# ...from the genomic_align_block_id. Uses genomic_align_block_id function
# and not the attribute in the clause because the attribute can be retrieved from other
# sources if it has not been set before.
my $genomic_align_block_adaptor = $self->{'adaptor'}->db->get_GenomicAlignBlockAdaptor;
$self->{'genomic_align_block'} = $genomic_align_block_adaptor->fetch_by_dbID(
$self->{'genomic_align_block_id'});
} else {
# warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_block".
# " You either have to specify more information (see perldoc for".
# " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'genomic_align_block'};
}
=head2 genomic_align_block_id
Arg [1] : integer $genomic_align_block_id
Example : $genomic_align_block_id = $genomic_align->genomic_align_block_id;
Example : $genomic_align->genomic_align_block_id(1032);
Description: Getter/Setter for the attribute genomic_align_block_id. If no
argument is given and the genomic_align_block_id is not defined, it
tries to get the data from other sources like the corresponding
Bio::EnsEMBL::Compara::GenomicAlignBlock object or the database using
the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
Use 0 as argument to clear this attribute.
Returntype : integer
Exceptions : thrown if $genomic_align_block_id does not match a previously defined
genomic_align_block
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub genomic_align_block_id {
my ($self, $genomic_align_block_id) = @_;
if (defined($genomic_align_block_id)) {
$self->{'genomic_align_block_id'} = ($genomic_align_block_id or undef);
if (defined($self->{'genomic_align_block'}) and $self->{'genomic_align_block_id'}) {
# warning("Defining both genomic_align_block_id and genomic_align_block");
throw("genomic_align_block_id does not match previously defined genomic_align_block object")
if ($self->{'genomic_align_block'} and
$self->{'genomic_align_block'}->dbID != $self->{'genomic_align_block_id'});
}
} elsif (!($self->{'genomic_align_block_id'})) {
# Try to get the ID from other sources...
if (defined($self->{'genomic_align_block'}) and defined($self->{'genomic_align_block'}->dbID)) {
# ...from the corresponding Bio::EnsEMBL::Compara::GenomicAlignBlock object
$self->{'genomic_align_block_id'} = $self->{'genomic_align_block'}->dbID;
} elsif (defined($self->{'adaptor'}) and defined($self->{'dbID'})) {
# ...from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$self->adaptor->retrieve_all_direct_attributes($self);
} else {
# warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_block_id".
# " You either have to specify more information (see perldoc for".
# " Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'genomic_align_block_id'};
}
=head2 method_link_species_set
Arg [1] : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet $method_link_species_set
Example : $method_link_species_set = $genomic_align->method_link_species_set;
Example : $genomic_align->method_link_species_set($method_link_species_set);
Description: Getter/Setter for the attribute method_link_species_set. If no
argument is given and the method_link_species_set is not defined, it
tries to get the data from other sources like the corresponding
Bio::EnsEMBL::Compara::GenomicAlignBlock object or from
the method_link_species_set_id.
Returntype : Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
Exceptions : thrown if $method_link_species_set is not a
Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object or if
$method_link_species_set does not match a previously defined
method_link_species_set_id
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub method_link_species_set {
my ($self, $method_link_species_set) = @_;
if (defined($method_link_species_set)) {
throw("$method_link_species_set is not a Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object")
if (!$method_link_species_set->isa("Bio::EnsEMBL::Compara::MethodLinkSpeciesSet"));
$self->{'method_link_species_set'} = $method_link_species_set;
if ($self->{'method_link_species_set_id'}) {
if (!$self->{'method_link_species_set'}->dbID) {
$self->{'method_link_species_set'}->dbID($self->{'method_link_species_set_id'});
} else {
$self->{'method_link_species_set_id'} = $self->{'method_link_species_set'}->dbID();
}
} else {
$self->{'method_link_species_set_id'} = $self->{'method_link_species_set'}->dbID;
}
} elsif (!defined($self->{'method_link_species_set'})) {
# Try to get the object from other sources...
if (defined($self->genomic_align_block) and ($self->{'genomic_align_block'}->method_link_species_set)) {
# ...from the corresponding Bio::EnsEMBL::Compara::GenomicAlignBlock object. Uses genomic_align_block
# function and not the attribute in the clause because the attribute can be retrieved from other
# sources if it has not been already defined.
$self->{'method_link_species_set'} = $self->genomic_align_block->method_link_species_set;
} elsif (defined($self->method_link_species_set_id) and defined($self->{'adaptor'})) {
# ...from the method_link_species_set_id. Uses method_link_species_set_id function and not the attribute
# in the clause because the attribute can be retrieved from other sources if it has not been
# already defined.
my $method_link_species_set_adaptor = $self->adaptor->db->get_MethodLinkSpeciesSetAdaptor;
$self->{'method_link_species_set'} = $method_link_species_set_adaptor->fetch_by_dbID(
$self->{'method_link_species_set_id'});
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->method_link_species_set".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'method_link_species_set'};
}
=head2 method_link_species_set_id
Arg [1] : integer $method_link_species_set_id
Example : $method_link_species_set_id = $genomic_align->method_link_species_set_id;
Example : $genomic_align->method_link_species_set_id(3);
Description: Getter/Setter for the attribute method_link_species_set_id. If no
argument is given and the method_link_species_set_id is not defined, it
tries to get the data from other sources like the corresponding
Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object or the database
using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
Use 0 as argument to clear this attribute.
Returntype : integer
Exceptions : thrown if $method_link_species_set_id does not match a previously defined
method_link_species_set
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub method_link_species_set_id {
my ($self, $method_link_species_set_id) = @_;
if (defined($method_link_species_set_id)) {
$self->{'method_link_species_set_id'} = $method_link_species_set_id;
if (defined($self->{'method_link_species_set'}) and $self->{'method_link_species_set_id'}) {
$self->{'method_link_species_set'} = undef;
}
} elsif (!$self->{'method_link_species_set_id'}) {
# Try to get the ID from other sources...
if (defined($self->{'method_link_species_set'}) and $self->{'method_link_species_set'}->dbID) {
# ...from the corresponding Bio::EnsEMBL::Compara::MethodLinkSpeciesSet object
$self->{'method_link_species_set_id'} = $self->{'method_link_species_set'}->dbID;
} elsif (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
# ...from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$self->adaptor->retrieve_all_direct_attributes($self);
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->method_link_species_set_id".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'method_link_species_set_id'};
}
=head2 genome_db
Arg [1] : Bio::EnsEMBL::Compara::GenomeDB $genome_db
Example : $genome_db = $genomic_align->genome_db;
Example : $genomic_align->genome_db($genome_db);
Description: Getter/Setter for the attribute genome_db of
the dnafrag. This method is a short cut for
$genomic_align->dnafrag->genome_db()
Returntype : Bio::EnsEMBL::Compara::DnaFrag object
Exceptions : thrown if $genomic_align->dnafrag is not
defined and cannot be fetched from other
sources.
Caller : object->methodname
Status : Stable
=cut
sub genome_db {
my ($self, $genome_db) = @_;
if (defined($genome_db)) {
throw("$genome_db is not a Bio::EnsEMBL::Compara::GenomeDB object")
if (!UNIVERSAL::isa($genome_db, "Bio::EnsEMBL::Compara::GenomeDB"));
my $dnafrag = $self->dnafrag();
if (!$dnafrag) {
throw("Cannot set genome_db if dnafrag does not exist");
} else {
$self->dnafrag->genome_db($genome_db);
}
}
return $self->dnafrag->genome_db;
}
=head2 dnafrag
Arg [1] : Bio::EnsEMBL::Compara::DnaFrag $dnafrag
Example : $dnafrag = $genomic_align->dnafrag;
Example : $genomic_align->dnafrag($dnafrag);
Description: Getter/Setter for the attribute dnafrag. If no
argument is given, the dnafrag is not defined but
both the dnafrag_id and the adaptor are, it tries
to fetch the data using the dnafrag_id
Returntype : Bio::EnsEMBL::Compara::DnaFrag object
Exceptions : thrown if $dnafrag is not a Bio::EnsEMBL::Compara::DnaFrag
object or if $dnafrag does not match a previously defined
dnafrag_id
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub dnafrag {
my ($self, $dnafrag) = @_;
if (defined($dnafrag)) {
throw("$dnafrag is not a Bio::EnsEMBL::Compara::DnaFrag object")
if (!$dnafrag->isa("Bio::EnsEMBL::Compara::DnaFrag"));
$self->{'dnafrag'} = $dnafrag;
if ($self->{'dnafrag_id'}) {
if (!$self->{'dnafrag'}->dbID) {
$self->{'dnafrag'}->dbID($self->{'dnafrag_id'});
}
# warning("Defining both dnafrag_id and dnafrag");
throw("dnafrag object does not match previously defined dnafrag_id")
if ($self->{'dnafrag'}->dbID != $self->{'dnafrag_id'});
} else {
$self->{'dnafrag_id'} = $self->{'dnafrag'}->dbID;
}
} elsif (!defined($self->{'dnafrag'})) {
# Try to get data from other sources...
if (defined($self->dnafrag_id) and defined($self->{'adaptor'})) {
# ...from the dnafrag_id. Use dnafrag_id function and not the attribute in the
# clause because the attribute can be retrieved from other sources if it has not been already defined.
my $dnafrag_adaptor = $self->adaptor->db->get_DnaFragAdaptor;
$self->{'dnafrag'} = $dnafrag_adaptor->fetch_by_dbID($self->{'dnafrag_id'});
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'dnafrag'};
}
=head2 dnafrag_id
Arg [1] : integer $dnafrag_id
Example : $dnafrag_id = $genomic_align->dnafrag_id;
Example : $genomic_align->dnafrag_id(134);
Description: Getter/Setter for the attribute dnafrag_id. If no
argument is given and the dnafrag_id is not defined, it tries to
get the ID from other sources like the corresponding
Bio::EnsEMBL::Compara::DnaFrag object or the database using the dbID
of the Bio::EnsEMBL::Compara::GenomicAlign object.
Use 0 as argument to clear this attribute.
Returntype : integer
Exceptions : thrown if $dnafrag_id does not match a previously defined
dnafrag
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub dnafrag_id {
my ($self, $dnafrag_id) = @_;
if (defined($dnafrag_id)) {
$self->{'dnafrag_id'} = $dnafrag_id;
if (defined($self->{'dnafrag'}) and $self->{'dnafrag_id'}) {
# warning("Defining both dnafrag_id and dnafrag");
throw("dnafrag_id does not match previously defined dnafrag object")
if ($self->{'dnafrag'} and $self->{'dnafrag'}->dbID != $self->{'dnafrag_id'});
}
} elsif (!($self->{'dnafrag_id'})) {
# Try to get the ID from other sources...
if (defined($self->{'dnafrag'}) and defined($self->{'dnafrag'}->dbID)) {
# ...from the corresponding Bio::EnsEMBL::Compara::DnaFrag object
$self->{'dnafrag_id'} = $self->{'dnafrag'}->dbID;
} elsif (defined($self->{'adaptor'}) and defined($self->{'dbID'})) {
# ...from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$self->adaptor->retrieve_all_direct_attributes($self);
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag_id".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'dnafrag_id'};
}
=head2 dnafrag_start
Arg [1] : integer $dnafrag_start
Example : $dnafrag_start = $genomic_align->dnafrag_start;
Example : $genomic_align->dnafrag_start(1233354);
Description: Getter/Setter for the attribute dnafrag_start. If no argument is given, the
dnafrag_start is not defined but both the dbID and the adaptor are, it tries
to fetch and set all the direct attributes from the database using the
dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
Returntype : integer
Exceptions : none
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub dnafrag_start {
my ($self, $dnafrag_start) = @_;
if (defined($dnafrag_start)) {
$self->{'dnafrag_start'} = $dnafrag_start;
} elsif (!defined($self->{'dnafrag_start'})) {
if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
# Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$self->adaptor->retrieve_all_direct_attributes($self);
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag_start".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'dnafrag_start'};
}
=head2 dnafrag_end
Arg [1] : integer $dnafrag_end
Example : $dnafrag_end = $genomic_align->dnafrag_end;
Example : $genomic_align->dnafrag_end(1235320);
Description: Getter/Setter for the attribute dnafrag_end. If no argument is given, the
dnafrag_end is not defined but both the dbID and the adaptor are, it tries
to fetch and set all the direct attributes from the database using the
dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
Returntype : integer
Exceptions : none
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub dnafrag_end {
my ($self, $dnafrag_end) = @_;
if (defined($dnafrag_end)) {
$self->{'dnafrag_end'} = $dnafrag_end;
} elsif (!defined($self->{'dnafrag_end'})) {
if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
# Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$self->adaptor->retrieve_all_direct_attributes($self);
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag_end".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'dnafrag_end'};
}
=head2 dnafrag_strand
Arg [1] : integer $dnafrag_strand (1 or -1)
Example : $dnafrag_strand = $genomic_align->dnafrag_strand;
Example : $genomic_align->dnafrag_strand(1);
Description: Getter/Setter for the attribute dnafrag_strand. If no argument is given, the
dnafrag_strand is not defined but both the dbID and the adaptor are, it tries
to fetch and set all the direct attributes from the database using the
dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
Returntype : integer
Exceptions : none
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub dnafrag_strand {
my ($self, $dnafrag_strand) = @_;
if (defined($dnafrag_strand)) {
$self->{'dnafrag_strand'} = $dnafrag_strand;
} elsif (!defined($self->{'dnafrag_strand'})) {
if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
# Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$self->adaptor->retrieve_all_direct_attributes($self);
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->dnafrag_strand".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'dnafrag_strand'};
}
=head2 aligned_sequence
Arg [1...] : string $aligned_sequence or string @flags
Example : $aligned_sequence = $genomic_align->aligned_sequence
Example : $aligned_sequence = $genomic_align->aligned_sequence("+FIX_SEQ");
Example : $genomic_align->aligned_sequence("ACTAGTTAGCT---TATCT--TTAAA")
Description: With no arguments, rebuilds the alignment string for this sequence
using the cigar_line information and the original sequence if needed.
This sequence depends on the strand defined by the dnafrag_strand attribute.
Flags : +FIX_SEQ
With this flag, the method will return a sequence that could be
directly aligned with the original_sequence of the reference
genomic_align.
Returntype : string $aligned_sequence
Exceptions : thrown if sequence contains unknown symbols
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub aligned_sequence {
my ($self, @aligned_sequence_or_flags) = @_;
my $aligned_sequence;
my $fix_seq = 0;
my $fake_seq = 0;
foreach my $flag (@aligned_sequence_or_flags) {
if ($flag =~ /^\+/) {
if ($flag eq "+FIX_SEQ") {
$fix_seq = 1;
} elsif ($flag eq "+FAKE_SEQ") {
$fake_seq = 1;
} else {
warning("Unknow flag $flag when calling".
" Bio::EnsEMBL::Compara::GenomicAlign::aligned_sequence()");
}
} else {
$aligned_sequence = $flag;
}
}
if (defined($aligned_sequence)) {
$aligned_sequence =~ s/[\r\n]+$//;
if ($aligned_sequence) {
## Check sequence
throw("Unreadable sequence ($aligned_sequence)") if ($aligned_sequence !~ /^[\-\.A-Z]+$/i);
$self->{'aligned_sequence'} = $aligned_sequence;
} else {
$self->{'aligned_sequence'} = undef;
}
} elsif (!defined($self->{'aligned_sequence'})) {
# Try to get the aligned_sequence from other sources...
if (defined($self->cigar_line) and $fake_seq) {
# ...from the corresponding cigar_line (using a fake seq)
$aligned_sequence = _get_fake_aligned_sequence_from_cigar_line(
$self->{'cigar_line'});
} elsif (defined($self->cigar_line) and defined($self->original_sequence)) {
my $original_sequence = $self->original_sequence;
# ...from the corresponding orginial_sequence and cigar_line
$aligned_sequence = _get_aligned_sequence_from_original_sequence_and_cigar_line(
$original_sequence, $self->{'cigar_line'});
$self->{'aligned_sequence'} = $aligned_sequence;
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->aligned_sequence".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
$aligned_sequence = $self->{'aligned_sequence'} if (defined($self->{'aligned_sequence'}));
if ($aligned_sequence and $fix_seq) {
$aligned_sequence = _get_aligned_sequence_from_original_sequence_and_cigar_line(
$aligned_sequence, $self->genomic_align_block->reference_genomic_align->cigar_line, $fix_seq);
}
return $aligned_sequence;
}
=head2 length
Arg [1] : -none-
Example : $length = $genomic_align->length;
Description: get the length of the aligned sequence. This method will try to
get the length from the aligned_sequence if already set or by
parsing the cigar_line otherwise
Returntype : int
Exceptions : none
Warning :
Caller : object->methodname
Status : Stable
=cut
sub length {
my $self = shift;
if ($self->{aligned_sequence}) {
return length($self->{aligned_sequence});
} elsif ($self->{cigar_line}) {
my $length = 0;
my $cigar_line = $self->{cigar_line};
my @cig = ( $cigar_line =~ /(\d*[GMDXI])/g );
for my $cigElem ( @cig ) {
my $cigType = substr( $cigElem, -1, 1 );
my $cigCount = substr( $cigElem, 0 ,-1 );
$cigCount = 1 unless ($cigCount =~ /^\d+$/);
$length += $cigCount unless ($cigType eq "I");
}
return $length;
}
return undef;
}
=head2 cigar_line
Arg [1] : string $cigar_line
Example : $cigar_line = $genomic_align->cigar_line;
Example : $genomic_align->cigar_line("35M2D233M7D23MD100M");
Description: get/set for attribute cigar_line.
If no argument is given, the cigar line has not been
defined yet but the aligned sequence was, it calculates
the cigar line based on the aligned (gapped) sequence.
If no argument is given, the cigar_line is not defined but both
the dbID and the adaptor are, it tries to fetch and set all
the direct attributes from the database using the dbID of the
Bio::EnsEMBL::Compara::GenomicAlign object. You can reset this
attribute using an empty string as argument.
The cigar_line depends on the strand defined by the dnafrag_strand
attribute.
Returntype : string
Exceptions : none
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub cigar_line {
my ($self, $arg) = @_;
if (defined($arg)) {
if ($arg) {
$self->{'cigar_line'} = $arg;
} else {
$self->{'cigar_line'} = undef;
}
} elsif (!defined($self->{'cigar_line'})) {
# Try to get the cigar_line from other sources...
if (defined($self->{'aligned_sequence'})) {
# ...from the aligned sequence
my $cigar_line = _get_cigar_line_from_aligned_sequence($self->{'aligned_sequence'});
$self->cigar_line($cigar_line);
} elsif (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
# ...from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$self->adaptor->retrieve_all_direct_attributes($self);
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->cigar_line".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'cigar_line'};
}
=head2 level_id
Arg [1] : int $level_id
Example : $level_id = $genomic_align->level_id;
Example : $genomic_align->level_id(1);
Description: get/set for attribute level_id. If no argument is given, the level_id
is not defined but both the dbID and the adaptor are, it tries to
fetch and set all the direct attributes from the database using the
dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
Returntype : int
Exceptions : none
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub level_id {
my ($self, $level_id) = @_;
if (defined($level_id)) {
$self->{'level_id'} = $level_id;
} elsif (!defined($self->{'level_id'})) {
if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
# Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$self->adaptor->retrieve_all_direct_attributes($self);
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->level_id".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'level_id'};
}
=head2 genomic_align_groups
Arg [1] : a ref. to an array of Bio::EnsEMBL::Compara::GenomicAlignGroup $genomic_align_groups
Example : $genomic_align_groups = $genomic_align->genomic_align_groups;
Example : $genomic_align->genomic_align_groups($genomic_align_groups);
Description: get/set for attribute genomic_align_groups. If no argument is given, the
genomic_align_groups are not defined but both the dbID and the adaptor are,
it tries to fetch and set teh data from the database using the
dbID of the Bio::EnsEMBL::Compara::GenomicAlign object.
Returntype : int
Exceptions : none
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub genomic_align_groups {
my ($self, $genomic_align_groups) = @_;
if (defined($genomic_align_groups)) {
foreach my $this_genomic_align_group (@$genomic_align_groups) {
my $type = $this_genomic_align_group->type;
$self->{'genomic_align_group'}->{$type} = $this_genomic_align_group;
}
} elsif (!defined($self->{'genomic_align_group'})) {
if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
# Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
$genomic_align_groups = $self->adaptor->db->get_GenomicAlignGroupAdaptor->fetch_all_by_GenomicAlign($self);
foreach my $this_genomic_align_group (@$genomic_align_groups) {
my $type = $this_genomic_align_group->type;
$self->{'genomic_align_group'}->{$type} = $this_genomic_align_group;
$self->{'genomic_align_group_id'}->{$type} = $this_genomic_align_group->dbID;
}
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_groups".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
$genomic_align_groups = [values %{$self->{'genomic_align_group'}}];
return $genomic_align_groups;
}
=head2 genomic_align_group_by_type
Arg [1] : [mandatory] string $type (genomic_align_group.type)
Arg [2] : [optional] Bio::EnsEMBL::Compara::GenomicAlignGroup $genomic_align_group
Example : $genomic_align_group = $genomic_align->genomic_align_group_by_type("default");
Example : $genomic_align->genomic_align_group_by_type("default", $genomic_align_group);
Description: get/set for the Bio::EnsEMBL::Compara::GenomicAlginGroup object
corresponding to this Bio::EnsEMBL::Compara::GenomicAlign object and the
given type.
Returntype : int
Exceptions : none
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub genomic_align_group_by_type {
my ($self, $type, $genomic_align_group) = @_;
$type = "default" if (!$type);
if (defined($genomic_align_group)) {
$self->{'genomic_align_group'}->{$type} = $genomic_align_group;
} elsif (!defined($self->{'genomic_align_group'}->{$type})) {
if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
# Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
my $genomic_align_group_adaptor = $self->adaptor->db->get_GenomicAlignGroupAdaptor;
my $genomic_align_groups = $genomic_align_group_adaptor->fetch_all_by_GenomicAlign($self);
foreach my $this_genomic_align_group (@$genomic_align_groups) {
$self->{'genomic_align_group'}->{$this_genomic_align_group->{'type'}} = $this_genomic_align_group;
$self->{'genomic_align_group_id'}->{$this_genomic_align_group->{'type'}} = $this_genomic_align_group->dbID;
}
}
}
return $self->{'genomic_align_group'}->{$type};
}
=head2 genomic_align_group_id_by_type
Arg [1] : [mandatory] string $type (genomic_align_group.type)
Arg [2] : [optional] int $genomic_align_group_id
Example : $genomic_align_group_id = $genomic_align->genomic_align_group_by_type("default");
Example : $genomic_align->genomic_align_group_by_type("default", 18);
Description: get/set for the genomic_align_group_id corresponding to this
Bio::EnsEMBL::Compara::GenomicAlign object and the given type.
Returntype : int
Exceptions : none
Warning : warns if getting data from other sources fails.
Caller : object->methodname
Status : Stable
=cut
sub genomic_align_group_id_by_type {
my ($self, $type, $genomic_align_group_id) = @_;
$type = "default" if (!$type);
if (defined($genomic_align_group_id)) {
$self->{'genomic_align_group_id'}->{$type} = $genomic_align_group_id;
} elsif (!defined($self->{'genomic_align_group_id'}->{$type})) {
if (defined($self->{'dbID'}) and defined($self->{'adaptor'})) {
# Try to get the values from the database using the dbID of the Bio::EnsEMBL::Compara::GenomicAlign object
my $genomic_align_group_adaptor = $self->adaptor->db->get_GenomicAlignGroupAdaptor;
my $genomic_align_groups = $genomic_align_group_adaptor->fetch_all_by_GenomicAlign($self);
foreach my $this_genomic_align_group (@$genomic_align_groups) {
$self->{'genomic_align_group'}->{$this_genomic_align_group->{'type'}} = $this_genomic_align_group;
$self->{'genomic_align_group_id'}->{$this_genomic_align_group->{'type'}} = $this_genomic_align_group->dbID;
}
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_group_id_by_type".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'genomic_align_group_id'}->{$type};
}
=head2 original_sequence
Arg [1] : none
Example : $original_sequence = $genomic_align->original_sequence
Description: get/set original sequence. If no argument is given and the original_sequence
is not defined, it tries to fetch the data from other sources like the
aligned sequence or the the Bio::EnsEMBL::Compara:DnaFrag object. You can
reset this attribute using an empty string as argument.
This sequence depends on the strand defined by the dnafrag_strand attribute.
Returntype : string $original_sequence
Exceptions :
Caller : object->methodname
Status : Stable
=cut
sub original_sequence {
my ($self, $original_sequence) = @_;
if (defined($original_sequence)) {
if ($original_sequence) {
$self->{'original_sequence'} = $original_sequence;
} else {
$self->{'original_sequence'} = undef;
}
} elsif (!defined($self->{'original_sequence'})) {
# Try to get the data from other sources...
if ($self->{'aligned_sequence'} and $self->{'cigar_line'} !~ /I/) {
# ...from the aligned sequence
$self->{'original_sequence'} = $self->{'aligned_sequence'};
$self->{'original_sequence'} =~ s/\-//g;
} elsif (!defined($self->{'original_sequence'}) and defined($self->dnafrag)
and defined($self->dnafrag_start) and defined($self->dnafrag_end)
and defined($self->dnafrag_strand) and defined($self->dnafrag->slice)) {
# ...from the dnafrag object. Uses dnafrag, dnafrag_start and dnafrag_methods instead of the attibutes
# in the clause because the attributes can be retrieved from other sources if they have not been
# already defined.
$self->{'original_sequence'} = $self->dnafrag->slice->subseq(
$self->dnafrag_start,
$self->dnafrag_end,
$self->dnafrag_strand
);
} else {
warning("Fail to get data from other sources in Bio::EnsEMBL::Compara::GenomicAlign->genomic_align_groups".
" You either have to specify more information (see perldoc for".
" Bio::EnsEMBL::Compara::GenomicAlign) or to set it up directly");
}
}
return $self->{'original_sequence'};
}
=head2 _get_cigar_line_from_aligned_sequence
Arg [1] : string $aligned_sequence
Example : $cigar_line = _get_cigar_line_from_aligned_sequence("CGT-AACTGATG--TTA")
Description: get cigar line from gapped sequence
Returntype : string $cigar_line
Exceptions :
Caller : methodname
Status : Stable
=cut
sub _get_cigar_line_from_aligned_sequence {
my ($aligned_sequence) = @_;
my $cigar_line = "";
my @pieces = grep {$_} split(/(\-+)|(\.+)/, $aligned_sequence);
foreach my $piece (@pieces) {
my $mode;
if ($piece =~ /\-/) {
$mode = "D"; # D for gaps (deletions)
} elsif ($piece =~ /\./) {
$mode = "X"; # X for pads (in 2X genomes)
} else {
$mode = "M"; # M for matches/mismatches
}
if (CORE::length($piece) == 1) {
$cigar_line .= $mode;
} elsif (CORE::length($piece) > 1) { #length can be 0 if the sequence starts with a gap
$cigar_line .= CORE::length($piece).$mode;
}
}
return $cigar_line;
}
=head2 _get_aligned_sequence_from_original_sequence_and_cigar_line
Arg [1] : string $original_sequence
Arg [1] : string $cigar_line
Example : $aligned_sequence = _get_aligned_sequence_from_original_sequence_and_cigar_line(
"CGTAACTGATGTTA", "3MD8M2D3M")
Description: get gapped sequence from original one and cigar line
Returntype : string $aligned_sequence
Exceptions : thrown if cigar_line does not match sequence length
Caller : methodname
Status : Stable
=cut
sub _get_aligned_sequence_from_original_sequence_and_cigar_line {
my ($original_sequence, $cigar_line, $fix_seq) = @_;
my $aligned_sequence = "";
return undef if (!defined($original_sequence) or !$cigar_line);
my $seq_pos = 0;
my @cig = ( $cigar_line =~ /(\d*[GMDXI])/g );
for my $cigElem ( @cig ) {
my $cigType = substr( $cigElem, -1, 1 );
my $cigCount = substr( $cigElem, 0 ,-1 );
$cigCount = 1 unless ($cigCount =~ /^\d+$/);
if( $cigType eq "M" ) {
$aligned_sequence .= substr($original_sequence, $seq_pos, $cigCount);
$seq_pos += $cigCount;
} elsif( $cigType eq "I") {
$seq_pos += $cigCount;
} elsif( $cigType eq "X") {
$aligned_sequence .= "." x $cigCount;
} elsif( $cigType eq "G" || $cigType eq "D") {
if ($fix_seq) {
$seq_pos += $cigCount;
} else {
$aligned_sequence .= "-" x $cigCount;
}
}
}
throw("Cigar line ($seq_pos) does not match sequence length (".CORE::length($original_sequence).")")
if ($seq_pos != CORE::length($original_sequence));
return $aligned_sequence;
}
=head2 _get_fake_aligned_sequence_from_cigar_line
Arg [1] : string $cigar_line
Example : $aligned_sequence = _get_fake_aligned_sequence_from_cigar_line(
"3MD8M2D3M")
Description: get gapped sequence of N\'s from the cigar line
Returntype : string $fake_aligned_sequence or undef if no $cigar_line
Exceptions :
Caller : methodname
Status : Stable
=cut
sub _get_fake_aligned_sequence_from_cigar_line {
my ($cigar_line, $fix_seq) = @_;
my $fake_aligned_sequence = "";
return undef if (!$cigar_line);
my $seq_pos = 0;
my @cig = ( $cigar_line =~ /(\d*[GMDXI])/g );
for my $cigElem ( @cig ) {
my $cigType = substr( $cigElem, -1, 1 );
my $cigCount = substr( $cigElem, 0 ,-1 );
$cigCount = 1 unless ($cigCount =~ /^\d+$/);
if( $cigType eq "M" ) {
$fake_aligned_sequence .= "N" x $cigCount;
$seq_pos += $cigCount;
} elsif( $cigType eq "I") {
$seq_pos += $cigCount;
} elsif( $cigType eq "X") {
$fake_aligned_sequence .= "." x $cigCount;
} elsif( $cigType eq "G" || $cigType eq "D") {
if ($fix_seq) {
$seq_pos += $cigCount;
} else {
$fake_aligned_sequence .= "-" x $cigCount;
}
}
}
return $fake_aligned_sequence;
}
=head2 _print
Arg [1] : ref to a FILEHANDLE
Example : $genomic_align->_print
Description: print attributes of the object to the STDOUT or to the FILEHANDLE.
Used for debuging purposes.
Returntype : none
Exceptions :
Caller : object->methodname
Status : At risk
=cut
sub _print {
my ($self, $FILEH) = @_;
my $verbose = verbose;
verbose(0);
$FILEH ||= \*STDOUT;
print $FILEH
"Bio::EnsEMBL::Compara::GenomicAlign object ($self)
dbID = ".($self->dbID or "-undef-")."
adaptor = ".($self->adaptor or "-undef-")."
genomic_align_block = ".($self->genomic_align_block or "-undef-")."
genomic_align_block_id = ".($self->genomic_align_block_id or "-undef-")."
method_link_species_set = ".($self->method_link_species_set or "-undef-")."
method_link_species_set_id = ".($self->method_link_species_set_id or "-undef-")."
dnafrag = ".($self->dnafrag or "-undef-")."
dnafrag_id = ".($self->dnafrag_id or "-undef-")."
dnafrag_start = ".($self->dnafrag_start or "-undef-")."
dnafrag_end = ".($self->dnafrag_end or "-undef-")."
dnafrag_strand = ".($self->dnafrag_strand or "-undef-")."
cigar_line = ".($self->cigar_line or "-undef-")."
level_id = ".($self->level_id or "-undef-")."
original_sequence = ".($self->original_sequence or "-undef-")."
aligned_sequence = ".($self->aligned_sequence or "-undef-")."
";
verbose($verbose);
}
=head2 display_id
Args : none
Example : my $id = $genomic_align->display_id;
Description: returns string describing this genomic_align which can be used
as display_id of a Bio::Seq object or in a fasta file. The actual form is
taxon_id:genome_db_id:coord_system_name:dnafrag_name:dnafrag_start:dnafrag_end:dnafrag_strand
e.g.
9606:1:chromosome:14:50000000:51000000:-1
Uses dnafrag information in addition to start and end.
Returntype : string
Exceptions : none
Caller : general
Status : Stable
=cut
sub display_id {
my $self = shift;
my $dnafrag = $self->dnafrag;
return "" unless($dnafrag);
my $id = join(':',
$dnafrag->genome_db->taxon_id,
$dnafrag->genome_db->dbID,
$dnafrag->coord_system_name,
$dnafrag->name,
$self->dnafrag_start,
$self->dnafrag_end,
$self->dnafrag_strand);
return $id;
}
=head2 reverse_complement
Args : none
Example : none
Description: reverse complement the object modifing dnafrag_strand and cigar_line
Returntype : none
Exceptions : none
Caller : general
Status : Stable
=cut
sub reverse_complement {
my ($self) = @_;
# reverse strand
#$self->dnafrag_strand($self->dnafrag_strand * -1);
$self->dnafrag_strand($self->{'dnafrag_strand'} * -1);
# reverse original and aligned sequences if cached
my $original_sequence = $self->{'original_sequence'};
if ($original_sequence) {
$original_sequence = reverse $original_sequence;
$original_sequence =~ tr/ATCGatcg/TAGCtagc/;
$self->original_sequence($original_sequence);
}
my $aligned_sequence = $self->{'aligned_sequence'};
if ($aligned_sequence) {
$aligned_sequence = reverse $aligned_sequence;
$aligned_sequence =~ tr/ATCGatcg/TAGCtagc/;
$self->aligned_sequence($aligned_sequence);
}
# reverse cigar_string as consequence
my $cigar_line = $self->{'cigar_line'};
#$cigar_line = join("", reverse grep {$_} split(/(\d*[GDMIX])/, $cigar_line));
$cigar_line = join("", reverse ($cigar_line=~(/(\d*[GDMIX])/g)));
$self->cigar_line($cigar_line);
}
=head2 get_Mapper
Arg[1] : [optional] integer $cache (default = FALSE)
Arg[2] : [optional] boolean $condensed (default = FALSE)
Example : $this_mapper = $genomic_align->get_Mapper();
Example : $mapper1 = $genomic_align1->get_Mapper();
$mapper2 = $genomic_align2->get_Mapper();
Description: creates and returns a Bio::EnsEMBL::Mapper to map coordinates from
the original sequence of this Bio::EnsEMBL::Compara::GenomicAlign
to the aligned sequence, i.e. the alignment. In order to map a sequence
from this Bio::EnsEMBL::Compara::GenomicAlign object to another
Bio::EnsEMBL::Compara::GenomicAlign of the same
Bio::EnsEMBL::Compara::GenomicAlignBlock object, you may use this mapper
to transform coordinates into the "alignment" coordinates and then to
the other Bio::EnsEMBL::Compara::GenomicAlign coordinates using the
corresponding Bio::EnsEMBL::Mapper.
The coordinates of the "alignment" starts with the reference_slice_start
position of the GenomicAlignBlock if available or 1 otherwise.
With the $cache argument you can decide whether you want to cache the
result or not. Result is *not* cached by default.
Returntype : Bio::EnsEMBL::Mapper object
Exceptions : throw if no cigar_line can be found
Status : Stable
=cut
sub get_Mapper {
my ($self, $cache, $condensed) = @_;
my $mapper;
$cache = 0 if (!defined($cache));
my $mode = "expanded";
if (defined($condensed) and $condensed) {
$mode = "condensed";
}
if (!defined($self->{$mode.'_mapper'})) {
if ($mode eq "condensed") {
$mapper = Bio::EnsEMBL::Mapper->new("sequence", "alignment");
my $rel_strand = $self->dnafrag_strand;
my $ref_cigar_line = $self->genomic_align_block->reference_genomic_align->cigar_line;
my $aln_pos = (eval{$self->genomic_align_block->reference_slice_start} or 1);
my $aln_seq_pos = 0;
my $seq_pos = 0;
my $insertions = 0;
my $target_cigar_pieces;
@$target_cigar_pieces = $self->cigar_line =~ /(\d*[GMDXI])/g;
my $ref_cigar_pieces;
@$ref_cigar_pieces = $ref_cigar_line =~ /(\d*[GMDXI])/g;
my $i = 0;
my $j = 0;
my ($ref_num, $ref_type) = $ref_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
$ref_num = 1 if (!defined($ref_num) or $ref_num eq "");
my ($target_num, $target_type) = $target_cigar_pieces->[$j] =~ /(\d*)([GMDXI])/;
$target_num = 1 if (!defined($target_num) or $target_num eq "");
while ($i < @$ref_cigar_pieces and $j<@$target_cigar_pieces) {
while ($ref_type eq "I") {
$aln_pos += $ref_num;
$i++;
last if ($i >= @$ref_cigar_pieces);
($ref_num, $ref_type) = $ref_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
$ref_num = 1 if (!defined($ref_num) or $ref_num eq "");
}
while ($target_type eq "I") {
$seq_pos += $target_num;
$j++;
last if ($j >= @$target_cigar_pieces);
($target_num, $target_type) = $target_cigar_pieces->[$j] =~ /(\d*)([GMDXI])/;
$target_num = 1 if (!defined($target_num) or $target_num eq "");
}
my $length;
if ($ref_num == $target_num) {
$length = $ref_num;
} elsif ($ref_num > $target_num) {
$length = $target_num;
} elsif ($ref_num < $target_num) {
$length = $ref_num;
}
my $this_piece_of_cigar_line = $length.$target_type;
if ($ref_type eq "M") {
my $this_mapper;
if ($rel_strand == 1) {
_add_cigar_line_to_Mapper($this_piece_of_cigar_line, $aln_pos,
$seq_pos + $self->dnafrag_start, 1, $mapper);
} else {
_add_cigar_line_to_Mapper($this_piece_of_cigar_line, $aln_pos, $self->dnafrag_end - $seq_pos, -1, $mapper);
}
$aln_pos += $length;
}
my $gaps = 0;
if ($target_type eq "D" || $target_type eq "X") {
$gaps += $length;
}
$seq_pos -= $gaps;
$seq_pos += $length;
if ($ref_num == $target_num) {
$i++;
$j++;
last if ($i >= @$ref_cigar_pieces);
last if ($j >= @$target_cigar_pieces);
($ref_num, $ref_type) = $ref_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
$ref_num = 1 if (!defined($ref_num) or $ref_num eq "");
($target_num, $target_type) = $target_cigar_pieces->[$j] =~ /(\d*)([GMDXI])/;
$target_num = 1 if (!defined($target_num) or $target_num eq "");
} elsif ($ref_num > $target_num) {
$j++;
$ref_num -= $target_num;
last if ($j >= @$target_cigar_pieces);
($target_num, $target_type) = $target_cigar_pieces->[$j] =~ /(\d*)([GMDXI])/;
$target_num = 1 if (!defined($target_num) or $target_num eq "");
} elsif ($ref_num < $target_num) {
$i++;
$target_num -= $ref_num;
last if ($i >= @$ref_cigar_pieces);
($ref_num, $ref_type) = $ref_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
$ref_num = 1 if (!defined($ref_num) or $ref_num eq "");
}
}
} else {
my $cigar_line = $self->cigar_line;
if (!$cigar_line) {
throw("[$self] has no cigar_line and cannot be retrieved by any means");
}
my $alignment_position = (eval{$self->genomic_align_block->reference_slice_start} or 1);
my $sequence_position = $self->dnafrag_start;
my $rel_strand = $self->dnafrag_strand;
if ($rel_strand == 1) {
$sequence_position = $self->dnafrag_start;
} else {
$sequence_position = $self->dnafrag_end;
}
$mapper = _get_Mapper_from_cigar_line($cigar_line, $alignment_position, $sequence_position, $rel_strand);
}
return $mapper if (!$cache);
$self->{$mode.'_mapper'} = $mapper;
}
return $self->{$mode.'_mapper'};
}
sub get_MapperOLD {
my ($self, $cache, $condensed) = @_;
my $mapper;
$cache = 0 if (!defined($cache));
my $mode = "expanded";
if (defined($condensed) and $condensed) {
$mode = "condensed";
}
if (!defined($self->{$mode.'_mapper'})) {
if ($mode eq "condensed") {
$mapper = Bio::EnsEMBL::Mapper->new("sequence", "alignment");
my $rel_strand = $self->dnafrag_strand;
my $ref_cigar_line = $self->genomic_align_block->reference_genomic_align->cigar_line;
my $this_aligned_seq = $self->aligned_sequence("+FAKE_SEQ");
my $aln_pos = (eval{$self->genomic_align_block->reference_slice_start} or 1);
my $aln_seq_pos = 0;
my $seq_pos = 0;
my $target_cigar_pieces;
@$target_cigar_pieces = $self->cigar_line =~ /(\d*[GMDXI])/g;
my $insertions = 0;
my $array_index = 0;
my $this_target_pos = 0;
foreach my $cigar_piece ($ref_cigar_line =~ /(\d*[GMDX])/g) {
my ($cig_count, $cig_mode) = $cigar_piece =~ /(\d*)([GMDX])/;
$cig_count = 1 if (!defined($cig_count) or $cig_count eq "");
#because of 2X genomes, need different method for extracting the
#cigar_line
#my $this_piece_of_cigar_line = _get_sub_cigar_line_slow($target_cigar_pieces, $aln_seq_pos, $cig_count);
#quicker method which keeps track of how far through the
#target_cigar_pieces array we are
my $this_piece_of_cigar_line;
($this_piece_of_cigar_line,$array_index, $this_target_pos) = _get_sub_cigar_line($target_cigar_pieces, $aln_seq_pos, $cig_count, $array_index, $this_target_pos);
#find number of each cigar_line mode in cigar_line
my $num_cigar_elements = _count_cigar_elements($this_piece_of_cigar_line);
if ($cig_mode eq "M") {
my $this_mapper;
if ($rel_strand == 1) {
$this_mapper = _get_Mapper_from_cigar_line($this_piece_of_cigar_line, $aln_pos,
$seq_pos + $self->dnafrag_start, 1);
} else {
$this_mapper = _get_Mapper_from_cigar_line($this_piece_of_cigar_line, $aln_pos,
$self->dnafrag_end - $seq_pos, -1);
}
$mapper->add_Mapper($this_mapper);
$aln_pos += $cig_count;
$insertions = $num_cigar_elements->{"I"};
$seq_pos += $insertions;
}
my $gaps = $num_cigar_elements->{"D"};
$gaps += $num_cigar_elements->{"X"};
$seq_pos -= $gaps;
$seq_pos += $cig_count;
$aln_seq_pos += $cig_count;
}
} else {
my $cigar_line = $self->cigar_line;
if (!$cigar_line) {
throw("[$self] has no cigar_line and cannot be retrieved by any means");
}
my $alignment_position = (eval{$self->genomic_align_block->reference_slice_start} or 1);
my $sequence_position = $self->dnafrag_start;
my $rel_strand = $self->dnafrag_strand;
if ($rel_strand == 1) {
$sequence_position = $self->dnafrag_start;
} else {
$sequence_position = $self->dnafrag_end;
}
$mapper = _get_Mapper_from_cigar_line($cigar_line, $alignment_position, $sequence_position, $rel_strand);
}
return $mapper if (!$cache);
$self->{$mode.'_mapper'} = $mapper;
}
return $self->{$mode.'_mapper'};
}
=head2 _count_cigar_elements
Arg[1] : string $cigar_line
Example : $num_elements = _count_cigar_elements("5M3D2M5D")
Description: Counts the number of each cigar_line mode in a cigar_line
and stores them in a hash reference. In the above example
$num_elements->{"M"} is 7, $num_elements->{"D"} is 8
Returntype : hash reference
Exceptions : None
Status : At risk
=cut
sub _count_cigar_elements {
my ($cigar_line) = @_;
my $this_count = 0;
my $num_elements;
#initialise each element to 0
foreach my $mode (qw(G M D X I)) {
$num_elements->{$mode} = 0;
}
foreach my $cigar_piece ($cigar_line =~ /(\d*[GMDXI])/g) {
my ($cig_count, $cig_mode) = $cigar_piece =~ /(\d*)([GMDXI])/;
$cig_count = 1 if (!defined($cig_count) or $cig_count eq "");
$num_elements->{$cig_mode} += $cig_count;
}
return $num_elements;
}
=head2 _get_sub_cigar_line
Arg[1] : ref to array of target cigar_line elements
Arg[2] : int $offset start position
Arg[3] : int $length amount to extract
Arg[4] : int $start_array_index current element in target array
Arg[5] : int $start_target_pos current position in target coords
Example : my $new_cigar_line = _get_sub_cigar_line($target_cigar_pieces, $pos, $count);
Description: Extracts a cigar_line of size $length starting at $offset
Returntype : string
Exceptions : None
Status : At risk
=cut
sub _get_sub_cigar_line {
my ($target_cigar_pieces, $offset, $length, $start_array_index, $start_target_pos) = @_;
my $ref_pos = $offset + $length;
my $i = $start_array_index;
my $target_pos = $start_target_pos;
#current target element
my ($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
$target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
my $new_cigar_line = "";
#check to see if previous target overlaps this ref_pos
if ($offset) {
if ($target_pos > $offset) {
#need to only add on cig_count amount
my $new_count;
if ($target_pos - $offset < $length) {
$new_count = ($target_pos - $offset);
} else {
$new_count = $length;
}
#$new_cigar_line .= $new_count . $target_cig_mode;
$new_cigar_line .= _cigar_element($target_cig_mode,$new_count);
#print "here1 $target_cig_mode $new_count\n";
}
#increment to next target element
$i++;
}
while ($target_pos < $ref_pos && $i < @$target_cigar_pieces) {
($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i++] =~ /(\d*)([GMDXI])/;
$target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
#first piece
if (!$target_pos) {
if ($target_cig_count >= $length) {
$new_cigar_line .= _cigar_element($target_cig_mode,$length);
} else {
$new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
}
} else {
if ($target_cig_mode ne "I" &&
$target_cig_count + $target_pos > $ref_pos) {
#if new target piece extends beyond ref_piece but is not I
#(since this doesn't count to target_pos) need to shorten it
my $count = $ref_pos - $target_pos;
$new_cigar_line .= _cigar_element($target_cig_mode,$count);
} else {
$new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
}
}
$target_pos += $target_cig_count unless $target_cig_mode eq "I";
}
#need to check if the next element is an I which doesn't count to
#target_pos but need to add it to cigar_line
if ($i < @$target_cigar_pieces) {
($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i] =~ /(\d*)([GMDXI])/;
$target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
if ($target_cig_mode eq "I") {
$new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
}
}
#decrement to return current target element
if ( $i > 0) {
$i--;
}
return ($new_cigar_line, $i, $target_pos);
}
sub _get_sub_cigar_line_slow {
my ($target_cigar_pieces, $offset, $length) = @_;
my $i = 0;
my $ref_pos = $offset + $length;
my ($target_cig_count, $target_cig_mode);
my $target_pos = 0;
#skip through target_cigar_line until get to correct position
while ($target_pos < $offset && $i < @$target_cigar_pieces) {
($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i++] =~ /(\d*)([GMDXI])/;
$target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
$target_pos += $target_cig_count unless $target_cig_mode eq "I";
}
my $new_cigar_line = "";
#check to see if previous target overlaps this ref_pos
if ($offset) {
if ($target_pos > $offset) {
#need to only add on cig_count amount
my $new_count;
if ($target_pos - $offset < $length) {
$new_count = ($target_pos - $offset);
} else {
$new_count = $length;
}
#$new_cigar_line .= $new_count . $target_cig_mode;
$new_cigar_line .= _cigar_element($target_cig_mode,$new_count);
}
}
while ($target_pos < $ref_pos && $i < @$target_cigar_pieces) {
($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i++] =~ /(\d*)([GMDXI])/;
$target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
#first piece
if (!$target_pos) {
if ($target_cig_count >= $length) {
$new_cigar_line .= _cigar_element($target_cig_mode,$length);
} else {
$new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
}
} else {
if ($target_cig_mode ne "I" &&
$target_cig_count + $target_pos > $ref_pos) {
#if new target piece extends beyond ref_piece but is not I
#(since this doesn't count to target_pos) need to shorten it
my $count = $ref_pos - $target_pos;
$new_cigar_line .= _cigar_element($target_cig_mode,$count);
} else {
$new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
}
}
$target_pos += $target_cig_count unless $target_cig_mode eq "I";
}
#need to check if the next element is an I which doesn't count to
#target_pos but need to add it to cigar_line
if ($i < @$target_cigar_pieces) {
($target_cig_count, $target_cig_mode) = $target_cigar_pieces->[$i++] =~ /(\d*)([GMDXI])/;
$target_cig_count = 1 if (!defined($target_cig_count) or $target_cig_count eq "");
if ($target_cig_mode eq "I") {
$new_cigar_line .= _cigar_element($target_cig_mode,$target_cig_count);
}
}
return $new_cigar_line;
}
=head2 _cigar_element
Arg[1] : char $mode valid cigar_line mode
Arg[2] : int $length size of element
Example : $elem = _cigar_element("M", 5);
Description: Creates a valid cigar element
Returntype : integer
Exceptions : None
Status : At risk
=cut
sub _cigar_element {
my ($mode, $len) = @_;
my $elem;
if ($len == 1) {
$elem = $mode;
#} elsif ($len > 1) { #length can be 0 if the sequence starts with a gap
} else { #length can be 0 if the sequence starts with a gap
$elem = $len.$mode;
}
return $elem;
}
=head2 _get_Mapper_from_cigar_line
Arg[1] : $cigar_line
Arg[2] : $alignment_position
Arg[3] : $sequence_position
Arg[4] : $relative_strand
Example : $this_mapper = _get_Mapper_from_cigar_line($cigar_line,
$aln_pos, $seq_pos, 1);
Description: creates a new Bio::EnsEMBL::Mapper object for mapping between
sequence and alignment coordinate systems using the cigar_line
and starting from the $alignment_position and sequence_position.
Returntype : Bio::EnsEMBL::Mapper object
Exceptions : None
Status : Stable
=cut
sub _get_Mapper_from_cigar_line {
my ($cigar_line, $alignment_position, $sequence_position, $rel_strand) = @_;
my $mapper = Bio::EnsEMBL::Mapper->new("sequence", "alignment");
my @cigar_pieces = ($cigar_line =~ /(\d*[GMDXI])/g);
if ($rel_strand == 1) {
foreach my $cigar_piece (@cigar_pieces) {
my $cigar_type = substr($cigar_piece, -1, 1 );
my $cigar_count = substr($cigar_piece, 0 ,-1 );
$cigar_count = 1 unless ($cigar_count =~ /^\d+$/);
next if ($cigar_count < 1);
if( $cigar_type eq "M" ) {
$mapper->add_map_coordinates(
"sequence", #$self->dbID,
$sequence_position,
$sequence_position + $cigar_count - 1,
$rel_strand,
"alignment", #$self->genomic_align_block->dbID,
$alignment_position,
$alignment_position + $cigar_count - 1
);
$sequence_position += $cigar_count;
$alignment_position += $cigar_count;
} elsif( $cigar_type eq "I") {
#add to sequence_position but not alignment_position
$sequence_position += $cigar_count;
} elsif( $cigar_type eq "G" || $cigar_type eq "D" || $cigar_type eq "X") {
$alignment_position += $cigar_count;
}
}
} else {
foreach my $cigar_piece (@cigar_pieces) {
my $cigar_type = substr($cigar_piece, -1, 1 );
my $cigar_count = substr($cigar_piece, 0 ,-1 );
$cigar_count = 1 unless ($cigar_count =~ /^\d+$/);
next if ($cigar_count < 1);
if( $cigar_type eq "M" ) {
$mapper->add_map_coordinates(
"sequence", #$self->dbID,
$sequence_position - $cigar_count + 1,
$sequence_position,
$rel_strand,
"alignment", #$self->genomic_align_block->dbID,
$alignment_position,
$alignment_position + $cigar_count - 1
);
$sequence_position -= $cigar_count;
$alignment_position += $cigar_count;
} elsif( $cigar_type eq "I") {
#add to sequence_position but not alignment_position
$sequence_position -= $cigar_count;
} elsif( $cigar_type eq "G" || $cigar_type eq "D" || $cigar_type eq "X") {
$alignment_position += $cigar_count;
}
}
}
return $mapper;
}
sub _add_cigar_line_to_Mapper {
my ($cigar_line, $alignment_position, $sequence_position, $rel_strand, $mapper) = @_;
my @cigar_pieces = ($cigar_line =~ /(\d*[GMDXI])/g);
if ($rel_strand == 1) {
foreach my $cigar_piece (@cigar_pieces) {
my $cigar_type = substr($cigar_piece, -1, 1 );
my $cigar_count = substr($cigar_piece, 0 ,-1 );
$cigar_count = 1 unless ($cigar_count =~ /^\d+$/);
next if ($cigar_count < 1);
if( $cigar_type eq "M" ) {
$mapper->add_map_coordinates(
"sequence", #$self->dbID,
$sequence_position,
$sequence_position + $cigar_count - 1,
$rel_strand,
"alignment", #$self->genomic_align_block->dbID,
$alignment_position,
$alignment_position + $cigar_count - 1
);
$sequence_position += $cigar_count;
$alignment_position += $cigar_count;
} elsif( $cigar_type eq "I") {
#add to sequence_position but not alignment_position
$sequence_position += $cigar_count;
} elsif( $cigar_type eq "G" || $cigar_type eq "D" || $cigar_type eq "X") {
$alignment_position += $cigar_count;
}
}
} else {
foreach my $cigar_piece (@cigar_pieces) {
my $cigar_type = substr($cigar_piece, -1, 1 );
my $cigar_count = substr($cigar_piece, 0 ,-1 );
$cigar_count = 1 unless ($cigar_count =~ /^\d+$/);
next if ($cigar_count < 1);
if( $cigar_type eq "M" ) {
$mapper->add_map_coordinates(
"sequence", #$self->dbID,
$sequence_position - $cigar_count + 1,
$sequence_position,
$rel_strand,
"alignment", #$self->genomic_align_block->dbID,
$alignment_position,
$alignment_position + $cigar_count - 1
);
$sequence_position -= $cigar_count;
$alignment_position += $cigar_count;
} elsif( $cigar_type eq "I") {
#add to sequence_position but not alignment_position
$sequence_position -= $cigar_count;
} elsif( $cigar_type eq "G" || $cigar_type eq "D" || $cigar_type eq "X") {
$alignment_position += $cigar_count;
}
}
}
return $mapper;
}
=head2 get_Slice
Arg[1] : -none-
Example : $slice = $genomic_align->get_Slice();
Description: creates and returns a Bio::EnsEMBL::Slice which corresponds to
this Bio::EnsEMBL::Compara::GenomicAlign
Returntype : Bio::EnsEMBL::Slice object
Exceptions : return -undef- if slice cannot be created (this is likely to
happen if the Registry is misconfigured)
Status : Stable
=cut
sub get_Slice {
my ($self) = @_;
my $slice = $self->dnafrag->slice;
return undef if (!defined($slice));
$slice = $slice->sub_Slice(
$self->dnafrag_start,
$self->dnafrag_end,
$self->dnafrag_strand
);
return $slice;
}
=head2 restrict
Arg[1] : int start
Arg[1] : int end
Example : my $genomic_align = $genomic_align->restrict(10, 20);
Description: restrict (trim) this GenomicAlign to the start and end
positions (in alignment coordinates). If no trimming is
required, the original object is returned instead.
Returntype : Bio::EnsEMBL::Compara::GenomicAlign object
Exceptions :
Status : At risk
=cut
sub restrict {
my ($self, $start, $end) = @_;
throw("Wrong arguments") if (!$start or !$end);
my $restricted_genomic_align = $self->copy();
delete($restricted_genomic_align->{dbID});
delete($restricted_genomic_align->{genomic_align_block_id});
delete($restricted_genomic_align->{original_sequence});
delete($restricted_genomic_align->{aligned_sequence});
delete($restricted_genomic_align->{cigar_line});
$restricted_genomic_align->{original_dbID} = $self->dbID if ($self->dbID);
my $aligned_seq_length = 0;
#unable to retrieve $self->genomic_align_block->length
if ((!$self->{'genomic_align_block_id'}) or (!$self->genomic_align_block->length)) {
my @cigar = grep {$_} split(/(\d*[GDMXI])/, $self->cigar_line);
while (my $cigar = shift(@cigar)) {
my ($num, $type) = ($cigar =~ /^(\d*)([GDMXI])/);
$num = 1 if ($num eq "");
$aligned_seq_length += $num unless ($type eq "I");
}
} else {
$aligned_seq_length = $self->genomic_align_block->length;
}
my $final_aligned_length = $end - $start + 1;
my $length_of_truncated_seq_at_the_start = $start - 1;
my $length_of_truncated_seq_at_the_end = $aligned_seq_length - $end;
my @cigar = grep {$_} split(/(\d*[GDMXI])/, $self->cigar_line);
## Trim start of cigar_line if needed
if ($length_of_truncated_seq_at_the_start >= 0) {
$aligned_seq_length = 0;
my $original_seq_length = 0;
my $new_cigar_piece = "";
while (my $cigar = shift(@cigar)) {
my ($num, $type) = ($cigar =~ /^(\d*)([GDMXI])/);
$num = 1 if ($num eq "");
if ($type ne "I") {
$aligned_seq_length += $num;
}
if ($aligned_seq_length >= $length_of_truncated_seq_at_the_start) {
my $length = $aligned_seq_length - $length_of_truncated_seq_at_the_start;
if ($length > 1) {
$new_cigar_piece = $length.$type;
} elsif ($length == 1) {
$new_cigar_piece = $type;
}
unshift(@cigar, $new_cigar_piece) if ($new_cigar_piece);
if ($type eq "M" || $type eq "I") {
$original_seq_length += $length_of_truncated_seq_at_the_start - ($aligned_seq_length - $num);
}
last;
}
$original_seq_length += $num if ($type eq "M" || $type eq "I");
}
if ($self->dnafrag_strand == 1) {
$restricted_genomic_align->dnafrag_start($self->dnafrag_start + $original_seq_length);
} else {
$restricted_genomic_align->dnafrag_end($self->dnafrag_end - $original_seq_length);
}
}
my @final_cigar = ();
## Trim end of cigar_line if needed
if ($length_of_truncated_seq_at_the_end >= 0) {
## Truncate all the GenomicAligns
$aligned_seq_length = 0;
my $original_seq_length = 0;
my $new_cigar_piece = "";
while (my $cigar = shift(@cigar)) {
my ($num, $type) = ($cigar =~ /^(\d*)([GDMIX])/);
$num = 1 if ($num eq "");
if ($type ne "I") {
$aligned_seq_length += $num;
}
if ($aligned_seq_length >= $final_aligned_length) {
my $length = $num - $aligned_seq_length + $final_aligned_length;
if ($length > 1) {
$new_cigar_piece = $length.$type;
} elsif ($length == 1) {
$new_cigar_piece = $type;
}
push(@final_cigar, $new_cigar_piece) if ($new_cigar_piece);
if ($type eq "M" || $type eq "I") {
$original_seq_length += $length;
}
last;
} else {
push(@final_cigar, $cigar);
}
$original_seq_length += $num if ($type eq "M" || $type eq "I");
}
if ($self->dnafrag_strand == 1) {
$restricted_genomic_align->dnafrag_end($restricted_genomic_align->dnafrag_start + $original_seq_length - 1);
} else {
$restricted_genomic_align->dnafrag_start($restricted_genomic_align->dnafrag_end - $original_seq_length + 1);
}
} else {
@final_cigar = @cigar;
}
## Save genomic_align's cigar_line
$restricted_genomic_align->aligned_sequence(0);
$restricted_genomic_align->cigar_line(join("", @final_cigar));
return $restricted_genomic_align;
}
1;