Raw content of Bio::DB::GFF::Segment
=head1 NAME
Bio::DB::GFF::Segment -- Simple DNA segment object
=head1 SYNOPSIS
See L.
=head1 DESCRIPTION
Bio::DB::GFF::Segment provides the basic representation of a range of
DNA contained in a GFF database. It is the base class from which the
Bio::DB::GFF::RelSegment and Bio::DB::GFF::Feature classes are
derived.
Generally, you will not create or manipulate Bio::DB::GFF::Segment
objects directly, but use those that are returned by the Bio::DB::GFF
module.
=cut
package Bio::DB::GFF::Segment;
use strict;
use Bio::Root::Root;
use Bio::Annotation::Collection;
use Bio::RangeI;
use Bio::Das::SegmentI;
use Bio::SeqI;
use vars qw(@ISA);
@ISA = qw(Bio::Root::Root Bio::RangeI Bio::SeqI Bio::Das::SegmentI);
use overload
'""' => 'asString',
eq => 'equals',
fallback => 1;
=head1 API
The remainder of this document describes the API for
Bio::DB::GFF::Segment.
=cut
=head2 new
Title : new
Usage : $s = Bio::DB::GFF::Segment->new(@args)
Function: create a new segment
Returns : a new Bio::DB::GFF::Segment object
Args : see below
Status : Public
This method creates a new Bio::DB::GFF::Segment object. Generally
this is called automatically by the Bio::DB::GFF module and
derivatives.
There are five positional arguments:
$factory a Bio::DB::GFF::Adaptor to use for database access
$sourceseq ID of the source sequence
$sourceclass class of the source sequence
$start start of the desired segment relative to source sequence
$stop stop of the desired segment relative to source sequence
=cut
sub new {
my $class = shift;
my ($factory,$segclass,$segname,$start,$stop) = @_;
$segclass = $segname->class if ref($segname) && $segname->can('class');
$segclass ||= 'Sequence';
$factory or $class->throw("->new(): provide a factory argument");
$class = ref $class if ref $class;
return bless { factory => $factory,
sourceseq => $segname,
class => $segclass,
start => $start,
stop => $stop,
strand => 0,
},$class;
}
# read-only accessors
=head2 factory
Title : factory
Usage : $s->factory
Function: get the factory object
Returns : a Bio::DB::GFF::Adaptor
Args : none
Status : Public
This is a read-only accessor for the Bio::DB::GFF::Adaptor object used
to create the segment.
=cut
sub factory { shift->{factory} }
# start, stop, length
=head2 start
Title : start
Usage : $s->start
Function: start of segment
Returns : integer
Args : none
Status : Public
This is a read-only accessor for the start of the segment.
=cut
sub start { shift->{start} }
=head2 end
Title : end
Usage : $s->end
Function: end of segment
Returns : integer
Args : none
Status : Public
This is a read-only accessor for the end of the segment.
=cut
sub end { shift->{stop} }
=head2 stop
Title : stop
Usage : $s->stop
Function: stop of segment
Returns : integer
Args : none
Status : Public
This is an alias for end(), provided for AcePerl compatibility.
=cut
*stop = \&end;
=head2 length
Title : length
Usage : $s->length
Function: length of segment
Returns : integer
Args : none
Status : Public
Returns the length of the segment. Always a positive number.
=cut
sub length { abs($_[0]->{start} - $_[0]->{stop})+1 }
=head2 strand
Title : strand
Usage : $s->strand
Function: strand of segment
Returns : +1,0,-1
Args : none
Status : Public
Returns the strand on which the segment resides, either +1, 0 or -1.
=cut
sub strand {
my $self = shift;
0;
}
=head2 low
Title : low
Usage : $s->low
Function: return lower coordinate
Returns : lower coordinate
Args : none
Status : Public
Returns the lower coordinate, either start or end.
=cut
sub low {
my $self = shift;
my ($start,$stop) = ($self->start,$self->stop);
return $start < $stop ? $start : $stop;
}
*abs_low = \&low;
=head2 high
Title : high
Usage : $s->high
Function: return higher coordinate
Returns : higher coordinate
Args : none
Status : Public
Returns the higher coordinate, either start or end.
=cut
sub high {
my $self = shift;
my ($start,$stop) = ($self->start,$self->stop);
return $start > $stop ? $start : $stop;
}
*abs_high = \&high;
=head2 sourceseq
Title : sourceseq
Usage : $s->sourceseq
Function: get the segment source
Returns : a string
Args : none
Status : Public
Returns the name of the source sequence for this segment.
=cut
sub sourceseq { shift->{sourceseq} }
=head2 class
Title : class
Usage : $s->class([$newclass])
Function: get the source sequence class
Returns : a string
Args : new class (optional)
Status : Public
Gets or sets the class for the source sequence for this segment.
=cut
sub class {
my $self = shift;
my $d = $self->{class};
$self->{class} = shift if @_;
$d;
}
=head2 subseq
Title : subseq
Usage : $s->subseq($start,$stop)
Function: generate a subsequence
Returns : a Bio::DB::GFF::Segment object
Args : start and end of subsequence
Status : Public
This method generates a new segment from the start and end positions
given in the arguments. If stop E start, then the strand is reversed.
=cut
sub subseq {
my $self = shift;
my ($newstart,$newstop) = @_;
my ($refseq,$start,$stop,$class) = ($self->{sourceseq},
$self->{start},$self->{stop},
$self->class);
# We deliberately force subseq to return objects of type RelSegment
# Otherwise, when we get a subsequence from a Feature object,
# its method and source go along for the ride, which is incorrect.
my $new = $self->new_from_segment($self);
if ($start <= $stop) {
@{$new}{qw(start stop)} = ($start + $newstart - 1, $start + $newstop - 1);
} else {
@{$new}{qw(start stop)} = ($start - ($newstart - 1), $start - ($newstop - 1)),
}
$new;
}
=head2 seq
Title : seq
Usage : $s->seq
Function: get the sequence string for this segment
Returns : a string
Args : none
Status : Public
Returns the sequence for this segment as a simple string. (-) strand
segments are automatically reverse complemented
This method is also called dna() and protein() for backward
compatibility with AceDB.
=cut
sub seq {
my $self = shift;
my ($ref,$class,$start,$stop,$strand)
= @{$self}{qw(sourceseq class start stop strand)};
# ($start,$stop) = ($stop,$start) if $strand eq '-';
$self->factory->dna($ref,$start,$stop,$class);
}
*protein = *dna = \&seq;
=head2 primary_seq
Title : primary_seq
Usage : $s->primary_seq
Function: returns a Bio::PrimarySeqI compatible object
Returns : a Bio::PrimarySeqI object
Args : none
Status : Public
This is for compatibility with BioPerl's separation of SeqI
from PrimarySeqI. It just returns itself.
=cut
#'
sub primary_seq { shift }
=head2 type
Title : type
Usage : $s->type
Function: return the string "feature"
Returns : the string "feature"
Args : none
Status : Public
This is for future sequence ontology-compatibility and
represents the default type of a feature on the genome
=cut
sub type { "feature" }
=head2 equals
Title : equals
Usage : $s->equals($d)
Function: segment equality
Returns : true, if two segments are equal
Args : another segment
Status : Public
Returns true if the two segments have the same source sequence, start and stop.
=cut
sub equals {
my $self = shift;
my $peer = shift;
return unless defined $peer;
return $self->asString eq $peer unless ref($peer) && $peer->isa('Bio::DB::GFF::Segment');
return $self->{start} eq $peer->{start}
&& $self->{stop} eq $peer->{stop}
&& $self->{sourceseq} eq $peer->{sourceseq};
}
=head2 asString
Title : asString
Usage : $s->asString
Function: human-readable string for segment
Returns : a string
Args : none
Status : Public
Returns a human-readable string representing this sequence. Format
is:
sourceseq/start,stop
=cut
sub asString {
my $self = shift;
my $label = $self->refseq;
my $start = $self->start;
my $stop = $self->stop;
return "$label:$start,$stop";
}
=head2 clone
Title : clone
Usage : $copy = $s->clone
Function: make a copy of this segment
Returns : a Bio::DB::GFF::Segment object
Args : none
Status : Public
This method creates a copy of the segment and returns it.
=cut
# deep copy of the thing
sub clone {
my $self = shift;
my %h = %$self;
return bless \%h,ref($self);
}
=head2 error
Title : error
Usage : $error = $s->error([$new_error])
Function: get or set the last error
Returns : a string
Args : an error message (optional)
Status : Public
In case of a fault, this method can be used to obtain the last error
message. Internally it is called to set the error message.
=cut
sub error {
my $self = shift;
my $g = $self->{error};
$self->{error} = shift if @_;
$g;
}
=head1 Relative Addressing Methods
The following methods are provided for compatibility with
Bio::DB::GFF::RelSegment, which provides relative addressing
functions.
=head2 abs_start
Title : abs_start
Usage : $s->abs_start
Function: the absolute start of the segment
Returns : an integer
Args : none
Status : Public
This is an alias to start(), and provided for API compatibility with
Bio::DB::GFF::RelSegment.
=cut
*abs_start = \&start;
=head2 abs_end
Title : abs_end
Usage : $s->abs_end
Function: the absolute stop of the segment
Returns : an integer
Args : none
Status : Public
This is an alias to stop(), and provided for API compatibility with
Bio::DB::GFF::RelSegment.
=cut
*abs_stop = \&stop;
*abs_end = \&stop;
=head2 abs_strand
Title : abs_strand
Usage : $s->abs_strand
Function: the absolute strand of the segment
Returns : +1,0,-1
Args : none
Status : Public
This is an alias to strand(), and provided for API compatibility with
Bio::DB::GFF::RelSegment.
=cut
sub abs_strand {
my $self = shift;
return $self->abs_end <=> $self->abs_start;
}
=head2 abs_ref
Title : abs_ref
Usage : $s->abs_ref
Function: the reference sequence for this segment
Returns : a string
Args : none
Status : Public
This is an alias to sourceseq(), and is here to provide API
compatibility with Bio::DB::GFF::RelSegment.
=cut
*abs_ref = \&sourceseq;
=head2 refseq
Title : refseq
Usage : $s->refseq
Function: get or set the reference sequence
Returns : a string
Args : none
Status : Public
Examine or change the reference sequence. This is an alias to
sourceseq(), provided here for API compatibility with
Bio::DB::GFF::RelSegment.
=cut
*refseq = \&sourceseq;
=head2 ref
Title : ref
Usage : $s->refseq
Function: get or set the reference sequence
Returns : a string
Args : none
Status : Public
An alias for refseq()
=cut
sub ref { shift->refseq(@_) }
=head2 seq_id
Title : seq_id
Usage : $ref = $s->seq_id
Function: get the reference sequence in a LocationI-compatible way
Returns : a string
Args : none
Status : Public
An alias for refseq() but only allows reading.
=cut
sub seq_id { shift->refseq }
=head2 truncated
Title : truncated
Usage : $truncated = $s->truncated
Function: Flag indicating that the segment was truncated during creation
Returns : A boolean flag
Args : none
Status : Public
This indicates that the sequence was truncated during creation. The
returned flag is undef if no truncation occured. If truncation did
occur, the flag is actually an array ref in which the first element is
true if truncation occurred on the left, and the second element
occurred if truncation occurred on the right.
=cut
sub truncated {
my $self = shift;
my $hash = $self->{truncated} or return;
CORE::ref($hash) eq 'HASH' or return [1,1]; # paranoia -- not that this would ever happen ;-)
return [$hash->{start},$hash->{stop}];
}
=head2 Bio::RangeI Methods
The following Bio::RangeI methods are supported:
overlaps(), contains(), equals(),intersection(),union(),overlap_extent()
=cut
sub overlaps {
my $self = shift;
my($other,$so) = @_;
if ($other->isa('Bio::DB::GFF::RelSegment')) {
return if $self->abs_ref ne $other->abs_ref;
}
$self->SUPER::overlaps(@_);
}
sub contains {
my $self = shift;
my($other,$so) = @_;
if ($other->isa('Bio::DB::GFF::RelSegment')) {
return if $self->abs_ref ne $other->abs_ref;
}
$self->SUPER::contains(@_);
}
#sub equals {
# my $self = shift;
# my($other,$so) = @_;
# if ($other->isa('Bio::DB::GFF::RelSegment')) {
# return if $self->abs_ref ne $other->abs_ref;
# }
# $self->SUPER::equals(@_);
#}
sub intersection {
my $self = shift;
my($other,$so) = @_;
if ($other->isa('Bio::DB::GFF::RelSegment')) {
return if $self->abs_ref ne $other->abs_ref;
}
$self->SUPER::intersection(@_);
}
sub union {
my $self = shift;
my($other) = @_;
if ($other->isa('Bio::DB::GFF::RelSegment')) {
return if $self->abs_ref ne $other->abs_ref;
}
$self->SUPER::union(@_);
}
sub overlap_extent {
my $self = shift;
my($other) = @_;
if ($other->isa('Bio::DB::GFF::RelSegment')) {
return if $self->abs_ref ne $other->abs_ref;
}
$self->SUPER::overlap_extent(@_);
}
=head2 Bio::SeqI implementation
=cut
=head2 primary_id
Title : primary_id
Usage : $unique_implementation_key = $obj->primary_id;
Function: Returns the unique id for this object in this
implementation. This allows implementations to manage their
own object ids in a way the implementaiton can control
clients can expect one id to map to one object.
For sequences with no accession number, this method should
return a stringified memory location.
Returns : A string
Args : None
Status : Virtual
=cut
sub primary_id {
my ($obj,$value) = @_;
if( defined $value) {
$obj->{'primary_id'} = $value;
}
if( ! exists $obj->{'primary_id'} ) {
return "$obj";
}
return $obj->{'primary_id'};
}
=head2 display_name
Title : display_name
Usage : $id = $obj->display_name or $obj->display_name($newid);
Function: Gets or sets the display id, also known as the common name of
the Seq object.
The semantics of this is that it is the most likely string
to be used as an identifier of the sequence, and likely to
have "human" readability. The id is equivalent to the LOCUS
field of the GenBank/EMBL databanks and the ID field of the
Swissprot/sptrembl database. In fasta format, the >(\S+) is
presumed to be the id, though some people overload the id
to embed other information. Bioperl does not use any
embedded information in the ID field, and people are
encouraged to use other mechanisms (accession field for
example, or extending the sequence object) to solve this.
Notice that $seq->id() maps to this function, mainly for
legacy/convenience issues.
Returns : A string
Args : None or a new id
Note, this used to be called display_id(), and this name is preserved for
backward compatibility. The default is to return the seq_id().
=cut
sub display_name { shift->seq_id }
*display_id = \&display_name;
=head2 accession_number
Title : accession_number
Usage : $unique_biological_key = $obj->accession_number;
Function: Returns the unique biological id for a sequence, commonly
called the accession_number. For sequences from established
databases, the implementors should try to use the correct
accession number. Notice that primary_id() provides the
unique id for the implemetation, allowing multiple objects
to have the same accession number in a particular implementation.
For sequences with no accession number, this method should return
"unknown".
Returns : A string
Args : None
=cut
sub accession_number {
return 'unknown';
}
=head2 alphabet
Title : alphabet
Usage : if( $obj->alphabet eq 'dna' ) { /Do Something/ }
Function: Returns the type of sequence being one of
'dna', 'rna' or 'protein'. This is case sensitive.
This is not called because this would cause
upgrade problems from the 0.5 and earlier Seq objects.
Returns : a string either 'dna','rna','protein'. NB - the object must
make a call of the type - if there is no type specified it
has to guess.
Args : none
Status : Virtual
=cut
sub alphabet{
return 'dna'; # no way this will be anything other than dna!
}
=head2 desc
Title : desc
Usage : $seqobj->desc($string) or $seqobj->desc()
Function: Sets or gets the description of the sequence
Example :
Returns : The description
Args : The description or none
=cut
sub desc { shift->asString }
=head2 species
Title : species
Usage : $species = $seq->species() or $seq->species($species)
Function: Gets or sets the species
Example :
Returns : Bio::Species object
Args : None or Bio::Species object
See L for more information
=cut
sub species {
my ($self, $species) = @_;
if ($species) {
$self->{'species'} = $species;
} else {
return $self->{'species'};
}
}
=head2 annotation
Title : annotation
Usage : $ann = $seq->annotation or $seq->annotation($annotation)
Function: Gets or sets the annotation
Example :
Returns : Bio::Annotation object
Args : None or Bio::Annotation object
See L for more information
=cut
sub annotation {
my ($obj,$value) = @_;
if( defined $value || ! defined $obj->{'annotation'} ) {
$value = new Bio::Annotation::Collection() unless defined $value;
$obj->{'annotation'} = $value;
}
return $obj->{'annotation'};
}
=head2 is_circular
Title : is_circular
Usage : if( $obj->is_circular) { /Do Something/ }
Function: Returns true if the molecule is circular
Returns : Boolean value
Args : none
=cut
sub is_circular{
return 0;
}
1;
__END__
=head1 BUGS
Report them please.
=head1 SEE ALSO
L
=head1 AUTHOR
Lincoln Stein Elstein@cshl.orgE.
Copyright (c) 2001 Cold Spring Harbor Laboratory.
This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.
=head1 CONTRIBUTORS
Jason Stajich Ejason@bioperl.orgE.
=cut