Bio::EnsEMBL::DBSQL
SequenceAdaptor
Toolbar
Summary
Bio::EnsEMBL::DBSQL::SequenceAdaptor - produce sequence strings from locations
Package variables
No package variables defined.
Included modules
Inherit
Synopsis
my $sa = $registry->get_adaptor( 'Human', 'Core', 'Sequence' );
my $dna =
${ $sa->fetch_by_Slice_start_end_strand( $slice, 1, 1000, -1 ) };
Description
An adaptor for the retrieval of DNA sequence from the EnsEMBL database
Methods
Methods description
Description: DEPRECATED use fetch_by_Slice_start_end_strand instead |
Arg [1] : Bio::EnsEMBL::Slice slice The slice from which you want the sequence Arg [2] : (optional) int startBasePair The start base pair relative to the start of the slice. Negative values or values greater than the length of the slice are fine. default = 1 Arg [3] : (optional) int endBasePair The end base pair relative to the start of the slice. Negative values or values greater than the length of the slice are fine, but the end must be greater than or equal to the start count from 1 default = the length of the slice Arg [4] : (optional) int strand 1, -1 default = 1 Example : $dna = $seq_adptr->fetch_by_Slice_start_end_strand($slice, 1, 1000, -1); Description: retrieves from db the sequence for this slice uses AssemblyMapper to find the assembly Returntype : string Exceptions : endBasePair should be less or equal to length of slice Caller : Bio::EnsEMBL::Slice::seq(), Slice::subseq() Status : Stable |
Description: DEPRECATED use fetch_by_Slice_start_end_strand() instead. |
Arg [1] : none Example : my $sa = $db_adaptor->get_SequenceAdaptor(); Description: Constructor. Calls superclass constructor and initialises internal cache structure. Returntype : Bio::EnsEMBL:DBSQL::SequenceAdaptor Exceptions : none Caller : DBAdaptor::get_SequenceAdaptor Status : Stable |
Arg [1] : int $seq_region_id the id of the sequence region this dna will be associated with. Arg [2] : string $sequence the dna sequence to be stored in the database. Note that the sequence passed in will be converted to uppercase. Example : $seq_adaptor->store(11, 'ACTGGGTACCAAACAAACACAACA'); Description: stores a dna sequence in the databases dna table and returns the database identifier for the new record. Returntype : none Exceptions : throw if the database insert fails Caller : sequence loading scripts Status : Stable |
Methods code
sub _fetch_seq
{ my $self = shift;
my $seq_region_id = shift;
my $start = shift;
my $length = shift;
my $cache = $self->{'seq_cache'};
if($length < $SEQ_CACHE_MAX) {
my $chunk_min = ($start-1) >> $SEQ_CHUNK_PWR;
my $chunk_max = ($start + $length - 1) >> $SEQ_CHUNK_PWR;
my $entire_seq = undef;
for(my $i = $chunk_min; $i <= $chunk_max; $i++) {
if($cache->{"$seq_region_id:$i"}) {
$entire_seq .= $cache->{"$seq_region_id:$i"};
} else {
my $sth =
$self->prepare( "SELECT SUBSTRING(d.sequence, ?, ?) "
. "FROM dna d "
. "WHERE d.seq_region_id = ?" );
my $tmp_seq;
my $min = ($i << $SEQ_CHUNK_PWR) + 1;
$sth->bind_param( 1, $min, SQL_INTEGER );
$sth->bind_param( 2, 1 << $SEQ_CHUNK_PWR, SQL_INTEGER );
$sth->bind_param( 3, $seq_region_id, SQL_INTEGER );
$sth->execute();
$sth->bind_columns(\$tmp_seq);
$sth->fetch();
$sth->finish();
$entire_seq .= uc($tmp_seq);
$cache->{"$seq_region_id:$i"} = $tmp_seq;
}
}
my $min = ( $chunk_min << $SEQ_CHUNK_PWR ) + 1;
my $max = ( $chunk_max + 1 ) << $SEQ_CHUNK_PWR;
my $seq = substr( $entire_seq, $start - $min, $length );
return\$ seq;
} else {
my $sth =
$self->prepare( "SELECT SUBSTRING(d.sequence, ?, ?) "
. "FROM dna d "
. "WHERE d.seq_region_id = ?" );
my $tmp_seq;
$sth->bind_param( 1, $start, SQL_INTEGER );
$sth->bind_param( 2, $length, SQL_INTEGER );
$sth->bind_param( 3, $seq_region_id, SQL_INTEGER );
$sth->execute();
$sth->bind_columns(\$tmp_seq);
$sth->fetch();
$sth->finish();
$tmp_seq = uc($tmp_seq);
return\$ tmp_seq;
} } |
sub fetch_by_RawContig_start_end_strand
{ deprecate('Use fetch_by_Slice_start_end_strand instead.');
fetch_by_Slice_start_end_strand(@_);
}
1; } |
sub fetch_by_Slice_start_end_strand
{ my ( $self, $slice, $start, $end, $strand ) = @_;
if(!ref($slice) || !$slice->isa("Bio::EnsEMBL::Slice")) {
throw("Slice argument is required.");
}
$start = 1 if(!defined($start));
$end = $slice->end() - $slice->start() + 1 if(!defined($end));
throw("Start must be less than or equal to end.") if($start > $end);
$strand ||= 1;
my $right_expand = $end - $slice->length(); my $left_expand = 1 - $start;
if($right_expand || $left_expand) {
$slice = $slice->expand($left_expand, $right_expand);
}
my $slice_adaptor = $slice->adaptor();
my @symproj=@{$slice_adaptor->fetch_normalized_slice_projection($slice)};
if(@symproj == 0) {
throw('Could not retrieve normalized Slices. Database contains ' .
'incorrect assembly_exception information.');
}
if(@symproj != 1 || $symproj[0]->[2] != $slice) {
my $seq;
foreach my $segment (@symproj) {
my $symlink_slice = $segment->[2];
$seq .= ${$self->fetch_by_Slice_start_end_strand($symlink_slice,
1,undef,1)};
}
if($strand == -1) {
reverse_comp(\$seq);
}
return\$ seq;
}
my $csa = $self->db->get_CoordSystemAdaptor();
my $seqlevel = $csa->fetch_sequence_level();
my @projection=@{$slice->project($seqlevel->name(), $seqlevel->version())};
my $seq = '';
my $total = 0;
my $tmp_seq;
foreach my $segment (@projection) {
my ($start, $end, $seq_slice) = @$segment;
my $gap = $start - $total - 1;
if($gap) {
$seq .= 'N' x $gap;
}
my $seq_region_id = $slice_adaptor->get_seq_region_id($seq_slice);
$tmp_seq = ${$self->_fetch_seq($seq_region_id,
$seq_slice->start, $seq_slice->length())};
if($seq_slice->strand == -1) {
reverse_comp(\$tmp_seq);
}
$seq .= $tmp_seq;
$total = $end;
}
my $gap = $slice->length - $total;
if($gap) {
$seq .= 'N' x $gap;
}
if(length($seq) != $slice->length()) {
$seq .= 'N' x ($slice->length() - length($seq));
}
reverse_comp(\$seq) if($strand == -1);
return\$ seq; } |
sub fetch_by_assembly_location
{ my ( $self, $chrStart, $chrEnd,
$strand, $chrName, $assemblyType ) = @_;
deprecate('Use fetch_by_Slice_start_end_strand() instead');
my $csa = $self->db->get_CoordSystem();
my $top_cs = @{$csa->fetch_all};
my $slice_adaptor = $self->db->get_SliceAdaptor();
my $slice = $slice_adaptor->fetch_by_region($top_cs->name(), $chrName,
$chrStart, $chrEnd,
$strand, $top_cs->version);
return $self->fetch_by_Slice_start_end_strand($slice,1, $slice->length,1); } |
sub new
{ my $caller = shift;
my $class = ref($caller) || $caller;
my $self = $class->SUPER::new(@_);
my %seq_cache;
tie(%seq_cache, 'Bio::EnsEMBL::Utils::Cache', $SEQ_CACHE_SZ);
$self->{'seq_cache'} =\% seq_cache;
return $self; } |
sub store
{ my ($self, $seq_region_id, $sequence) = @_;
if(!$seq_region_id) {
throw('seq_region_id is required');
}
$sequence = uc($sequence);
my $statement =
$self->prepare("INSERT INTO dna(seq_region_id, sequence) VALUES(?,?)");
$statement->bind_param(1,$seq_region_id,SQL_INTEGER);
$statement->bind_param(2,$sequence,SQL_LONGVARCHAR);
$statement->execute();
$statement->finish();
return; } |
General documentation
Copyright (c) 1999-2009 The European Bioinformatics Institute and
Genome Research Limited. All rights reserved.
This software is distributed under a modified Apache license.
For license details, please see
/info/about/code_licence.html