BioMart::Dataset::GenomicSequence
DNAAdaptor
Toolbar
Summary
BioMart::Dataset::GenomicSequence::DNAAdaptor.pm
Package variables
Privates (from "my" definitions)
$logger = Log::Log4perl->get_logger(__PACKAGE__)
Included modules
Inherit
Synopsis
Description
Methods
Methods description
Usage : my $dna = BioMart::Dataset::GenomicSequence::DNAAdaptor-> new("seq_name" => $name, "dna_tablename" => $dnatablename, "seq_fieldname" => $seqfieldname, "chunk_name_fieldname" => $chunkfieldname, "chunk_start_fieldname" => $chunk_start_fieldname, "configurator" => $conf, "chunk_size" => 10 );
Description : creates a DNAAdaptor for retrieving DNA sequence. Requires
a name, which maps to a mart sequence database; the name of
the dna table within the database (e.g., dna_chunks in ensembl);
the names of that table's sequence, chunk name, and
chunk_start fields (e.g., sequence, chr_name, chr_start in
ensembl); a BioMart::Configurator object; and the dna chunk
size.
Returntype : BioMart::Dataset::GenomicSequence::DNAAdaptor object. Meant
for use by the BioMart::Dataset::GenomicSequence object.
Exceptions : Missing or invalid params
Caller : BioMart::Dataset::GenomicSequence |
Usage : $dna->close; Description : closes all DBI resources involved with fetching sequences. To be called by the sequence parser when it has exhausted its resultSet for the given query. Returntype : none Exceptions : none Caller : sequence parser module |
Usage : my $seq = $dna->getSequence($chunk_name, $start, $end); Description : gets the dna sequence at a particular genomic location. chunk_name corresponds to the value that would occur in the chunk_name_fieldname field passed to the constructor for DNAAdaptor (e.g. the value of chr_name in ensembl). start corresponds to the value in the chunk_start_fieldname field passed to the constructor (e.g. the value of chr_start in ensembl). Returntype : scalar $seq Exceptions : none Caller : BioMart::SubSequence::GenomicSequence |
Methods code
# Destructor hook: when the adaptor object is destroyed, delegate to
# close() so the object's cleanup routine always runs.
sub DESTROY {
    my ($self) = @_;
    $self->close;
}

# Module files must end by evaluating to a true value.
1;
# Builds a run of 'N' characters used as padding.
#
# Parameters:
#   $self - the adaptor (unused; present because this is called as a method)
#   $num  - number of 'N' characters wanted; getSequence passes the integer
#           length ($end - $start + 1) of the requested region
#
# Returns: a string of $num 'N' characters, or the empty string when $num
#          is undefined, zero or negative.
sub _Npad {
    my ($self, $num) = @_;

    # An inverted range gives a non-positive length; return '' explicitly
    # rather than handing a negative count to the repetition operator.
    return '' unless (defined $num && $num > 0);

    # The repetition operator builds the string in one step instead of
    # appending one character per loop iteration.
    return 'N' x $num;
}
# Runs the prepared 'subSth' statement to read part of a single chunk.
#
# Parameters:
#   $chr        - chunk name value bound to the statement
#   $start      - coordinate of the first base wanted
#   $chunkStart - start coordinate of the chunk containing $start
#   $len        - number of bases requested
#
# Returns: the value fetched from the first row of the result (scalar
#          context), or undef when no row comes back.
sub _fetchChunkSubstring {
    my ($self, $chr, $start, $chunkStart, $len) = @_;

    # 1-based offset of $start within its chunk.
    my $offset = $start - $chunkStart + 1;
    my $handle = $self->get('subSth');
    my @binds  = ($offset, $len, $chunkStart, $chr);

    # For the log only: fill each placeholder, in order, with its quoted
    # bind value so the logged SQL is readable.
    my $loggedSql = $handle->{Statement};
    for my $value (@binds) {
        my $quoted = "\"$value\"";
        $loggedSql =~ s/\?/$quoted/;
    }
    $logger->info("QUERY SUBSTRING SQL: $loggedSql\;");

    $handle->execute(@binds);
    my $sequence = $handle->fetchrow;
    $handle->finish;
    $self->set('subSth', $handle);

    return $sequence;
}
# Runs the prepared 'fullSth' statement to read one chunk in full.
#
# Parameters:
#   $chr        - chunk name value bound to the statement
#   $chunkStart - start coordinate of the chunk to read
#
# Returns: the value fetched from the first row of the result (scalar
#          context), or undef when no row comes back.
sub _fetchFullChunk {
    my ($self, $chr, $chunkStart) = @_;

    my $handle = $self->get('fullSth');
    my @binds  = ($chunkStart, $chr);

    # For the log only: fill each placeholder, in order, with its quoted
    # bind value so the logged SQL is readable.
    my $loggedSql = $handle->{Statement};
    for my $value (@binds) {
        my $quoted = "\"$value\"";
        $loggedSql =~ s/\?/$quoted/;
    }
    $logger->info("QUERY FULL SQL: $loggedSql\;");

    $handle->execute(@binds);
    my $sequence = $handle->fetchrow;
    $handle->finish;
    $self->set('fullSth', $handle);

    return $sequence;
}
# Extends a partially fetched sequence, in place, until it reaches the
# requested length or no more sequence can be fetched.
#
# Parameters:
#   $chr        - chunk name passed through to _fetchSequence
#   $start      - coordinate of the first base of the whole request
#   $len        - total number of bases wanted
#   $initialSeq - reference to the scalar holding the bases fetched so
#                 far; further bases are appended to it
#
# Returns: nothing; the result is delivered through $initialSeq.
sub _fetchResidualSequence {
    my ($self, $chr, $start, $len, $initialSeq) = @_;

    my $currentLength = length(${$initialSeq});

    while ($currentLength < $len) {
        # The next fetch begins right after the bases already held and
        # asks for everything still missing.
        my $curr = $self->_fetchSequence(
            $chr,
            $start + $currentLength,
            $len - $currentLength
        );

        # Stop when nothing more is available. The defined() test keeps a
        # missing row (undef) from being used in a numeric comparison.
        last unless (defined $curr && length($curr) > 0);

        ${$initialSeq} .= $curr;
        $currentLength += length($curr);
    }

    return;
}
# Fetches up to $len bases starting at $start from the chunk that
# contains $start.
#
# Parameters:
#   $chr   - chunk name
#   $start - coordinate of the first base wanted
#   $len   - number of bases wanted
#
# Returns: whatever _fetchFullChunk or _fetchChunkSubstring returns.
sub _fetchSequence {
    my ($self, $chr, $start, $len) = @_;

    my $chunkSize = $self->getParam(CHUNKSIZE);

    # Distance of $start from the first base of its chunk (0 when $start
    # is itself a chunk start).
    my $offsetInChunk = ($start - 1) % $chunkSize;
    my $chunkStart    = $start - $offsetInChunk;

    # A request covering exactly one whole chunk uses the full-chunk
    # statement; anything else goes through the substring statement.
    my $wantsWholeChunk = ($offsetInChunk == 0 && $len == $chunkSize);

    return $wantsWholeChunk
        ? $self->_fetchFullChunk($chr, $chunkStart)
        : $self->_fetchChunkSubstring($chr, $start, $chunkStart, $len);
}
# Opens the database connection and prepares the two statements used to
# fetch sequence: 'fullSth' (built from SQLFULL) and 'subSth' (built from
# SQLSUBORACLE on Oracle, SQLSUB otherwise). The statement handles and the
# database handle are stored on the object under 'fullSth', 'subSth' and
# 'dbh'.
#
# Throws (via $self->throw) when no database handle is available or when
# either statement cannot be prepared.
sub _initialize {
    my $self = shift;

    my $dbname   = $self->getParam(SEQNAME);
    my $location = $self->getParam(CONF)->get('location');
    $location->openConnection();
    my $dbh = $location->dbh();

    # Validate the handle BEFORE touching it. Assigning to
    # $dbh->{LongReadLen} on an undefined $dbh would autovivify a plain
    # hash reference, make $dbh true and silently skip this error.
    unless ($dbh) {
        my $dbiError = defined $DBI::errstr ? $DBI::errstr : '';
        $self->throw("Could not connect to sequence db $dbname $dbiError!\n");
    }

    my $isOracle =
        $self->getParam('configurator')->get('location')->databaseType
        eq 'oracle';

    # Raise the long-field read limit on Oracle connections.
    if ($isOracle) {
        $dbh->{LongReadLen} = 2**25;
    }

    # Both statement templates take the same four names, in this order.
    my @sqlNames = (
        $self->getParam(SEQFIELDNAME),
        $self->getParam(SEQTABLENAME),
        $self->getParam(CHUNKSTARTFIELD),
        $self->getParam(CHUNKNAMEFIELD),
    );

    my $fullSQL = sprintf(SQLFULL, @sqlNames);

    my $subSQL;
    if ($isOracle) {
        $subSQL = sprintf(SQLSUBORACLE, @sqlNames);
    }
    else {
        $subSQL = sprintf(SQLSUB, @sqlNames);
    }

    my $fullSth = $dbh->prepare($fullSQL) or
        $self->throw("Couldnt prepare fullSQL ".$dbh->errstr."!\n");
    my $subSth = $dbh->prepare($subSQL) or
        $self->throw("Couldnt prepare subSQL ".$dbh->errstr."!\n");

    $self->set('fullSth', $fullSth);
    $self->set('subSth', $subSth);
    $self->set('dbh', $dbh);
}
# Constructor body: runs the parent class's _new, registers and checks
# the parameters named by TITLES, declares the handle attributes as
# undef, then calls _initialize.
sub _new {
    my $self  = shift;
    my @param = @_;

    $self->SUPER::_new(@param);
    $self->addParams(TITLES, @param);
    $self->checkRequiredParams(TITLES);

    # Declare the attributes that _initialize fills in.
    for my $attrName ('fullSth', 'subSth', 'dbh') {
        $self->attr($attrName, undef);
    }

    $self->_initialize;
}
# Releases the DBI resources held by this adaptor: finishes both prepared
# statement handles and disconnects the database handle.
#
# Safe to call more than once. The stored handles are cleared after being
# released, so later calls (for example from DESTROY or throw, which both
# call close) do nothing instead of finishing and disconnecting the same
# handles again.
#
# Returns: nothing.
sub close {
    my $self = shift;

    my $dbh = $self->get('dbh');
    return unless $dbh;

    for my $name ('fullSth', 'subSth') {
        my $sth = $self->get($name);
        next unless $sth;
        $sth->finish;
        $self->set($name, undef);
    }

    $dbh->disconnect;
    $self->set('dbh', undef);

    return;
}
# Returns the sequence for the inclusive range $start..$end on chunk
# name $chr.
#
# If the first fetch yields nothing, a string of 'N' of the requested
# length is returned instead. If it yields fewer bases than requested,
# _fetchResidualSequence is asked to append the remainder.
#
# Parameters:
#   $chr   - chunk name
#   $start - coordinate of the first base
#   $end   - coordinate of the last base
#
# Returns: scalar sequence string.
sub getSequence {
    my ($self, $chr, $start, $end) = @_;

    my $wanted   = ($end - $start) + 1;
    my $sequence = $self->_fetchSequence($chr, $start, $wanted);
    my $fetched  = $sequence ? length($sequence) : 0;

    if (!$fetched) {
        $logger->info("Padding with Ns");
        return $self->_Npad($wanted);
    }

    # Passed by reference so the remainder is appended to $sequence itself.
    $self->_fetchResidualSequence($chr, $start, $wanted, \$sequence)
        if ($fetched < $wanted);

    return $sequence;
}
# Calls close() on this adaptor and then hands the message to the parent
# class's throw.
#
# Parameters:
#   $message - error text passed on to SUPER::throw
sub throw {
    my $self    = shift;
    my $message = shift;

    $self->close;
    $self->SUPER::throw($message);
}
General documentation
AUTHOR Darin London, Damian Smedley | Top |