Raw content of Bio::DB::InMemoryCache
# POD documentation - main docs before the code
#
#
=head1 NAME
Bio::DB::InMemoryCache - Abstract interface for a sequence database
=head1 SYNOPSIS
$cachedb = Bio::DB::InMemoryCache->new( -seqdb => $real_db,
-number => 1000);
#
# get a database object somehow using a concrete class
#
$seq = $cachedb->get_Seq_by_id('ROA1_HUMAN');
#
# $seq is a Bio::Seq object
#
=head1 DESCRIPTION
This is a memory cache system which saves the objects returned by Bio::DB::RandomAccessI in
memory to a hard limit of sequences.
=head1 CONTACT
Ewan Birney
=head2 Reporting Bugs
Report bugs to the Bioperl bug tracking system to help us keep track
the bugs and their resolution. Bug reports can be submitted via email
or the web:
bioperl-bugs@bio.perl.org
http://bugzilla.bioperl.org/
=head1 APPENDIX
The rest of the documentation details each of the object
methods. Internal methods are usually preceded with a _
=cut
# Let the code begin...
package Bio::DB::InMemoryCache;
use Bio::DB::SeqI;
use vars qw(@ISA);
use strict;
use Bio::Root::Root;
use Bio::Seq;
@ISA = qw(Bio::Root::Root Bio::DB::SeqI);
sub new {
my ($class,@args) = @_;
my $self = Bio::Root::Root->new();
bless $self,$class;
my ($seqdb,$number,$agr) = $self->_rearrange([qw(SEQDB NUMBER AGRESSION)],@args);
if( !defined $seqdb || !ref $seqdb || !$seqdb->isa('Bio::DB::RandomAccessI') ) {
$self->throw("Must be a randomaccess database not a [$seqdb]");
}
if( !defined $number ) {
$number = 1000;
}
$self->seqdb($seqdb);
$self->number($number);
$self->agr($agr);
# we consider acc as the primary id here
$self->{'_cache_number_hash'} = {};
$self->{'_cache_id_hash'} = {};
$self->{'_cache_acc_hash'} = {};
$self->{'_cache_number'} = 1;
return $self;
}
=head2 get_Seq_by_id
Title : get_Seq_by_id
Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN')
Function: Gets a Bio::Seq object by its name
Returns : a Bio::Seq object
Args : the id (as a string) of a sequence
Throws : "id does not exist" exception
=cut
sub get_Seq_by_id{
my ($self,$id) = @_;
if( defined $self->{'_cache_id_hash'}->{$id} ) {
my $acc = $self->{'_cache_id_hash'}->{$id};
my $seq = $self->{'_cache_acc_hash'}->{$acc};
$self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
return $seq;
} else {
return $self->_load_Seq('id',$id);
}
}
=head2 get_Seq_by_acc
Title : get_Seq_by_acc
Usage : $seq = $db->get_Seq_by_acc('X77802');
Function: Gets a Bio::Seq object by accession number
Returns : A Bio::Seq object
Args : accession number (as a string)
Throws : "acc does not exist" exception
=cut
sub get_Seq_by_acc{
my ($self,$acc) = @_;
#print STDERR "In cache get for $acc\n";
if( defined $self->{'_cache_acc_hash'}->{$acc} ) {
#print STDERR "Returning cached $acc\n";
my $seq = $self->{'_cache_acc_hash'}->{$acc};
$self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
return $seq;
} else {
return $self->_load_Seq('acc',$acc);
}
}
sub number {
my ($self, $number) = @_;
if ($number) {
$self->{'number'} = $number;
} else {
return $self->{'number'};
}
}
sub seqdb {
my ($self, $seqdb) = @_;
if ($seqdb) {
$self->{'seqdb'} = $seqdb;
} else {
return $self->{'seqdb'};
}
}
sub agr {
my ($self, $agr) = @_;
if ($agr) {
$self->{'agr'} = $agr;
} else {
return $self->{'agr'};
}
}
sub _load_Seq {
my ($self,$type,$id) = @_;
my $seq;
if( $type eq 'id') {
$seq = $self->seqdb->get_Seq_by_id($id);
}elsif ( $type eq 'acc' ) {
$seq = $self->seqdb->get_Seq_by_acc($id);
} else {
$self->throw("Bad internal error. Don't understand $type");
}
if( $self->agr() ) {
#print STDERR "Pulling out into memory\n";
my $newseq = Bio::Seq->new( -display_id => $seq->display_id,
-accession_number => $seq->accession,
-seq => $seq->seq,
-desc => $seq->desc,
);
if( $self->agr() == 1 ) {
foreach my $sf ( $seq->top_SeqFeatures() ) {
$newseq->add_SeqFeature($sf);
}
$newseq->annotation($seq->annotation);
}
$seq = $newseq;
}
if( $self->_number_free < 1 ) {
# remove the latest thing from the hash
my @accs = sort { $self->{'_cache_number_hash'}->{$a} <=> $self->{'_cache_number_hash'}->{$b} } keys %{$self->{'_cache_number_hash'}};
my $acc = shift @accs;
# remove this guy
my $seq = $self->{'_cache_acc_hash'}->{$acc};
delete $self->{'_cache_number_hash'}->{$acc};
delete $self->{'_cache_id_hash'}->{$seq->id};
delete $self->{'_cache_acc_hash'}->{$acc};
}
# up the number, register this sequence into the hash.
$self->{'_cache_id_hash'}->{$seq->id} = $seq->accession;
$self->{'_cache_acc_hash'}->{$seq->accession} = $seq;
$self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++;
return $seq;
}
sub _number_free {
my $self = shift;
return $self->number - scalar(keys %{$self->{'_cache_number_hash'}});
}
=head2 get_Seq_by_version
Title : get_Seq_by_version
Usage : $seq = $db->get_Seq_by_version('X77802.1');
Function: Gets a Bio::Seq object by sequence version
Returns : A Bio::Seq object
Args : accession.version (as a string)
Throws : "acc.version does not exist" exception
=cut
sub get_Seq_by_version{
my ($self,@args) = @_;
$self->throw("Not implemented it");
}
## End of Package
1;