Raw content of Bio::DB::InMemoryCache # POD documentation - main docs before the code # # =head1 NAME Bio::DB::InMemoryCache - Abstract interface for a sequence database =head1 SYNOPSIS $cachedb = Bio::DB::InMemoryCache->new( -seqdb => $real_db, -number => 1000); # # get a database object somehow using a concrete class # $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN'); # # $seq is a Bio::Seq object # =head1 DESCRIPTION This is a memory cache system which saves the objects returned by Bio::DB::RandomAccessI in memory to a hard limit of sequences. =head1 CONTACT Ewan Birney =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via email or the web: bioperl-bugs@bio.perl.org http://bugzilla.bioperl.org/ =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::DB::InMemoryCache; use Bio::DB::SeqI; use vars qw(@ISA); use strict; use Bio::Root::Root; use Bio::Seq; @ISA = qw(Bio::Root::Root Bio::DB::SeqI); sub new { my ($class,@args) = @_; my $self = Bio::Root::Root->new(); bless $self,$class; my ($seqdb,$number,$agr) = $self->_rearrange([qw(SEQDB NUMBER AGRESSION)],@args); if( !defined $seqdb || !ref $seqdb || !$seqdb->isa('Bio::DB::RandomAccessI') ) { $self->throw("Must be a randomaccess database not a [$seqdb]"); } if( !defined $number ) { $number = 1000; } $self->seqdb($seqdb); $self->number($number); $self->agr($agr); # we consider acc as the primary id here $self->{'_cache_number_hash'} = {}; $self->{'_cache_id_hash'} = {}; $self->{'_cache_acc_hash'} = {}; $self->{'_cache_number'} = 1; return $self; } =head2 get_Seq_by_id Title : get_Seq_by_id Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN') Function: Gets a Bio::Seq object by its name Returns : a Bio::Seq object Args : the id (as a string) of a sequence Throws : "id does not exist" exception =cut sub get_Seq_by_id{ my ($self,$id) = @_; if( defined $self->{'_cache_id_hash'}->{$id} ) { my $acc = $self->{'_cache_id_hash'}->{$id}; my $seq = $self->{'_cache_acc_hash'}->{$acc}; $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++; return $seq; } else { return $self->_load_Seq('id',$id); } } =head2 get_Seq_by_acc Title : get_Seq_by_acc Usage : $seq = $db->get_Seq_by_acc('X77802'); Function: Gets a Bio::Seq object by accession number Returns : A Bio::Seq object Args : accession number (as a string) Throws : "acc does not exist" exception =cut sub get_Seq_by_acc{ my ($self,$acc) = @_; #print STDERR "In cache get for $acc\n"; if( defined $self->{'_cache_acc_hash'}->{$acc} ) { #print STDERR "Returning cached $acc\n"; my $seq = $self->{'_cache_acc_hash'}->{$acc}; $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++; return $seq; } else { return $self->_load_Seq('acc',$acc); } } sub number { my ($self, $number) = @_; if ($number) { $self->{'number'} = $number; } else { return $self->{'number'}; } } sub seqdb { my ($self, $seqdb) = @_; if ($seqdb) { $self->{'seqdb'} = $seqdb; } else { return $self->{'seqdb'}; } } sub agr { my ($self, $agr) = @_; if ($agr) { $self->{'agr'} = $agr; } else { return $self->{'agr'}; } } sub _load_Seq { my ($self,$type,$id) = @_; my $seq; if( $type eq 'id') { $seq = $self->seqdb->get_Seq_by_id($id); }elsif ( $type eq 'acc' ) { $seq = $self->seqdb->get_Seq_by_acc($id); } else { $self->throw("Bad internal error. Don't understand $type"); } if( $self->agr() ) { #print STDERR "Pulling out into memory\n"; my $newseq = Bio::Seq->new( -display_id => $seq->display_id, -accession_number => $seq->accession, -seq => $seq->seq, -desc => $seq->desc, ); if( $self->agr() == 1 ) { foreach my $sf ( $seq->top_SeqFeatures() ) { $newseq->add_SeqFeature($sf); } $newseq->annotation($seq->annotation); } $seq = $newseq; } if( $self->_number_free < 1 ) { # remove the latest thing from the hash my @accs = sort { $self->{'_cache_number_hash'}->{$a} <=> $self->{'_cache_number_hash'}->{$b} } keys %{$self->{'_cache_number_hash'}}; my $acc = shift @accs; # remove this guy my $seq = $self->{'_cache_acc_hash'}->{$acc}; delete $self->{'_cache_number_hash'}->{$acc}; delete $self->{'_cache_id_hash'}->{$seq->id}; delete $self->{'_cache_acc_hash'}->{$acc}; } # up the number, register this sequence into the hash. $self->{'_cache_id_hash'}->{$seq->id} = $seq->accession; $self->{'_cache_acc_hash'}->{$seq->accession} = $seq; $self->{'_cache_number_hash'}->{$seq->accession} = $self->{'_cache_number'}++; return $seq; } sub _number_free { my $self = shift; return $self->number - scalar(keys %{$self->{'_cache_number_hash'}}); } =head2 get_Seq_by_version Title : get_Seq_by_version Usage : $seq = $db->get_Seq_by_version('X77802.1'); Function: Gets a Bio::Seq object by sequence version Returns : A Bio::Seq object Args : accession.version (as a string) Throws : "acc.version does not exist" exception =cut sub get_Seq_by_version{ my ($self,@args) = @_; $self->throw("Not implemented it"); } ## End of Package 1;