Raw content of Bio::DB::FileCache # POD documentation - main docs before the code # # =head1 NAME Bio::DB::FileCache - In file cache for BioSeq objects =head1 SYNOPSIS $cachedb = Bio::DB::FileCache->new($real_db); # # $real_db is a Bio::DB::RandomAccessI database # $seq = $cachedb->get_Seq_by_id('ROA1_HUMAN'); # # $seq is a Bio::Seq object # # more control provided with named-parameter form $cachedb = Bio::DB::FileCache->new( -seqdb => $real_db, -file => $path, -keep => $flag, ); =head1 DESCRIPTION This is a disk cache system which saves the objects returned by Bio::DB::RandomAccessI on disk. The disk cache grows without limit, while the process is running, but is automatically unlinked at process termination unless the -keep flag is set. This module requires DB_File and Storable. =head1 CONTACT Lincoln Stein =head2 Reporting Bugs Report bugs to the Bioperl bug tracking system to help us keep track the bugs and their resolution. Bug reports can be submitted via email or the web: bioperl-bugs@bio.perl.org http://bugzilla.bioperl.org/ =head1 APPENDIX The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _ =cut # Let the code begin... package Bio::DB::FileCache; use DB_File; use Storable qw(freeze thaw); use Fcntl qw(O_CREAT O_RDWR O_RDONLY); use File::Temp 'tmpnam'; use vars qw(@ISA); use strict; use Bio::Root::Root; @ISA = qw(Bio::Root::Root Bio::DB::SeqI); use Bio::DB::SeqI; use Bio::Seq::RichSeq; use Bio::Location::Split; use Bio::Location::Fuzzy; use Bio::Seq; use Bio::SeqFeature::Generic; use Bio::Species; use Bio::Annotation::Collection; =head2 new Title : new Usage : $db = Bio::DB::FileCache->new( -seqdb => $db, # Bio::DB::RandomAccessI database -file => $path, # path to index file -keep => $flag, # don't unlink index file ) Function: creates a new on-disk cache Returns : a Bio::DB::RandomAccessI database Args : as above Throws : "Must be a randomaccess database" exception "Could not open primary index file" exception If no index file is specified, will create a temporary file in your system's temporary file directory. The name of this temporary file can be retrieved using file_name(). =cut sub new { my ($class,@args) = @_; my $self = Bio::Root::Root->new(); bless $self,$class; my ($seqdb,$file_name,$keep) = $self->_rearrange([qw(SEQDB FILE KEEP)],@args); if( !defined $seqdb || !ref $seqdb || !$seqdb->isa('Bio::DB::RandomAccessI') ) { $self->throw("Must be a randomaccess database not a [$seqdb]"); } $self->seqdb($seqdb); $file_name ||= tmpnam(); $self->file_name($file_name); $self->keep($keep); $self->_open_database($file_name); return $self; } =head2 get_Seq_by_id Title : get_Seq_by_id Usage : $seq = $db->get_Seq_by_id('ROA1_HUMAN') Function: Gets a Bio::Seq object by its name Returns : a Bio::Seq object Args : the id (as a string) of a sequence Throws : "id does not exist" exception =cut sub get_Seq_by_id{ my ($self,$id) = @_; # look in the cache first my $obj = $self->_get('id' => $id); return $obj if defined $obj; # get object from seqdb $obj = $self->seqdb->get_Seq_by_id($id); $self->_store('id' => $id, $obj); return $obj; } =head2 get_Seq_by_acc Title : get_Seq_by_acc Usage : $seq = $db->get_Seq_by_acc('X77802'); Function: Gets a Bio::Seq object by accession number Returns : A Bio::Seq object Args : accession number (as a string) Throws : "acc does not exist" exception =cut sub get_Seq_by_acc{ my ($self,$acc) = @_; # look in the cache first my $obj = $self->_get('acc' => $acc); return $obj if defined $obj; # get object from seqdb $obj = $self->seqdb->get_Seq_by_acc($acc); $self->_store('acc' => $acc, $obj); return $obj; } =head2 seqdb Title : seqdb Usage : $seqdb = $db->seqdb([$seqdb]) Function: gets/sets the Bio::DB::RandomAccessI database Returns : a Bio::DB::RandomAccessI database Args : new sequence database (optional) Throws : nothing =cut sub seqdb { my ($self, $seqdb) = @_; if ($seqdb) { $self->{'seqdb'} = $seqdb; } else { return $self->{'seqdb'}; } } =head2 file_name Title : file_name Usage : $path = $db->file_name([$file_name]) Function: gets/sets the name of the cache file Returns : a path Args : new cache file name (optional) Throws : nothing It probably isn't useful to set the cache file name after you've opened it. =cut #' sub file_name { my $self = shift; my $d = $self->{file_name}; $self->{file_name} = shift if @_; $d; } =head2 keep Title : keep Usage : $keep = $db->keep([$flag]) Function: gets/sets the value of the "keep" flag Returns : current value Args : new value (optional) Throws : nothing The keep flag will cause the index file to be unlinked when the process exits. Since on some operating systems (Unix, OS/2) the unlinking occurs during the new() call immediately after opening the file, it probably isn't safe to change this value. =cut sub keep { my $self = shift; my $d = $self->{keep}; $self->{keep} = shift if @_; $d; } =head2 db Title : db Usage : $db->db Function: returns tied hash to index database Returns : a Berkeley DB tied hashref Args : none Throws : nothing =cut sub db { shift->{db} } =head2 flush Title : flush Usage : $db->flush Function: flushes the cache Returns : nothing Args : none Throws : nothing =cut sub flush { my $db = shift->db or return; %{$db} = (); } sub _get { my $self = shift; my ($type,$id) = @_; my $serialized = $self->db->{"${type}_${id}"}; my $obj = thaw($serialized); $obj; } sub _store { my $self = shift; my ($type,$id,$obj) = @_; my $serialized = freeze($obj); $self->db->{"${type}_${id}"} = $serialized; } =head2 get_Seq_by_version Title : get_Seq_by_version Usage : $seq = $db->get_Seq_by_version('X77802.1'); Function: Gets a Bio::Seq object by sequence version Returns : A Bio::Seq object Args : accession.version (as a string) Throws : "acc.version does not exist" exception =cut sub get_Seq_by_version{ my ($self,@args) = @_; $self->throw("Not implemented it"); } sub DESTROY { my $self = shift; unlink $self->file_name unless $self->keep; } sub _open_database { my $self = shift; my $file = shift; my $flags = O_CREAT|O_RDWR; my %db; tie(%db,'DB_File',$file,$flags,0666,$DB_BTREE) or $self->throw("Could not open primary index file"); $self->{db} = \%db; unlink $file unless $self->keep; } ## End of Package 1;