[elephant-devel] db gc
Gábor Melis
mega at hotpop.com
Sun Mar 13 20:01:26 UTC 2005
On Thursday 03 March 2005 21:10, Ben wrote:
> it appears that maybe i'm storing objects incorrectly. perhaps the
> right way to do this is to store objects as OIDs without classes, and
> then have a separate OID -> class table. that way change-class can
> work correctly. it depends on if you think change-class should update
> the DB or not, though. (mental note to self: if one implements this,
> one should make sure the instance cache code does the right thing
> e.g. check the class before handing back a cached instance!)
This patch does what you describe except for the mental not which I do not
understand. I thought my full text indices were big because the class name is
stored in each reference to the persistent object. Turns out I was mistaken,
but here it is anyway. The gc had to be modified a bit, too.
Gábor
-------------- next part --------------
A non-text attachment was scrubbed...
Name: persistent-serialization.patch
Type: text/x-diff
Size: 4445 bytes
Desc: not available
URL: <https://mailman.common-lisp.net/pipermail/elephant-devel/attachments/20050313/7b1ec1db/attachment.patch>
-------------- next part --------------
;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-
;;; TODO:
;;;
;;; * read db linearly and record the object graph in memory, or just
;;; cache it when it's too big to hold
;;;
;;; * different classes for the same oid?
;;;
;;; * incremental gc
(in-package "ELEPHANT")
(eval-when (:compile-toplevel :load-toplevel :execute)
(defparameter *debug-gc* nil))
(defmacro when-debug-gc (&body body)
(when *debug-gc*
`(progn
, at body)))
(defmacro debug-gc (&rest args)
(when *debug-gc*
`(format t , at args)))
(defun mark-oids (buf-str marker)
"Read oids from BUF-STR as if it was deserialized, but skip
irrelevant data and be fast. Call MARKER with controller oid and
class."
(declare (optimize (speed 3) (safety 0))
(type (or null buffer-stream) buf-str))
(labels
((%mark-oids (bs)
(declare (optimize (speed 3) (safety 0))
(type buffer-stream bs))
(let ((tag (buffer-read-byte bs)))
(declare (type foreign-char tag))
(debug-gc "buffer=~S~%tag=~S~%" bs tag)
(cond
((= tag +fixnum+)
(buffer-skip bs 4))
((= tag +nil+) nil)
((or (= tag +ucs1-symbol+)
(= tag +ucs2-symbol+)
(= tag +ucs4-symbol+))
(buffer-skip bs (buffer-read-fixnum bs))
(%mark-oids bs))
((or (= tag +ucs1-string+)
(= tag +ucs2-string+)
(= tag +ucs4-string+))
(buffer-skip bs (buffer-read-fixnum bs)))
((= tag +persistent+)
(funcall marker *store-controller*
(buffer-read-fixnum bs)
(really-deserialize bs :recursivep t)))
((= tag +single-float+)
(buffer-skip bs 4))
((= tag +double-float+)
(buffer-skip bs 8))
((= tag +char+)
(buffer-skip bs 4))
((or (= tag +ucs1-pathname+)
(= tag +ucs2-pathname+)
(= tag +ucs4-pathname+))
(buffer-skip bs (buffer-read-fixnum bs)))
((or (= tag +positive-bignum+)
(= tag +negative-bignum+))
(buffer-skip bs (buffer-read-fixnum bs)))
((= tag +rational+)
(%mark-oids bs)
(%mark-oids bs))
((= tag +cons+)
(let* ((id (buffer-read-fixnum bs))
(maybe-cons (gethash id *circularity-hash*)))
(unless maybe-cons
(setf (gethash id *circularity-hash*) t)
(%mark-oids bs)
(%mark-oids bs))))
((= tag +hash-table+)
(let* ((id (buffer-read-fixnum bs))
(maybe-hash (gethash id *circularity-hash*)))
(unless maybe-hash
;; test, rehash-size, rehash-threshold
(%mark-oids bs)
(%mark-oids bs)
(%mark-oids bs)
(setf (gethash id *circularity-hash*) t)
(loop for i fixnum from 0
below (really-deserialize bs :recursivep t)
do
;; key, value
(%mark-oids bs)
(%mark-oids bs)))))
((= tag +object+)
(let* ((id (buffer-read-fixnum bs))
(maybe-o (gethash id *circularity-hash*)))
(unless maybe-o
;; class
(%mark-oids bs)
(setf (gethash id *circularity-hash*) t)
(loop for i fixnum from 0
below (really-deserialize bs :recursivep t)
do
;; slot, value
(%mark-oids bs)
(%mark-oids bs)))))
((= tag +array+)
(let* ((id (buffer-read-fixnum bs))
(maybe-array (gethash id *circularity-hash*)))
(unless maybe-array
(let ((flags (buffer-read-byte bs))
(total-size 1))
(loop for i fixnum from 0 below (buffer-read-int bs) do
(setf total-size (* total-size (buffer-read-int bs))))
;; has fill pointer?
(when (/= 0 (logand +fill-pointer-p+ flags))
(buffer-read-int bs))
(setf (gethash id *circularity-hash*) t)
(loop for i fixnum from 0 below total-size
do (%mark-oids bs))))))
(t (error "mark-oids fubar!"))))))
(etypecase buf-str
(null (return-from mark-oids nil))
(buffer-stream
(setq *lisp-obj-id* 0)
(clrhash *circularity-hash*)
(%mark-oids buf-str)))))
(defmacro with-marking-deserialize ((marker) &body body)
"Execute BODY in an environment where calls to DESERIALIZE are
hijacked and end up as MARK-OIDS calls that call MARKER for each
oid/class encountered."
(let ((buf (gensym)))
`(let ((*deserialize-fn* #'(lambda (,buf) (mark-oids ,buf ,marker))))
, at body)))
(defparameter *persistent-effective-slot-definition-class*
(find-class 'persistent-effective-slot-definition))
(defun walk (oid class)
"Read persistent object slots and btree key/value pairs to force
deserialization."
(setq class (find-class class))
(flet ((walk-slot (slot-name)
(with-buffer-streams (key-buf value-buf)
(buffer-write-int oid key-buf)
(serialize slot-name key-buf)
(let ((buf (db-get-key-buffered
(controller-db *store-controller*)
key-buf value-buf)))
(when buf
(deserialize buf))))))
(when (subtypep class 'persistent)
(debug-gc "Walking object:~S (~S) ~%" oid class)
(loop for slot-definition in (class-slots class)
when (eq (class-of slot-definition)
*persistent-effective-slot-definition-class*)
do (debug-gc "Walking object slot:~S ~S~%" oid
(slot-definition-name slot-definition))
(walk-slot (slot-definition-name slot-definition)))))
(when (subtypep class 'btree)
(debug-gc "Walking btree:~S~%" oid)
;; FIXME: make-instance should be avoided here
(map-btree (lambda (key value)
(declare (ignore key value))
(debug-gc "Walked btree kv:~S ~S~%" key value))
(make-instance 'btree :from-oid oid))))
(defmacro with-db-cursor ((name value) &body body)
`(let ((,name ,value))
(unwind-protect (progn , at body)
(db-cursor-close ,name))))
(defun db-cursor-move (db-cursor &rest flags)
"Small wrapper for DB-CURSOR-MOVE-BUFFERED."
(with-buffer-streams (key-buf value-buf)
(apply #'db-cursor-move-buffered db-cursor key-buf value-buf flags)))
(defun gc-btree (btree live-oids &key oid-in-value)
"Remove all entries from BTREE belonging to oids not in
LIVE-OIDS. Read the oid from the key or the value an entry according
to OID-IN-VALUE."
(let ((n-visited 0)
(n-deleted 0))
;; db-cursor-delete must be enclosed in a transaction, else we got
;; a rather generic berkeley db error
(with-transaction (:degree-2 t :txn-nosync t :dirty-read t)
(with-db-cursor (db-cursor (db-cursor btree))
(loop for (key value) = (multiple-value-list
(db-cursor-move db-cursor :next t))
while key
do (let ((oid (buffer-read-int (if oid-in-value value key))))
(when-debug-gc
(let ((k (deserialize key)))
(if (gethash oid live-oids)
(debug-gc "Keeping:~S ~S~%" oid k)
(debug-gc "GCing:~S ~S~%" oid k)))
(force-output))
(incf n-visited)
(unless (gethash oid live-oids)
(incf n-deleted)
(db-cursor-delete db-cursor))))))
(debug-gc "~A/~A~%" n-visited n-deleted)))
(defun elephant-gc ()
"Remove unreferenced \(garbage) objects from the db. This needs to
be run offline, i.e. with no other db operations running including
open transactions."
(unwind-protect
(let ((sc *store-controller*)
(processed-oids (make-hash-table))
(current-oids (make-hash-table))
(new-oids (make-hash-table)))
(flet ((marker (controller oid class)
(declare (ignore controller))
(unless (or (gethash oid processed-oids)
(gethash oid current-oids))
(debug-gc "marking oid:~S~%" oid)
(setf (gethash oid new-oids) class))))
;; Let's mark live objects.
(with-marking-deserialize (#'marker)
;; mark the root
(setf (gethash -1 current-oids)
(class-name (class-of (controller-root sc))))
(loop
while (< 0 (hash-table-count current-oids)) do
;; walk objects for current oids
(maphash (lambda (oid class)
(debug-gc "processing oid:~S(~S)~%" oid class)
(walk oid class)
(setf (gethash oid processed-oids) class))
current-oids)
;; move NEW-OIDS to CURRENT-OIDS
(setf current-oids new-oids
new-oids (make-hash-table))))
;; PROCESSED-OIDS now contains the oids of all reachable
;; objects, remove what's not in it
(when-debug-gc
(maphash (lambda (oid class)
(debug-gc "~S ~S~%" oid class))
processed-oids))
(time (gc-btree (controller-db sc) processed-oids))
(time (gc-btree (controller-btrees sc) processed-oids))
;; If an inexed btree becomes garbage its indices need to
;; be cleaned up.
(time (gc-btree (controller-indices sc) processed-oids
:oid-in-value t))))
;; make-instance (used in WALK for btrees) calls modify the
;; instance cache but the cached data is wrong since mark-oids
;; skips a lot of things
(clrhash (instance-cache *store-controller*))))
More information about the elephant-devel
mailing list