[elephant-devel] db gc

Gábor Melis mega at hotpop.com
Sun Mar 13 20:01:26 UTC 2005


On Thursday 03 March 2005 21:10, Ben wrote:
> it appears that maybe i'm storing objects incorrectly.  perhaps the
> right way to do this is to store objects as OIDs without classes, and
> then have a separate OID -> class table.  that way change-class can
> work correctly.  it depends on if you think change-class should update
> the DB or not, though.  (mental note to self: if one implements this,
> one should make sure the instance cache code does the right thing
> e.g. check the class before handing back a cached instance!)

This patch does what you describe except for the mental not which I do not 
understand. I thought my full text indices were big because the class name is 
stored in each reference to the persistent object. Turns out I was mistaken, 
but here it is anyway. The gc had to be modified a bit, too.

Gábor
-------------- next part --------------
A non-text attachment was scrubbed...
Name: persistent-serialization.patch
Type: text/x-diff
Size: 4445 bytes
Desc: not available
URL: <https://mailman.common-lisp.net/pipermail/elephant-devel/attachments/20050313/7b1ec1db/attachment.patch>
-------------- next part --------------
;;; -*- Mode: Lisp; Syntax: ANSI-Common-Lisp; Base: 10 -*-

;;; TODO:
;;;
;;; * read db linearly and record the object graph in memory, or just
;;; cache it when it's too big to hold
;;;
;;; * different classes for the same oid?
;;;
;;; * incremental gc

(in-package "ELEPHANT")

(eval-when (:compile-toplevel :load-toplevel :execute)
  (defparameter *debug-gc* nil))

(defmacro when-debug-gc (&body body)
  (when *debug-gc*
    `(progn
       , at body)))

(defmacro debug-gc (&rest args)
  (when *debug-gc*
    `(format t , at args)))

(defun mark-oids (buf-str marker)
  "Read oids from BUF-STR as if it was deserialized, but skip
irrelevant data and be fast. Call MARKER with controller oid and
class."
  (declare (optimize (speed 3) (safety 0))
	   (type (or null buffer-stream) buf-str))
  (labels
      ((%mark-oids (bs)
	 (declare (optimize (speed 3) (safety 0))
		  (type buffer-stream bs))
	 (let ((tag (buffer-read-byte bs)))
	   (declare (type foreign-char tag))
           (debug-gc "buffer=~S~%tag=~S~%" bs tag)
	   (cond
	     ((= tag +fixnum+)
	      (buffer-skip bs 4))
	     ((= tag +nil+) nil)
	     ((or (= tag +ucs1-symbol+)
                  (= tag +ucs2-symbol+)
                  (= tag +ucs4-symbol+))
              (buffer-skip bs (buffer-read-fixnum bs))
              (%mark-oids bs))
	     ((or (= tag +ucs1-string+)
                  (= tag +ucs2-string+)
                  (= tag +ucs4-string+))
	      (buffer-skip bs (buffer-read-fixnum bs)))
	     ((= tag +persistent+)
	      (funcall marker *store-controller*
                       (buffer-read-fixnum bs)
                       (really-deserialize bs :recursivep t)))
	     ((= tag +single-float+)
	      (buffer-skip bs 4))
	     ((= tag +double-float+)
	      (buffer-skip bs 8))
	     ((= tag +char+)
	      (buffer-skip bs 4))
	     ((or (= tag +ucs1-pathname+)
                  (= tag +ucs2-pathname+)
                  (= tag +ucs4-pathname+))
              (buffer-skip bs (buffer-read-fixnum bs)))
	     ((or (= tag +positive-bignum+)
                  (= tag +negative-bignum+))
	      (buffer-skip bs (buffer-read-fixnum bs)))
	     ((= tag +rational+)
	      (%mark-oids bs)
              (%mark-oids bs))
	     ((= tag +cons+)
	      (let* ((id (buffer-read-fixnum bs))
		     (maybe-cons (gethash id *circularity-hash*)))
		(unless maybe-cons
                  (setf (gethash id *circularity-hash*) t)
                  (%mark-oids bs)
                  (%mark-oids bs))))
	     ((= tag +hash-table+)
	      (let* ((id (buffer-read-fixnum bs))
		     (maybe-hash (gethash id *circularity-hash*)))
		(unless maybe-hash
                  ;; test, rehash-size, rehash-threshold
                  (%mark-oids bs)
                  (%mark-oids bs)
                  (%mark-oids bs)
                  (setf (gethash id *circularity-hash*) t)
                  (loop for i fixnum from 0
                     below (really-deserialize bs :recursivep t)
                     do
                     ;; key, value
                     (%mark-oids bs)
                     (%mark-oids bs)))))
             ((= tag +object+)
              (let* ((id (buffer-read-fixnum bs))
                     (maybe-o (gethash id *circularity-hash*)))
                (unless maybe-o
                  ;; class
                  (%mark-oids bs)
                  (setf (gethash id *circularity-hash*) t)
                  (loop for i fixnum from 0
                     below (really-deserialize bs :recursivep t)
                     do
                     ;; slot, value
                     (%mark-oids bs)
                     (%mark-oids bs)))))
	     ((= tag +array+)
	      (let* ((id (buffer-read-fixnum bs))
		     (maybe-array (gethash id *circularity-hash*)))
		(unless maybe-array
                  (let ((flags (buffer-read-byte bs))
                        (total-size 1))
                    (loop for i fixnum from 0 below (buffer-read-int bs) do
                         (setf total-size (* total-size (buffer-read-int bs))))
                    ;; has fill pointer?
                    (when (/= 0 (logand +fill-pointer-p+ flags))
                      (buffer-read-int bs))
                    (setf (gethash id *circularity-hash*) t)
                    (loop for i fixnum from 0 below total-size
                       do (%mark-oids bs))))))
             (t (error "mark-oids fubar!"))))))
    (etypecase buf-str
      (null (return-from mark-oids nil))
      (buffer-stream
       (setq *lisp-obj-id* 0)
       (clrhash *circularity-hash*)
       (%mark-oids buf-str)))))

(defmacro with-marking-deserialize ((marker) &body body)
  "Execute BODY in an environment where calls to DESERIALIZE are
hijacked and end up as MARK-OIDS calls that call MARKER for each
oid/class encountered."
  (let ((buf (gensym)))
    `(let ((*deserialize-fn* #'(lambda (,buf) (mark-oids ,buf ,marker))))
       , at body)))

(defparameter *persistent-effective-slot-definition-class*
  (find-class 'persistent-effective-slot-definition))

(defun walk (oid class)
  "Read persistent object slots and btree key/value pairs to force
deserialization."
  (setq class (find-class class))
  (flet ((walk-slot (slot-name)
           (with-buffer-streams (key-buf value-buf)
             (buffer-write-int oid key-buf)
             (serialize slot-name key-buf)
             (let ((buf (db-get-key-buffered
                         (controller-db *store-controller*)
                         key-buf value-buf)))
               (when buf
                 (deserialize buf))))))
    (when (subtypep class 'persistent)
      (debug-gc "Walking object:~S (~S) ~%" oid class)
      (loop for slot-definition in (class-slots class)
         when (eq (class-of slot-definition)
                  *persistent-effective-slot-definition-class*)
         do (debug-gc "Walking object slot:~S ~S~%" oid
                      (slot-definition-name slot-definition))
         (walk-slot (slot-definition-name slot-definition)))))
  (when (subtypep class 'btree)
    (debug-gc "Walking btree:~S~%" oid)
    ;; FIXME: make-instance should be avoided here
    (map-btree (lambda (key value)
                 (declare (ignore key value))
                 (debug-gc "Walked btree kv:~S ~S~%" key value))
               (make-instance 'btree :from-oid oid))))

(defmacro with-db-cursor ((name value) &body body)
  `(let ((,name ,value))
     (unwind-protect (progn , at body)
       (db-cursor-close ,name))))

(defun db-cursor-move (db-cursor &rest flags)
  "Small wrapper for DB-CURSOR-MOVE-BUFFERED."
  (with-buffer-streams (key-buf value-buf)
    (apply #'db-cursor-move-buffered db-cursor key-buf value-buf flags)))

(defun gc-btree (btree live-oids &key oid-in-value)
  "Remove all entries from BTREE belonging to oids not in
LIVE-OIDS. Read the oid from the key or the value an entry according
to OID-IN-VALUE."
  (let ((n-visited 0)
        (n-deleted 0))
    ;; db-cursor-delete must be enclosed in a transaction, else we got
    ;; a rather generic berkeley db error
    (with-transaction (:degree-2 t :txn-nosync t :dirty-read t)
      (with-db-cursor (db-cursor (db-cursor btree))
        (loop for (key value) = (multiple-value-list
                                 (db-cursor-move db-cursor :next t))
           while key
           do (let ((oid (buffer-read-int (if oid-in-value value key))))
                (when-debug-gc
                  (let ((k (deserialize key)))
                    (if (gethash oid live-oids)
                        (debug-gc "Keeping:~S ~S~%" oid k)
                        (debug-gc "GCing:~S ~S~%" oid k)))
                  (force-output))
                (incf n-visited)
                (unless (gethash oid live-oids)
                  (incf n-deleted)
                  (db-cursor-delete db-cursor))))))
    (debug-gc "~A/~A~%" n-visited n-deleted)))

(defun elephant-gc ()
  "Remove unreferenced \(garbage) objects from the db. This needs to
be run offline, i.e. with no other db operations running including
open transactions."
  (unwind-protect
       (let ((sc *store-controller*)
             (processed-oids (make-hash-table))
             (current-oids (make-hash-table))
             (new-oids (make-hash-table)))
         (flet ((marker (controller oid class)
                  (declare (ignore controller))
                  (unless (or (gethash oid processed-oids)
                              (gethash oid current-oids))
                    (debug-gc "marking oid:~S~%" oid)
                    (setf (gethash oid new-oids) class))))
           ;; Let's mark live objects.
           (with-marking-deserialize (#'marker)
             ;; mark the root
             (setf (gethash -1 current-oids)
                   (class-name (class-of (controller-root sc))))
             (loop
                while (< 0 (hash-table-count current-oids)) do
                ;; walk objects for current oids
                (maphash (lambda (oid class)
                           (debug-gc "processing oid:~S(~S)~%" oid class)
                           (walk oid class)
                           (setf (gethash oid processed-oids) class))
                         current-oids)
                ;; move NEW-OIDS to CURRENT-OIDS
                (setf current-oids new-oids
                      new-oids (make-hash-table))))
           ;; PROCESSED-OIDS now contains the oids of all reachable
           ;; objects, remove what's not in it
           (when-debug-gc
             (maphash (lambda (oid class)
                        (debug-gc "~S ~S~%" oid class))
                      processed-oids))
           (time (gc-btree (controller-db sc) processed-oids))
           (time (gc-btree (controller-btrees sc) processed-oids))
           ;; If an inexed btree becomes garbage its indices need to
           ;; be cleaned up.
           (time (gc-btree (controller-indices sc) processed-oids
                           :oid-in-value t))))
    ;; make-instance (used in WALK for btrees) calls modify the
    ;; instance cache but the cached data is wrong since mark-oids
    ;; skips a lot of things
    (clrhash (instance-cache *store-controller*))))


More information about the elephant-devel mailing list