[rucksack-cvs] CVS rucksack

alemmens alemmens at common-lisp.net
Tue Feb 19 22:44:06 UTC 2008


Update of /project/rucksack/cvsroot/rucksack
In directory clnet:/tmp/cvs-serv14617

Modified Files:
	index.lisp objects.lisp p-btrees.lisp package.lisp 
	rucksack.asd 
Log Message:
Version 0.1.17: add some list functions and replace persistent lists
by persistent btrees for non-unique slot indexes.

--- /project/rucksack/cvsroot/rucksack/index.lisp	2008/01/22 15:59:24	1.11
+++ /project/rucksack/cvsroot/rucksack/index.lisp	2008/02/19 22:44:05	1.12
@@ -1,4 +1,4 @@
-;; $Id: index.lisp,v 1.11 2008/01/22 15:59:24 alemmens Exp $
+;; $Id: index.lisp,v 1.12 2008/02/19 22:44:05 alemmens Exp $
 
 (in-package :rucksack)
 
@@ -104,8 +104,13 @@
               (funcall function equal value)
             ;; We have a persistent list of values: call FUNCTION for
             ;; each element of that list.
-            (p-mapc (lambda (elt) (funcall function equal elt))
-                    value))))
+            (etypecase value
+              ((or null persistent-cons)
+               (p-mapc (lambda (elt) (funcall function equal elt))
+                       value))
+              (persistent-object-set
+               (map-set-btree value
+                              (lambda (elt) (funcall function equal elt))))))))
     (apply #'map-btree index function :order order args)))
 
 
@@ -193,16 +198,27 @@
 (eval-when (:compile-toplevel :load-toplevel :execute)
 
   (define-index-spec :number-index
-                     '(btree :key< < :value= p-eql))
+                     '(btree :key< <
+                             :value= p-eql
+                             :value-type persistent-object))
 
   (define-index-spec :string-index
-                     '(btree :key< string< :value= p-eql :key-type string))
+                     '(btree :key< string<
+                             :value= p-eql
+                             :value-type persistent-object
+                             :key-type string))
 
   (define-index-spec :symbol-index
-                     '(btree :key< string< :value= p-eql :key-type symbol))
+                     '(btree :key< string<
+                             :value= p-eql
+                             :value-type persistent-object
+                             :key-type symbol))
 
   (define-index-spec :case-insensitive-string-index
-                     '(btree :key< string-lessp :value= p-eql :key-type string))
+                     '(btree :key< string-lessp
+                             :value= p-eql
+                             :value-type persistent-object
+                             :key-type string))
 
   (define-index-spec :trimmed-string-index
                      ;; Like :STRING-INDEX, but with whitespace trimmed left
@@ -210,4 +226,5 @@
                      '(btree :key< string<
                              :key-key trim-whitespace
                              :value= p-eql
+                             :value-type persistent-object
                              :key-type string)))
--- /project/rucksack/cvsroot/rucksack/objects.lisp	2008/02/11 12:47:52	1.21
+++ /project/rucksack/cvsroot/rucksack/objects.lisp	2008/02/19 22:44:06	1.22
@@ -1,4 +1,4 @@
-;; $Id: objects.lisp,v 1.21 2008/02/11 12:47:52 alemmens Exp $
+;; $Id: objects.lisp,v 1.22 2008/02/19 22:44:06 alemmens Exp $
 
 (in-package :rucksack)
 
@@ -181,6 +181,9 @@
 (defmethod p-car ((cons persistent-cons))
   (persistent-data-read #'car cons))
 
+(defmethod p-car ((x (eql nil)))
+  nil)
+
 (defmethod (setf p-car) (value (cons persistent-cons))
   (persistent-data-write (lambda (new-value contents)
                            (setf (car contents) new-value))
@@ -190,6 +193,9 @@
 (defmethod p-cdr ((cons persistent-cons))
   (persistent-data-read #'cdr cons))
 
+(defmethod p-cdr ((x (eql nil)))
+  nil)
+
 (defmethod (setf p-cdr) (value (cons persistent-cons))
   (persistent-data-write (lambda (new-value contents)
                            (setf (cdr contents) new-value))
@@ -212,6 +218,30 @@
 ;; Other functions from chapter 14 of the spec.
 ;;
 
+(defun p-caar (object)
+  "The persistent equivalent of CAAR."
+  (p-car (p-car object)))
+
+(defun p-cadr (object)
+  "The persistent equivalenet of CADR."
+  (p-car (p-cdr object)))
+
+(defun p-cdar (object)
+  "The persistent equivalent of CDAR."
+  (p-cdr (p-car object)))
+
+(defun p-cddr (object)
+  "The persistent equivalent of CDDR."
+  (p-cdr (p-cdr object)))
+
+
+(defmethod p-consp ((object persistent-cons))
+  t)
+
+(defmethod p-consp ((object t))
+  nil)
+
+
 (defmethod p-endp ((object (eql nil)))
   t)
 
@@ -223,8 +253,19 @@
          :datum object
          :expected-type '(or null persistent-cons)))
 
-(defmethod p-cddr ((cons persistent-cons))
-  (p-cdr (p-cdr cons)))
+
+(defun p-last (list &optional (n 1))
+  "Returns the last persistent cons cell of a persistent list (or
+NIL if the list is empty)."
+  (unless (= n 1)
+    ;; DO: Implement this case.
+    (error "The optional argument for P-LAST isn't implemented yet."))
+  (let ((result list)
+        (tail (p-cdr list)))
+    (loop until (p-endp tail)
+          do (shiftf result tail (p-cdr tail)))
+    result))
+
 
 (defun p-mapcar (function list)
   ;; DO: Accept more than one list argument.
--- /project/rucksack/cvsroot/rucksack/p-btrees.lisp	2008/02/11 12:47:52	1.18
+++ /project/rucksack/cvsroot/rucksack/p-btrees.lisp	2008/02/19 22:44:06	1.19
@@ -1,4 +1,4 @@
-;; $Id: p-btrees.lisp,v 1.18 2008/02/11 12:47:52 alemmens Exp $
+;; $Id: p-btrees.lisp,v 1.19 2008/02/19 22:44:06 alemmens Exp $
 
 (in-package :rucksack)
 
@@ -11,6 +11,22 @@
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 #|
+
+This is a modified version of the in-memory btrees.  We use p-arrays,
+p-conses and persistent-objects.
+
+Basically, a B-tree is a balanced multi-way tree.
+
+The reason for using multi-way trees instead of binary trees is that
+the nodes are expected to be on disk; it would be inefficient to have
+to execute a disk operation for each tree node if it contains only 2
+keys.
+
+The key property of B-trees is that each possible search path has the same
+length, measured in terms of nodes.
+|#
+
+#|
    ;; Btrees
    #:btree
    #:btree-key< #:btree-key<= #:btree-key= #:btree-key>= #:btree-key>
@@ -127,25 +143,41 @@
 ORDER is either :ASCENDING (default) or :DESCENDING."))
 
 
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;; B-trees
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;
+;; Set btrees
+;;
+;; A 'set btree' is a special kind of btree that's used to implement sets.
+;; With set btrees, the 'value' part of a btree binding is irrelevant, because
+;; all information is in the keys themselves.
+;;
 
-#|
+(defgeneric set-btree-insert (set value)
+  (:documentation "Add a value to a set-btree.  This will modify the
+set-btree."))
 
-This is a modified version of the in-memory btrees.  We use p-arrays,
-p-conses and persistent-objects.
+(defgeneric set-btree-delete (set value &key if-does-not-exist)
+  (:documentation "Removes a value from a set-btree and returns the
+modified set-btree.  If the value is not present in the set, this
+function signals an error if IF-DOES-NOT-EXIST is :ERROR (if
+IF-DOES-NOT-EXIST is :IGNORE, it returns nil)."))
 
-Basically, a B-tree is a balanced multi-way tree.
+(defgeneric set-btree-search (set value &key errorp default-value)
+  (:documentation
+   "Returns VALUE if it is present in the btree-set SET.  Otherwise
+the result depends on the ERRORP option: if ERRORP is true, a
+btree-search-error is signalled; otherwise, DEFAULT-VALUE is
+returned."))
 
-The reason for using multi-way trees instead of binary trees is that
-the nodes are expected to be on disk; it would be inefficient to have
-to execute a disk operation for each tree node if it contains only 2
-keys.
+(defgeneric map-set-btree (set function)
+  (:documentation
+   "Calls a unary function for each value in a btree-set."))
+
+(defgeneric set-btree-empty-p (set)
+  (:documentation "Returns true iff a btree-set is empty."))
+
+(defgeneric set-count (set)
+  (:documentation "Returns the number of values in a btree-set."))
 
-The key property of B-trees is that each possible search path has the same
-length, measured in terms of nodes.
-|#
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;; Conditions
@@ -213,7 +245,9 @@
                   :reader btree-unique-keys-p
                   :initform t
                   :documentation
-                  "If false, one key can correspond to more than one value.")
+                  "If false, one key can correspond to more than one value.
+In that case, the values are assumed to be objects for which the function
+OBJECT-ID is defined (and returns a unique integer).")
    (key-type :initarg :key-type
              :reader btree-key-type
              :initform t
@@ -323,8 +357,65 @@
   (:metaclass persistent-class))
 
 
+(defmethod initialize-instance :after ((node bnode)
+                                       &key btree &allow-other-keys)
+  (setf (bnode-bindings node) (p-make-array (* 2 (btree-max-node-size btree))
+                                            :initial-element nil)
+        (bnode-nr-bindings node) 0))
+
+
+(defmethod print-object ((node bnode) stream)
+  (print-unreadable-object (node stream :type t :identity t)
+    (format stream "with ~D bindings" (bnode-nr-bindings node))))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;; Set btrees
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+(defclass set-btree (btree)
+  ()
+  (:default-initargs
+   ;; We use a special bnode class because we don't care about the binding
+   ;; values (so we can optimize them away later).
+   :node-class 'set-bnode
+   ;; We use small nodes, because we expect relatively many sets
+   ;; with only a few elements.
+   :max-node-size 8
+   ;; The keys of a set-btree are unique (otherwise it wouldn't be a set
+   ;; but a bag).
+   :unique-keys-p t)
+  (:metaclass persistent-class)
+  (:documentation "A persistent set of objects, implemented as a btree."))
+
+(defclass set-bnode (bnode)
+  ()
+  (:metaclass persistent-class)
+  (:documentation "A special kind of btree node, used to implement set btrees."))
+
+
+;; Sets of persistent objects are implemented as set-btrees.  They're
+;; used to represent the values of a btree that maps slot values to
+;; one or more persistent objects (i.e. they're used for non-unique
+;; slot indexes). They can also be used separately.
+
+(defclass persistent-object-set (set-btree)
+  ()
+  (:default-initargs
+   ;; For sets of persistent-objects we store the objects as keys,
+   ;; but we use the object-ids to compare keys.
+   :key-key 'object-id)
+  (:metaclass persistent-class)
+  (:documentation "A persistent set of persistent-objects, implemented
+as a btree."))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Some info functions
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
 ;;
-;; Info functions
+;; Counting keys or values
 ;;
 
 (defmethod btree-nr-keys ((btree btree))
@@ -344,14 +435,71 @@
       (btree-nr-keys btree)
     (let ((result 0))
       (map-btree-keys btree
-                      (lambda (key p-values)
+                      (lambda (key set)
                         (declare (ignore key))
-                        (incf result (p-length p-values))))
+                        (incf result
+                              (etypecase set
+                                (persistent-object-set (set-count set))
+                                (persistent-cons (p-length set))
+                                (null 0)))))
       result)))
 
 ;;
+;; Depth and balance
+;;
+
+(defmethod node-max-depth ((node bnode))
+  (if (bnode-leaf-p node)
+      0
+    (loop for i below (bnode-nr-bindings node)
+          for binding = (node-binding node i)
+          maximize (1+ (node-max-depth (binding-value binding))))))
+
+(defmethod node-min-depth ((node bnode))
+  (if (bnode-leaf-p node)
+      0
+    (loop for i below (bnode-nr-bindings node)
+          for binding = (node-binding node i)
+          minimize (1+ (node-min-depth (binding-value binding))))))
+
+(defmethod btree-depths ((btree btree))
+  (if (slot-value btree 'root)
+      (values (node-min-depth (btree-root btree))
+              (node-max-depth (btree-root btree)))
+    (values 0 0)))
+
+(defmethod btree-balanced-p ((btree btree))
+  (multiple-value-bind (min max)
+      (btree-depths btree)
+    (<= (- max min) 1)))
+
+
+;;
+;; Debugging
+;;
+
+(defun display-node (node)
+  (pprint (node-as-cons node)))
+
+(defun node-as-cons (node &optional (unique-keys t))
+  (loop with leaf-p = (bnode-leaf-p node)
+        for i below (bnode-nr-bindings node)
+        for value = (node-binding-value node i)
+        collect (list (node-binding-key node i)
+                      (if leaf-p
+                          (if unique-keys
+                              value
+                            (unwrap-persistent-list value))
+                        (node-as-cons value)))))
+
+(defun btree-as-cons (btree)
+  (and (slot-value btree 'root)
+       (node-as-cons (btree-root btree) (btree-unique-keys-p btree))))
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Bindings
-;; 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
 (defstruct binding
   key
@@ -396,72 +544,39 @@
       value
     (p-cons value '())))
 
-;;
-;;
-
-(defmethod initialize-instance :after ((node bnode)
-                                       &key btree &allow-other-keys)
-  (setf (bnode-bindings node) (p-make-array (* 2 (btree-max-node-size btree))
-                                            :initial-element nil)
-        (bnode-nr-bindings node) 0))
-
-
-(defmethod print-object ((node bnode) stream)
-  (print-unreadable-object (node stream :type t :identity t)
-    (format stream "with ~D bindings" (bnode-nr-bindings node))))
 
-;;
-;; Debugging
-;;
-
-(defun display-node (node)
-  (pprint (node-as-cons node)))
-
-(defun node-as-cons (node &optional (unique-keys t))
-  (loop with leaf-p = (bnode-leaf-p node)
-        for i below (bnode-nr-bindings node)
-        for value = (node-binding-value node i)
-        collect (list (node-binding-key node i)
-                      (if leaf-p
-                          (if unique-keys
-                              value
-                            (unwrap-persistent-list value))
-                        (node-as-cons value)))))
-
-(defun btree-as-cons (btree)
-  (and (slot-value btree 'root)
-       (node-as-cons (btree-root btree) (btree-unique-keys-p btree))))
-
-
-;;
-;; Depth and balance
-;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; Set btrees and persistent object sets: implementation
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-(defmethod node-max-depth ((node bnode))
-  (if (bnode-leaf-p node)
-      0
-    (loop for i below (bnode-nr-bindings node)
-          for binding = (node-binding node i)
-          maximize (1+ (node-max-depth (binding-value binding))))))
+(defmethod set-btree-insert ((set set-btree) value)
+  (btree-insert set value nil :if-exists :overwrite))
 
-(defmethod node-min-depth ((node bnode))
-  (if (bnode-leaf-p node)
-      0
-    (loop for i below (bnode-nr-bindings node)
-          for binding = (node-binding node i)
-          minimize (1+ (node-min-depth (binding-value binding))))))
+(defmethod set-btree-delete ((set set-btree) value &key (if-does-not-exist nil))
+  (btree-delete-key set value :if-does-not-exist if-does-not-exist))
 
-(defmethod btree-depths ((btree btree))
-  (if (slot-value btree 'root)
-      (values (node-min-depth (btree-root btree))
-              (node-max-depth (btree-root btree)))
-    (values 0 0)))
+(defmethod set-btree-search ((set set-btree) value &key errorp default-value)
+  (btree-search set value
+                :errorp errorp
+                :default-value default-value))
+
+(defmethod map-set-btree ((set set-btree) function)
+  (map-btree-keys set
+                  (lambda (key value)
+                    (declare (ignore value))
+                    (funcall function key))))
+
+(defmethod set-btree-empty-p ((set set-btree))
+  (or (not (slot-boundp set 'root))
+      (let ((root (slot-value set 'root)))
+        (and (bnode-leaf-p root)
+             (= 0 (bnode-nr-bindings root))))))
 
-(defmethod btree-balanced-p ((btree btree))
-  (multiple-value-bind (min max)
-      (btree-depths btree)
-    (<= (- max min) 1)))
+(defmethod set-count ((set set-btree))
+  (btree-nr-values set))
 
+;; DO: Change the binding functions for SET-BTREES to optimize the values
+;; away.
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Search
@@ -829,12 +944,32 @@
                         :key key
                         :value value))))
           ;; For non-unique keys, we ignore the :IF-EXISTS option and
-          ;; just add value to the list of values (unless value is already
+          ;; just add value to the set of values (unless value is already
           ;; there).
-          (unless (p-find value (node-binding-value leaf position)
-                          :test (btree-value= btree))
-            (setf (node-binding-value leaf position)
-                  (p-cons value (node-binding-value leaf position)))))
+          (let ((set (node-binding-value leaf position)))
+            (etypecase set
+              (persistent-object-set
+               (set-btree-insert set value))
+              (persistent-cons
+               (if (eql (btree-value-type btree) 'persistent-object)
+                   ;; The values are persistent objects, so we know we
+                   ;; can put them in a persistent-object-set.  Let's
+                   ;; do that, now we know that there are at least two
+                   ;; objects in the set.
+                   (let ((new-set (make-instance 'persistent-object-set)))
+                     (set-btree-insert new-set (p-car set))
+                     (set-btree-insert new-set value)
+                     (setf (node-binding-value leaf position) new-set))
+                 ;; We don't know anything about the values, so we have to
+                 ;; resort to a persistent list to store the values.  This
+                 ;; will lead to bad performance if few keys map to many
+                 ;; values, but we don't have much choice.
+                 ;; DO: Use set-btrees for other types for which we can come
+                 ;; up with some kind of ordering (like strings, numbers,
+                 ;; etcetera).
+                 (unless (p-find value set :test (btree-value= btree))
+                   (setf (node-binding-value leaf position)
+                         (p-cons value (node-binding-value leaf position)))))))))
        ;; The key doesn't exist yet. Create a new binding and add it to the
        ;; leaf index in the right position.
        (progn
@@ -891,22 +1026,26 @@
              ;; just delete the value from the list of values (unless it's
              ;; not there).
              (flet ((check (x) (funcall (btree-value= btree) x value)))
-               (let ((values (binding-value binding)))
-                 ;; EFFICIENCY: We walk the list twice now, which is not
-                 ;; necessary.  Write a special purpose function for this
-                 ;; instead of just using P-FIND and P-DELETE.
-                 (if (p-find value values :test (btree-value= btree))
-                     (if (null (p-cdr values))
-                         ;; This is the last value in the list: remove the
-                         ;; key.
-                         (btree-delete-key btree key)
-                       ;; There's more than one value in the list: delete the
-                       ;; value that must be deleted and keep the other values.
-                       (setf (node-binding-value node position)
-                             (p-delete-if #'check (binding-value binding)
-                                          :count 1)))
-                   ;; The value is not in the list: forget it.
-                   (forget-it)))))))))
+               (let ((set (binding-value binding)))
+                 (etypecase set

[41 lines skipped]
--- /project/rucksack/cvsroot/rucksack/package.lisp	2008/02/11 12:47:52	1.13
+++ /project/rucksack/cvsroot/rucksack/package.lisp	2008/02/19 22:44:06	1.14
@@ -1,4 +1,4 @@
-;; $Id: package.lisp,v 1.13 2008/02/11 12:47:52 alemmens Exp $
+;; $Id: package.lisp,v 1.14 2008/02/19 22:44:06 alemmens Exp $
 
 #-(or allegro lispworks sbcl openmcl)
   (error "Unsupported implementation: ~A" (lisp-implementation-type))
@@ -32,7 +32,9 @@
    #:object-id
    #:p-cons #:p-array
    #:p-eql
-   #:p-car #:p-cdr #:p-list
+   #:p-car #:p-cdr #:p-list #:p-last
+   #:p-endp #:p-consp
+   #:p-caar #:p-cadr #:p-cdar #:p-cddr
    #:unwrap-persistent-list
    #:p-mapcar #:p-mapc #:p-maplist #:p-mapl
    #:p-member-if
--- /project/rucksack/cvsroot/rucksack/rucksack.asd	2008/02/11 12:47:52	1.18
+++ /project/rucksack/cvsroot/rucksack/rucksack.asd	2008/02/19 22:44:06	1.19
@@ -1,9 +1,9 @@
-;;; $Id: rucksack.asd,v 1.18 2008/02/11 12:47:52 alemmens Exp $
+;;; $Id: rucksack.asd,v 1.19 2008/02/19 22:44:06 alemmens Exp $
 
 (in-package :cl-user)
 
 (asdf:defsystem :rucksack
-  :version "0.1.16"
+  :version "0.1.17"
   :serial t
   :components ((:file "queue")
                (:file "package")




More information about the rucksack-cvs mailing list