From alemmens at common-lisp.net Sun Feb 3 12:32:21 2008 From: alemmens at common-lisp.net (alemmens) Date: Sun, 3 Feb 2008 07:32:21 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack Message-ID: <20080203123221.1C11D71142@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack In directory clnet:/tmp/cvs-serv14177 Modified Files: cache.lisp done.txt garbage-collector.lisp object-table.lisp objects.lisp rucksack.asd Log Message: Version 0.1.15. Fixed a garbage collector bug reported by Sean Ross. When the garbage collector deletes object ids from the object table (because the objects are dead and we may want to reuse their ids later for other objects), it should also remove that object from the cache. If it doesn't, there's a possibility that the object id will be reused later for a new object and the cache wil still refer to the old in-memory object. --- /project/rucksack/cvsroot/rucksack/cache.lisp 2008/01/31 20:26:08 1.13 +++ /project/rucksack/cvsroot/rucksack/cache.lisp 2008/02/03 12:32:15 1.14 @@ -1,4 +1,4 @@ -;; $Id: cache.lisp,v 1.13 2008/01/31 20:26:08 alemmens Exp $ +;; $Id: cache.lisp,v 1.14 2008/02/03 12:32:15 alemmens Exp $ (in-package :rucksack) @@ -36,6 +36,11 @@ (:documentation "Retrieves the object with the given id from the cache and returns that object.")) +(defgeneric cache-delete-object (object-id cache) + (:documentation "Removes an object-id from the cache and from +the object table, so the object-id can be reused for another object +later.")) + (defgeneric cache-commit (cache) (:documentation "Makes sure that all changes to the cache are written to disk.")) @@ -84,14 +89,15 @@ ;; Clean objects (objects :initarg :objects :reader objects - :documentation "A hash-table (from id to object) + :documentation "A hash-table \(from id to object) containing the youngest committed version of all objects that are -currently kept in memory but are not dirty. ('The youngest version' +currently kept in memory but are not dirty. \('The youngest version' means the version belonging to the youngest committed transaction.)") (queue :initform (make-instance 'queue) :reader queue :documentation "A queue of the ids of all non-dirty objects that are currently in the cache memory. Whenever an object is -retrieved (i.e. read), it's added to the queue.") +retrieved (i.e. read), it's added to the queue. If an object-id is +in this queue, it is not necessarily in the OBJECTS hash-table.") (last-timestamp :initform (get-universal-time) :accessor last-timestamp) (transaction-id-helper :initform -1 @@ -233,7 +239,7 @@ (- (cache-size cache) (cache-count cache))) ;; -;; Create/get/touch +;; Create/get/touch/delete ;; (defmethod cache-create-object (object (cache standard-cache)) @@ -338,6 +344,9 @@ result)))) +(defmethod cache-delete-object (object-id (cache standard-cache)) + (remhash object-id (objects cache))) + ;; ;; Queue operations --- /project/rucksack/cvsroot/rucksack/done.txt 2008/01/31 20:26:08 1.15 +++ /project/rucksack/cvsroot/rucksack/done.txt 2008/02/03 12:32:16 1.16 @@ -1,89 +1,98 @@ +* 2008-02-02 - version 0.1.15 + +Fixed a garbage collector bug reported by Sean Ross. When the garbage +collector deletes object ids from the object table (because the +objects are dead and we may want to reuse their ids later for other +objects), it should also remove that object from the cache. If it +doesn't, there's a possibility that the object id will be reused later +for a new object and the cache wil still refer to the old in-memory +object. + + * 2008-01-31 - version 0.1.14 -- Class and slot indexes now map directly to objects instead of - object-ids. This fixes a bug where the garbage collector forgot - to add all indexed objects to the root set. (Suggested by Sean - Ross.) +Class and slot indexes now map directly to objects instead of +object-ids. This fixes a bug where the garbage collector forgot to +add all indexed objects to the root set. (Suggested by Sean Ross.) -- Increase default cache size to 100,000 objects. +Increase default cache size to 100,000 objects. * 2008-01-23 - version 0.1.13 -- Add Brad Beveridge's basic unit test suite (modified to work - with lisp-unit instead of 5am). +Add Brad Beveridge's basic unit test suite (modified to work with +lisp-unit instead of 5am). -- Add Chris Riesbeck's lisp-unit library to help with creating - unit test suites. +Add Chris Riesbeck's lisp-unit library to help with creating +unit test suites. -- Move all tests to their own directory. +Move all tests to their own directory. -- Add P-NREVERSE and P-POSITION for persistent lists. +Add P-NREVERSE and P-POSITION for persistent lists. -- Fix bugs in P-REPLACE and P-MAPCAR. +Fix bugs in P-REPLACE and P-MAPCAR. * 2008-01-22 - version 0.1.12 -- Use (ARRAY-DIMENSION buffer 0) instead of LENGTH in - LOAD-BUFFER, because we want to ignore the fill pointer - here. Thanks to Sean Ross. +Use (ARRAY-DIMENSION buffer 0) instead of LENGTH in LOAD-BUFFER, +because we want to ignore the fill pointer here. Thanks to Sean Ross. * 2008-01-22 - version 0.1.11 -- Fix bug caused by LEAF-DELETE-KEY. Reported and fixed by - Brad Beveridge. +Fix bug caused by LEAF-DELETE-KEY. Reported and fixed by Brad +Beveridge. -- Fix some typos (:VALUE should be :VALUE=) in index.lisp. +Fix some typos (:VALUE should be :VALUE=) in index.lisp. * 2008-01-16 - version 0.1.10 -- When deleting a key from a btree, use the BTREE-KEY= function (not - P-EQL) to determine the position of the key. Reported and fixed - by Leonid Novikov. +When deleting a key from a btree, use the BTREE-KEY= function (not +P-EQL) to determine the position of the key. Reported and fixed +by Leonid Novikov. * 2007-08-12 - version 0.1.9 -- Fix btree bug during btree-delete: if we're deleting the biggest key - from a leaf, we should update the parents so they'll use the key that - has now become the biggest. (Henrik Hjelte.) - -- Try to signal an error when an incompatible value is given to - indexed slots, e.g. trying to put a string into a slot with a - :symbol-index. (Takehiko Abe) +Fix btree bug during btree-delete: if we're deleting the biggest key +from a leaf, we should update the parents so they'll use the key that +has now become the biggest. (Henrik Hjelte.) + +Try to signal an error when an incompatible value is given to indexed +slots, e.g. trying to put a string into a slot with a :symbol-index. +(Takehiko Abe) -- Signal an error during when putting duplicate values into a slot for - which duplicate values are not allowed. (Takehiko Abe) +Signal an error during when putting duplicate values into a slot for +which duplicate values are not allowed. (Takehiko Abe) -- Use BTREE-VALUE-TYPE, not BTREE-KEY-TYPE, when type checking a value - during BTREE-INSERT. (Takehiko Abe) +Use BTREE-VALUE-TYPE, not BTREE-KEY-TYPE, when type checking a value +during BTREE-INSERT. (Takehiko Abe) -- Wrap COMPILE-FILE calls in a WITH-COMPILATION-UNIT to prevent - superfluous warnings about undefined functions. +Wrap COMPILE-FILE calls in a WITH-COMPILATION-UNIT to prevent +superfluous warnings about undefined functions. * 2007-03-13 - version 0.1.8 -- Fix a bug in LEAF-DELETE-KEY (thanks to Henrik Hjelte). +Fix a bug in LEAF-DELETE-KEY (thanks to Henrik Hjelte). -- Add RUCKSACK-DELETE-OBJECT, RUCKSACK-DELETE-OBJECTS and - RUCKSACK-ROOT-P (suggested by Henrik Hjelte). I haven't - tested these functions yet. +Add RUCKSACK-DELETE-OBJECT, RUCKSACK-DELETE-OBJECTS and +RUCKSACK-ROOT-P (suggested by Henrik Hjelte). I haven't tested these +functions yet. * 2007-01-22 - version 0.1.7 -- Get rid of two SBCL compiler warnings. (Reported by Cyrus Harmon.) +Get rid of two SBCL compiler warnings. (Reported by Cyrus Harmon.) * 2007-01-21 - version 0.1.6 -- Added serializing/deserializing of structures. Only works on SBCL. - (Thanks to Levente M?sz?ros.) +Added serializing/deserializing of structures. Only works on SBCL. +(Thanks to Levente M?sz?ros.) * 2006-11-30 @@ -128,9 +137,6 @@ * 2006-08-31 -- Get rid of the Lispworks-specific PROCESS-A-CLASS-OPTION stuff and handle - the :INDEX class option in a way that's compatible with AMOP. - - Write test cases for schema updates and user defined methods for UPDATE-PERSISTENT-INSTANCE-FOR-REDEFINED-CLASS. --- /project/rucksack/cvsroot/rucksack/garbage-collector.lisp 2007/01/20 18:17:55 1.21 +++ /project/rucksack/cvsroot/rucksack/garbage-collector.lisp 2008/02/03 12:32:16 1.22 @@ -1,4 +1,4 @@ -;; $Id: garbage-collector.lisp,v 1.21 2007/01/20 18:17:55 alemmens Exp $ +;; $Id: garbage-collector.lisp,v 1.22 2008/02/03 12:32:16 alemmens Exp $ (in-package :rucksack) @@ -104,8 +104,8 @@ rounded up.)"))) -(defparameter *initial-heap-size* (* 1024 1024) - "The default initial heap size is 1 MB. ") +(defparameter *initial-heap-size* (* 10 1024 1024) + "The default initial heap size is 10 MB. ") (defmethod initialize-instance :after ((heap mark-and-sweep-heap) &key size &allow-other-keys) @@ -367,17 +367,16 @@ (let* ((free-p (and (integerp block-start) (minusp block-start))) (block-size (if free-p (- block-start) block-header))) ;; Reclaim dead blocks. - (when (and (not free-p) ; only non-free blocks - (not (block-alive-p object-table - ;; read object ID - (let ((heap-stream (heap-stream heap))) - (deserialize heap-stream) - (deserialize heap-stream)) - block))) - ;; The block is dead (either because the object is dead - ;; or because the block contains an old version): return - ;; the block to its free list. - (deallocate-block block heap)) + (when (not free-p) ; only non-free blocks + (let* ((heap-stream (heap-stream heap)) + (object-id (progn + (deserialize heap-stream) + (deserialize heap-stream)))) + (when (not (block-alive-p object-table object-id block)) + ;; The block is dead (either because the object is dead + ;; or because the block contains an old version): return + ;; the block to its free list. + (deallocate-block block heap)))) ;; (incf work-done block-size) ;; Move to next block (if there is one). @@ -435,8 +434,10 @@ do (progn ;; Hook dead object blocks back into the free list. (when (eql (object-info object-table object-id) :dead-object) - (let ((block (object-id-to-block object-id object-table))) - (deallocate-block block object-table))) + (delete-object-id object-table object-id) + ;; Don't forget to remove the id->object mapping from + ;; the cache! (This was a difficult bug to find.) + (cache-delete-object object-id (rucksack-cache (rucksack heap)))) (incf (nr-object-bytes-sweeped heap) object-block-size))) ;; (when (>= (nr-object-bytes-sweeped heap) (nr-object-bytes heap)) --- /project/rucksack/cvsroot/rucksack/object-table.lisp 2007/01/20 18:17:55 1.4 +++ /project/rucksack/cvsroot/rucksack/object-table.lisp 2008/02/03 12:32:16 1.5 @@ -1,4 +1,4 @@ -;; $Id: object-table.lisp,v 1.4 2007/01/20 18:17:55 alemmens Exp $ +;; $Id: object-table.lisp,v 1.5 2008/02/03 12:32:16 alemmens Exp $ (in-package :rucksack) @@ -58,7 +58,7 @@ (let* ((block (allocate-block object-table :expand t)) (id (block-to-object-id block object-table))) (setf (object-info object-table id) :reserved) - (block-to-object-id block object-table))) + id)) (defun delete-object-id (object-table object-id) "Returns object-id's cell to the free-list." --- /project/rucksack/cvsroot/rucksack/objects.lisp 2008/01/23 15:43:42 1.19 +++ /project/rucksack/cvsroot/rucksack/objects.lisp 2008/02/03 12:32:16 1.20 @@ -1,4 +1,4 @@ -;; $Id: objects.lisp,v 1.19 2008/01/23 15:43:42 alemmens Exp $ +;; $Id: objects.lisp,v 1.20 2008/02/03 12:32:16 alemmens Exp $ (in-package :rucksack) @@ -96,9 +96,10 @@ (defmethod print-object ((object persistent-data) stream) (print-unreadable-object (object stream :type t :identity nil) - (format stream "#~D~@[ in ~A~]" + (format stream "#~D~@[ with transaction id ~D~]" (slot-value object 'object-id) - (cache object)))) + (and (slot-boundp object 'transaction-id) + (slot-value object 'transaction-id))))) (defmethod compute-persistent-slot-names ((class standard-class) (object persistent-data)) @@ -490,9 +491,10 @@ (defmethod print-object ((object persistent-object) stream) (print-unreadable-object (object stream :type t :identity nil) - (format stream "#~D~@[ in ~A~]" + (format stream "#~D~@[ with transaction id ~D~]" (slot-value object 'object-id) - (cache object)))) + (transaction-id object)))) + ;; It's a bit stupid that we have to write the same code for three ;; P-EQL methods, but we don't seem to have much choice. --- /project/rucksack/cvsroot/rucksack/rucksack.asd 2008/01/31 20:26:08 1.16 +++ /project/rucksack/cvsroot/rucksack/rucksack.asd 2008/02/03 12:32:16 1.17 @@ -1,9 +1,9 @@ -;;; $Id: rucksack.asd,v 1.16 2008/01/31 20:26:08 alemmens Exp $ +;;; $Id: rucksack.asd,v 1.17 2008/02/03 12:32:16 alemmens Exp $ (in-package :cl-user) (asdf:defsystem :rucksack - :version "0.1.14" + :version "0.1.15" :serial t :components ((:file "queue") (:file "package") From alemmens at common-lisp.net Mon Feb 11 11:44:53 2008 From: alemmens at common-lisp.net (alemmens) Date: Mon, 11 Feb 2008 06:44:53 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack/doc Message-ID: <20080211114453.DC6C55F042@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack/doc In directory clnet:/tmp/cvs-serv9073/doc Log Message: Directory /project/rucksack/cvsroot/rucksack/doc added to the repository From alemmens at common-lisp.net Mon Feb 11 11:45:57 2008 From: alemmens at common-lisp.net (alemmens) Date: Mon, 11 Feb 2008 06:45:57 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack/doc Message-ID: <20080211114557.49D5F63036@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack/doc In directory clnet:/tmp/cvs-serv9277 Added Files: rucksack-tutorial.lisp Log Message: Create new doc directory and add tutorial by Brad Beveridge. --- /project/rucksack/cvsroot/rucksack/doc/rucksack-tutorial.lisp 2008/02/11 11:45:57 NONE +++ /project/rucksack/cvsroot/rucksack/doc/rucksack-tutorial.lisp 2008/02/11 11:45:57 1.1 #| Rucksack Tutorial by Brad Beveridge (brad.beveridge at gmail.com) What is Rucksack? Hopefully you already know this, but the quick description is that Rucksack is a persistence library for Common Lisp. Its project page is at http://common-lisp.net/project/rucksack/, and the author of the library is Arthur Lemmens. Rucksack provides a fairly transparent persistence mechanism for conses, vectors and CLOS objects. RS also provides a btree based indexing mechanism that lets you effciently look up CLOS objects and other persistent data. This tutorial will walk you through creating a simple address book database that uses Rucksack as its backing store. The tutorial is entirely in a single file to make it easier to just load and run, I assume that you have managed to install Rucksack and load it into your Lisp image (probably using ASDF) The tutorial is designed to be read from start to end, with the reader evaluating the live code as we go (C-cC-e in Slime). Alternately you may compile and load this file (C-cC-k) , and only evaluate the forms that you find interesting. If you do go with compiling and loading the whole file, you may get some warnings, none are serious, so just push on through :) Let's get started by defining a package that uses Rucksack, and declaring that we are defining our code within it. |# (defpackage :rucksack-tutorial (:nicknames :rs-tute) (:use :cl :rucksack)) (in-package :rucksack-tutorial) #| RS will need to be given a path where it can create its files. The path below should work for most unix like systems. If you are running other operating systems, you should create a temporary directory and set the value below. |# (defvar *rs-tute-directory* #p"/tmp/rs-tute/") #| Now we define our class. It should look familiar to you because it's just CLOS with some extra keywords. The information that each contact in our address book should have is: - a unique number for internal use - the name of the contact - a phone number - email address - street address - a notes area for the contact We will want to search and display the data in the address book in many different ways, so for each slot that we may want to sort by or look up we will use an appropriate index. Rucksack pre-defines some index specifications which you can find in index.lisp. By specifying that we want to index a particular slot in a class, RS will construct a persistent BTree that maps the value of that slot to the object. In our example we specify five slots to be indexed, which means RS will manage five indexes just for this class. Having a slot indexed means that we can very quickly search for a specific slot value, or return a range of slot values. Since we also want to index all instances of the class as a whole, we specify the (:index t) class property and RS creates a sixth index that tracks every instance of CONTACT-DETAILS. Since we are indexing by other slots we don't really need this index, but it is fine for this example. We're going to open the rucksack storage in :SUPERSEDE mode for this first evaluation so that we always start with a fresh database. * NOTE * As of 30/1/08 Rucksack requires that your DEFCLASS form is evaluated inside an open RS and transaction. It is during class definition that the initial indexes are created. |# (with-rucksack (rs *rs-tute-directory* :if-exists :supersede) (with-transaction () (defclass contact-details () ((unique-id :initarg :unique-id :accessor unique-id-of :index :number-index :unique t :documentation "A unique number for each contact in our DB") (name :initarg :name :accessor name-of :index :case-insensitive-string-index :documentation "The full name of the contact") (phone-number :initarg :phone-number :accessor phone-number-of :index :number-index :documentation "The phone number of the contact") (email :initarg :email :accessor email-of :index :case-insensitive-string-index :documentation "Email address") (address :initarg :address :accessor address-of :index :case-insensitive-string-index :documentation "Postal address") (notes :initarg :notes :accessor notes-of :documentation "Free form notes about this contact")) (:documentation "The CONTACT-DETAILS class is the backbone of our address book. It provides details about each contact in our address book.") (:index t) (:metaclass persistent-class)) )) #| And let's specialize INITIALIZE-INSTANCE to automatically give a unique ID. |# (defvar *unique-id* 0) (defmethod initialize-instance :after ((obj contact-details) &key) (setf (unique-id-of obj) (incf *unique-id*))) #| Lets make it so we can print instances of CONTACT-DETAILS |# (defmethod print-object ((obj contact-details) stream) (print-unreadable-object (obj stream :type t) (with-slots (unique-id name phone-number email address notes) obj (format stream "~A: '~A' ~A '~A' '~A' '~A'" unique-id name phone-number email address notes)))) #| Now our initial database is setup, lets write a simple function that creates new instances of CONTACT-DETAILS. I've chosen a very simple approach here, every time you call MAKE-CONTACT the function will open the store and create a new transaction. A longer running application would probably have a WITH-RUCKSACK form near its main function, or perhaps manually open and close the store. |# (defun make-contact (name &optional phone-number email address notes) (with-rucksack (rs *rs-tute-directory*) (with-transaction () (make-instance 'contact-details :name (or name "") :phone-number (or phone-number 0) :email (or email "") :address (or address "") :notes notes)))) #| Now is probably a good time to talk about Rucksack's transactions. Basically any time you perform a Rucksack operation it must be inside a transaction. Rucksack transactions are just what you would expect. Within a transaction you are guaranteed one of two outcomes, either: 1. The transaction will complete and the modified state will be written to the RS store. - OR 2. The transaction is aborted and no changed state is written to the RS store. In the case of #2 it is up to the application layer to decide what to do if a transaction fails. Inside a WITH-TRANSACTION form, a transaction can be aborted by aborting the form's body (e.g. by calling ABORT or by signaling an error). ---------------- Now, lets create some contacts. We don't need to wrap these in a transaction because MAKE-CONTACT does it for us already. |# (make-contact "Brad Beveridge" 0 "brad.beveridge at gmail.com" "" "Guy who wrote this.") (make-contact "Arthur Lemmens" 555 "alemmens at xs4all.nl" "" "The author of Rucksack.") (make-contact "Noddy Noname" 1234 "noddy at nowhere.com") (make-contact "Jane" 2345 "jane at hotmail.com" "Jaynes Town" "Standard female name") (make-contact "Zane" 9345 "zane at hotmail.com" "Zaynes Town" "I needed a Z name") #| Feel free to be a bit more creative than me & put some more contacts in... We are now at the stage where we have some CLOS objects that happen to be persistent. Since our transactions completed (hopefully!), those objects are also safely stored on our hard drive. Our first query function simply prints out all of the CONTACT-DETAILS objects that Rucksack is aware of. |# (defun print-all-contacts () (with-rucksack (rs *rs-tute-directory*) (with-transaction () (rucksack-map-class rs 'contact-details (lambda (object) (format t "~A~%" object)))))) #| (print-all-contacts) The function RUCKSACK-MAP-CLASS is fairly straight forward, it takes a Rucksack, a class name and a function that must accept one argument. The function will be called for each object in the store that is of the given class type. The class must have the (:index t) property. Let's write a function that finds a contact by matching their name. |# (defun find-contact-by-name (name) (with-rucksack (rs *rs-tute-directory*) (with-transaction () (rucksack-map-slot rs 'contact-details 'name (lambda (contact) (return-from find-contact-by-name contact)) :equal name))) nil) #| (format t "~A~%" (find-contact-by-name "jane")) Notice how similar this is to how we printed all of the object instances? Rucksack has lots of RUCKSACK-MAP-* functions that all follow the same basic form, you pass in the data you want to search for and Rucksack will call the function that you supply for each object that matches. Let's try returning a range of name matches. If we don't supply an end match, then it means we want all matches after start. If we don't supply start or end strings then the function will return all instances in their sorted order. |# (defun find-contacts-by-name-range (&optional start end) (let (ret) (with-rucksack (rs *rs-tute-directory*) (with-transaction () (rucksack-map-slot rs 'contact-details 'name (lambda (contact) (push contact ret)) :min start :max end :include-min t :include-max t))) ; reverse the list so it's in the expected order (nreverse ret))) #| (dolist (contact (find-contacts-by-name-range "a" "c")) (format t "~A~%" contact)) (dolist (contact (find-contacts-by-name-range "c")) (format t "~A~%" contact)) Let's write a little function to delete an object if we have its name. |# (defun delete-object-by-name (name) (with-rucksack (rs *rs-tute-directory*) (with-transaction () (let ((contact (find-contact-by-name name))) (when contact (rucksack::rucksack-delete-object rs contact)))))) #| (delete-object-by-name "Zane") (print-all-contacts) We've now covered enough of Rucksack to actually do some useful things. We can create persistent objects, search through them and also delete them. Rucksack has plenty of other goodies, such as persistent storage of non CLOS data - vectors and conses. Rucksack also supports changing the definition of objects in a similar manner to the way you would redefine CLOS classes. For a more indepth explanation of how Rucksack works, please read the talk-eclm2006.txt file that comes with the source code. Happy Rucksacking! Brad |# From alemmens at common-lisp.net Mon Feb 11 12:47:53 2008 From: alemmens at common-lisp.net (alemmens) Date: Mon, 11 Feb 2008 07:47:53 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack Message-ID: <20080211124753.64C715D166@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack In directory clnet:/tmp/cvs-serv22665 Modified Files: cache.lisp done.txt objects.lisp p-btrees.lisp package.lisp rucksack.asd rucksack.lisp Log Message: Version 0.1.16: improved performance by decreasing persistent consing for btrees and using a lazy-cache. Fixed some small bugs. Added a few handy functions and macros. In detail: Added P-PUSH and P-POP. Improved btree efficiency by switching to a different data structure for the bindings. Instead of using a persistent cons for each key/ value pair, we now put the keys and values directly into the bnode vector. This speeds up most btree operations because it reduces persistent consing when adding new values and it reduces indirections when searching for keys. Renamed BTREE-NODE to BNODE, BTREE-NODE-INDEX to BNODE-BINDINGS, BTREE-NODE-INDEX-COUNT to BNODE-NR-BINDINGS, FIND-BINDING-IN-NODE to FIND-KEY-IN-NODE. Fix a missing argument bug in REMOVE-CLASS-INDEX. Added a LAZY-CACHE which just clears the entire hash table whenever the cache gets full. This improves memory usage, because the normal cache queue kept track of a lot of objects that for some reason couldn't be cleaned up by the implementation's garbage collector. Added the convenience macros RUCKSACK-DO-CLASS and RUCKSACK-DO-SLOT. Made RUCKSACK-DELETE-OBJECT an exported symbol of the RUCKSACK package. Fix a bug in TEST-NON-UNIQUE-BTREE: it should call CHECK-NON-UNIQUE-CONTENTS instead of CHECK-CONTENTS. --- /project/rucksack/cvsroot/rucksack/cache.lisp 2008/02/03 12:32:15 1.14 +++ /project/rucksack/cvsroot/rucksack/cache.lisp 2008/02/11 12:47:52 1.15 @@ -1,4 +1,4 @@ -;; $Id: cache.lisp,v 1.14 2008/02/03 12:32:15 alemmens Exp $ +;; $Id: cache.lisp,v 1.15 2008/02/11 12:47:52 alemmens Exp $ (in-package :rucksack) @@ -120,6 +120,16 @@ objects."))) +(defclass lazy-cache (standard-cache) + () + (:documentation "A lazy cache doesn't bother with fancy mechanisms +for deciding which objects to remove from the cache. It just fills +the cache until maximum capacity (i.e. CACHE-SIZE) and then clears +the entire cache at once. Advantages of this could be that it uses +less time and less memory to do its work. Disadvantage is that it's +very stupid about the objects it should try to keep in memory.")) + + (defmethod print-object ((cache standard-cache) stream) (print-unreadable-object (cache stream :type t :identity nil) (format stream "of size ~D, heap ~S and ~D objects in memory." @@ -147,8 +157,8 @@ (defun sans (plist &rest keys) "Returns PLIST with keyword arguments from KEYS removed." - ;; stolen from Usenet posting <3247672165664225 at naggum.no> by Erik - ;; Naggum + ;; From Usenet posting <3247672165664225 at naggum.no> by Erik + ;; Naggum. (let ((sans ())) (loop (let ((tail (nth-value 2 (get-properties plist keys)))) @@ -369,7 +379,7 @@ (incf nr-objects-removed)))))) -(defun add-to-queue (object-id cache) +(defmethod add-to-queue (object-id (cache standard-cache)) ;; Add an object to the end of the queue. (let ((queue (queue cache))) (when (cache-full-p cache) @@ -377,6 +387,18 @@ (queue-add queue object-id))) ;; +;; Queue operations for lazy caches +;; + +(defmethod make-room-in-cache ((cache lazy-cache)) + (clrhash (objects cache))) + +(defmethod add-to-queue (object-id (cache lazy-cache)) + ;; We're not adding anything to the queue, because we're too lazy. + object-id) + + +;; ;; Open/close/map transactions ;; --- /project/rucksack/cvsroot/rucksack/done.txt 2008/02/03 12:32:16 1.16 +++ /project/rucksack/cvsroot/rucksack/done.txt 2008/02/11 12:47:52 1.17 @@ -1,3 +1,34 @@ +* 2008-02-11 - version 0.1.16 + +Added P-PUSH and P-POP. + +Improved btree efficiency by switching to a different data structure +for the bindings. Instead of using a persistent cons for each key/ +value pair, we now put the keys and values directly into the bnode +vector. This speeds up most btree operations because it reduces +persistent consing when adding new values and it reduces indirections +when searching for keys. + +Renamed BTREE-NODE to BNODE, BTREE-NODE-INDEX to BNODE-BINDINGS, +BTREE-NODE-INDEX-COUNT to BNODE-NR-BINDINGS, FIND-BINDING-IN-NODE to +FIND-KEY-IN-NODE. + +Fix a missing argument bug in REMOVE-CLASS-INDEX. + +Added a LAZY-CACHE which just clears the entire hash table whenever +the cache gets full. This improves memory usage, because the normal +cache queue kept track of a lot of objects that for some reason +couldn't be cleaned up by the implementation's garbage collector. + +Added the convenience macros RUCKSACK-DO-CLASS and RUCKSACK-DO-SLOT. + +Made RUCKSACK-DELETE-OBJECT an exported symbol of the RUCKSACK +package. + +Fix a bug in TEST-NON-UNIQUE-BTREE: it should call +CHECK-NON-UNIQUE-CONTENTS instead of CHECK-CONTENTS. + + * 2008-02-02 - version 0.1.15 Fixed a garbage collector bug reported by Sean Ross. When the garbage --- /project/rucksack/cvsroot/rucksack/objects.lisp 2008/02/03 12:32:16 1.20 +++ /project/rucksack/cvsroot/rucksack/objects.lisp 2008/02/11 12:47:52 1.21 @@ -1,4 +1,4 @@ -;; $Id: objects.lisp,v 1.20 2008/02/03 12:32:16 alemmens Exp $ +;; $Id: objects.lisp,v 1.21 2008/02/11 12:47:52 alemmens Exp $ (in-package :rucksack) @@ -269,6 +269,29 @@ nil) +(defmacro p-pop (place &environment env) + "Pop an item from the persistent list specified by PLACE." + (multiple-value-bind (dummies vals new setter getter) + (get-setf-expansion place env) + `(let* (,@(mapcar #'list dummies vals) (,(car new) ,getter)) + (prog1 (p-car ,(car new)) + (setq ,(car new) (p-cdr ,(car new))) + ,setter)))) + + +(defmacro p-push (item place &environment env) + "Push ITEM onto the persistent list specified by PLACE. Return the +modified persistent list. ITEM is evaluated before place." + (multiple-value-bind (dummies vals newval setter getter) + (get-setf-expansion place env) + (let ((item-var (gensym "ITEM"))) + `(let* ((,item-var ,item) + ,@(mapcar #'list dummies vals) + (,(car newval) (p-cons ,item-var ,getter))) + ,setter)))) + + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Persistent sequence functions ;; (Just a start...) --- /project/rucksack/cvsroot/rucksack/p-btrees.lisp 2008/01/22 15:59:24 1.17 +++ /project/rucksack/cvsroot/rucksack/p-btrees.lisp 2008/02/11 12:47:52 1.18 @@ -1,4 +1,4 @@ -;; $Id: p-btrees.lisp,v 1.17 2008/01/22 15:59:24 alemmens Exp $ +;; $Id: p-btrees.lisp,v 1.18 2008/02/11 12:47:52 alemmens Exp $ (in-package :rucksack) @@ -21,7 +21,7 @@ #:btree-nr-keys #:btree-nr-values ;; Nodes - #:btree-node + #:bnode ;; Functions #:btree-search #:btree-insert #:btree-delete #:btree-delete-key @@ -138,9 +138,10 @@ Basically, a B-tree is a balanced multi-way tree. -The reason for using multi-way trees instead of binary trees is that the nodes -are expected to be on disk; it would be inefficient to have to execute -a disk operation for each tree node if it contains only 2 keys. +The reason for using multi-way trees instead of binary trees is that +the nodes are expected to be on disk; it would be inefficient to have +to execute a disk operation for each tree node if it contains only 2 +keys. The key property of B-trees is that each possible search path has the same length, measured in terms of nodes. @@ -202,10 +203,10 @@ ;; (node-class :initarg :node-class :reader btree-node-class - :initform 'btree-node) + :initform 'bnode) (max-node-size :initarg :max-node-size :reader btree-max-node-size - :initform 32 + :initform 64 :documentation "An integer specifying the preferred maximum number of keys per btree node.") (unique-keys-p :initarg :unique-keys-p @@ -251,20 +252,22 @@ (let ((key< (slot-value btree 'key<)) (key-key (btree-key-key btree))) (lambda (key1 key2) - (funcall key< - (funcall key-key key1) - (funcall key-key key2))))) + (and (not (eql key1 'key-irrelevant)) + (not (eql key2 'key-irrelevant)) + (funcall key< + (funcall key-key key1) + (funcall key-key key2)))))) (defmethod btree-key= ((btree btree)) (let ((key< (slot-value btree 'key<)) (key-key (btree-key-key btree))) (lambda (key1 key2) - (let ((key1 (funcall key-key key1)) - (key2 (funcall key-key key2))) - (and (not (eql key1 'key-irrelevant)) - (not (eql key2 'key-irrelevant)) - (not (funcall key< key1 key2)) - (not (funcall key< key2 key1))))))) + (and (not (eql key1 'key-irrelevant)) + (not (eql key2 'key-irrelevant)) + (let ((key1 (funcall key-key key1)) + (key2 (funcall key-key key2))) + (and (not (funcall key< key1 key2)) + (not (funcall key< key2 key1)))))))) (defmethod btree-key>= ((btree btree)) (lambda (key1 key2) @@ -299,39 +302,41 @@ ;; -;; The next two classes are for internal use only, so we don't bother -;; with fancy long names. +;; Btree nodes (= 'bnodes'). ;; -(defclass btree-node () - ((index :initarg :index - :initform '() - :accessor btree-node-index - :documentation "A vector of key/value pairs. The keys are -sorted by KEY<. No two keys can be the same. For leaf nodes of btrees -with non-unique-keys, the value part is actually a list of values. -For intermediate nodes, the value is a child node. All keys in the -child node will be KEY<= the child node's key in the parent node.") - (index-count :initform 0 - :accessor btree-node-index-count - :documentation "The number of key/value pairs in the index vector.") - (leaf-p :initarg :leaf-p :initform nil :reader btree-node-leaf-p)) +(defclass bnode () + ((bindings :initarg :bindings + :initform '() + :accessor bnode-bindings + :documentation "A vector of with alternating keys and +values. The keys are sorted by KEY<. No two keys can be the same. +For leaf nodes of btrees with non-unique-keys, the value part is +actually a list of values. For intermediate nodes, the value is a +child node. All keys in the child node will be KEY<= the child node's +key in the parent node.") + (nr-bindings :initform 0 + :accessor bnode-nr-bindings + :documentation "The number of key/value bindings in +the index vector.") + (leaf-p :initarg :leaf-p :initform nil :reader bnode-leaf-p)) (:metaclass persistent-class)) + ;; ;; Info functions ;; (defmethod btree-nr-keys ((btree btree)) (if (slot-boundp btree 'root) - (btree-node-nr-keys (btree-root btree)) + (bnode-nr-keys (btree-root btree)) 0)) -(defmethod btree-node-nr-keys ((node btree-node)) - (if (btree-node-leaf-p node) - (btree-node-index-count node) - (loop for i below (btree-node-index-count node) - sum (btree-node-nr-keys (binding-value (node-binding node i)))))) +(defmethod bnode-nr-keys ((node bnode)) + (if (bnode-leaf-p node) + (bnode-nr-bindings node) + (loop for i below (bnode-nr-bindings node) + sum (bnode-nr-keys (binding-value (node-binding node i)))))) (defmethod btree-nr-values ((btree btree)) @@ -348,30 +353,43 @@ ;; Bindings ;; -(defun node-binding (node i) - (let ((index (btree-node-index node))) - (p-aref index i))) - -(defun (setf node-binding) (binding node i) - (setf (p-aref (btree-node-index node) i) - binding)) - - -(defun make-binding (key value) - (p-cons key value)) - -(defun binding-key (binding) - (p-car binding)) +(defstruct binding + key + value) -(defun (setf binding-key) (key binding) - (setf (p-car binding) key)) +(defun node-binding (node i) + ;; A binding used to be a persistent cons, but we want to reduce + ;; persistent consing so now we use a small struct and try to + ;; make sure that we persist the relevant info when necessary. + (let ((vector (bnode-bindings node))) + (make-binding :key (p-aref vector (* 2 i)) + :value (p-aref vector (1+ (* 2 i)))))) + +(defun node-binding-key (node i) + (p-aref (bnode-bindings node) (* 2 i))) -(defun (setf binding-value) (value binding) - (setf (p-cdr binding) value)) +(defun node-binding-value (node i) + (p-aref (bnode-bindings node) (1+ (* 2 i)))) -(defun binding-value (binding) - (p-cdr binding)) +(defun (setf node-binding) (binding node i) + (update-node-binding node i + (binding-key binding) + (binding-value binding)) + binding) + +(defun update-node-binding (node i key value) + (setf (node-binding-key node i) key + (node-binding-value node i) value)) + +(defun (setf node-binding-key) (key node i) + (setf (p-aref (bnode-bindings node) (* 2 i)) + key)) + +(defun (setf node-binding-value) (value node i) + (setf (p-aref (bnode-bindings node) (1+ (* 2 i))) + value)) +;; (defun make-leaf-value (btree value) (if (btree-unique-keys-p btree) @@ -381,16 +399,16 @@ ;; ;; -(defmethod initialize-instance :after ((node btree-node) +(defmethod initialize-instance :after ((node bnode) &key btree &allow-other-keys) - (setf (btree-node-index node) (p-make-array (btree-max-node-size btree) - :initial-element nil) - (btree-node-index-count node) 0)) + (setf (bnode-bindings node) (p-make-array (* 2 (btree-max-node-size btree)) + :initial-element nil) + (bnode-nr-bindings node) 0)) -(defmethod print-object ((node btree-node) stream) +(defmethod print-object ((node bnode) stream) (print-unreadable-object (node stream :type t :identity t) - (format stream "with ~D bindings" (btree-node-index-count node)))) + (format stream "with ~D bindings" (bnode-nr-bindings node)))) ;; ;; Debugging @@ -400,16 +418,15 @@ (pprint (node-as-cons node))) (defun node-as-cons (node &optional (unique-keys t)) - (loop with index = (btree-node-index node) - with leaf-p = (btree-node-leaf-p node) - for i below (btree-node-index-count node) - for binding = (p-aref index i) - collect (list (binding-key binding) + (loop with leaf-p = (bnode-leaf-p node) + for i below (bnode-nr-bindings node) + for value = (node-binding-value node i) + collect (list (node-binding-key node i) (if leaf-p (if unique-keys - (binding-value binding) - (unwrap-persistent-list (binding-value binding))) - (node-as-cons (binding-value binding)))))) + value + (unwrap-persistent-list value)) + (node-as-cons value))))) (defun btree-as-cons (btree) (and (slot-value btree 'root) @@ -420,17 +437,17 @@ ;; Depth and balance ;; -(defmethod node-max-depth ((node btree-node)) - (if (btree-node-leaf-p node) +(defmethod node-max-depth ((node bnode)) + (if (bnode-leaf-p node) 0 - (loop for i below (btree-node-index-count node) + (loop for i below (bnode-nr-bindings node) for binding = (node-binding node i) maximize (1+ (node-max-depth (binding-value binding)))))) -(defmethod node-min-depth ((node btree-node)) - (if (btree-node-leaf-p node) +(defmethod node-min-depth ((node bnode)) + (if (bnode-leaf-p node) 0 - (loop for i below (btree-node-index-count node) + (loop for i below (bnode-nr-bindings node) for binding = (node-binding node i) minimize (1+ (node-min-depth (binding-value binding)))))) @@ -445,6 +462,7 @@ (btree-depths btree) (<= (- max min) 1))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Search ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -482,16 +500,23 @@ ;; (defgeneric node-search (btree node key errorp default-value) - (:method ((btree btree) (node btree-node) key errorp default-value) + (:method ((btree btree) (node bnode) key errorp default-value) (let ((binding (node-search-binding btree node key))) (if binding (binding-value binding) (not-found btree key errorp default-value))))) (defgeneric node-search-binding (btree node key) - (:method ((btree btree) (node btree-node) key) - (if (btree-node-leaf-p node) - (find-binding-in-node key node btree) + (:documentation "Tries to find KEY in NODE or one of its subnodes. +Returns three values if the key was found: the binding, the node +containing the binding and the position of the binding in that node. +Returns nil otherwise.") + (:method ((btree btree) (node bnode) key) + (if (bnode-leaf-p node) + (multiple-value-bind (binding position) + (find-key-in-node btree node key) + (and binding + (values binding node position))) (let ((subnode (find-subnode btree node key))) (node-search-binding btree subnode key))))) @@ -500,44 +525,64 @@ ;; Find the first binding with a key >= the given key and return ;; the corresponding subnode. (let ((btree-key< (btree-key< btree)) - (last (1- (btree-node-index-count node)))) + (last (1- (bnode-nr-bindings node)))) (labels ((binary-search (start end) - (let* ((mid (+ start (ash (- end start) -1)))) - (cond ((= start mid) - (let ((start-binding (node-binding node start))) - (if (funcall btree-key< (binding-key start-binding) key) - (binding-value (node-binding node end)) - (binding-value start-binding)))) - (t - (let ((mid-binding (node-binding node mid))) - (if (funcall btree-key< (binding-key mid-binding) key) - (binary-search mid end) - (binary-search start mid)))))))) - (if (funcall btree-key< (binding-key (node-binding node (1- last))) key) - (binding-value (node-binding node last)) + (let ((mid (+ start (ash (- end start) -1)))) + (if (= start mid) + (if (funcall btree-key< (node-binding-key node start) key) + (node-binding-value node end) + (node-binding-value node start)) + (if (funcall btree-key< (node-binding-key node mid) key) + (binary-search mid end) + (binary-search start mid)))))) + (if (funcall btree-key< (node-binding-key node (1- last)) key) + (node-binding-value node last) (binary-search 0 last))))) -(defun find-binding-in-node (key node btree) +(defun find-key-in-node (btree node key) + "Tries to find a binding with the given key in a bnode. If it +succeeds, it returns the binding (and, as a second value, the position +of that binding). Otherwise it returns NIL." (let ((btree-key< (btree-key< btree)) - (array (btree-node-index node)) - (index-count (btree-node-index-count node))) + (index-count (bnode-nr-bindings node))) (labels ((binary-search (start end) - (let* ((mid (+ start (ash (- end start) -1)))) - (cond ((= start mid) - (let ((start-binding (p-aref array start))) - (if (funcall btree-key< (binding-key start-binding) key) - (when (< end index-count) - (p-aref array end)) - start-binding))) - (t (let ((mid-binding (p-aref array mid))) - (if (funcall btree-key< (binding-key mid-binding) key) - (binary-search mid end) - (binary-search start mid)))))))) + (let ((mid (+ start (ash (- end start) -1)))) + (if (= start mid) + (let ((start-binding (node-binding node start))) + (if (funcall btree-key< (node-binding-key node start) key) + (when (< end index-count) + (values (node-binding node end) end)) + (values start-binding start))) + (if (funcall btree-key< (node-binding-key node mid) key) + (binary-search mid end) + (binary-search start mid)))))) (when (plusp index-count) - (let ((candidate (binary-search 0 index-count))) + (multiple-value-bind (candidate position) + (binary-search 0 index-count) (when (and candidate (funcall (btree-key= btree) (binding-key candidate) key)) - candidate)))))) + (values candidate position))))))) + +(defun key-position (btree node key) + "Tries to find a binding with the given key in a bnode. If it +succeeds, it returns the position of that binding. Otherwise, it +returns NIL." + (nth-value 1 (find-key-in-node btree node key))) + + +(defun find-value-in-node (btree node value &key (test (btree-value= btree))) + "Tries to find a binding with the given value in a bnode. If it +succeeds, it returns the binding (and, as a second value, the position +of that binding). Otherwise it returns NIL." + ;; The bindings aren't sorted by value, so we have to do + ;; a plain linear search. + (loop for i below (bnode-nr-bindings node) + when (funcall test (node-binding-value node i) value) + do (return-from find-value-in-node + (values (node-binding node i) i))) + ;; Not found: return nil. [635 lines skipped] --- /project/rucksack/cvsroot/rucksack/package.lisp 2008/01/23 15:43:42 1.12 +++ /project/rucksack/cvsroot/rucksack/package.lisp 2008/02/11 12:47:52 1.13 @@ -1,4 +1,4 @@ -;; $Id: package.lisp,v 1.12 2008/01/23 15:43:42 alemmens Exp $ +;; $Id: package.lisp,v 1.13 2008/02/11 12:47:52 alemmens Exp $ #-(or allegro lispworks sbcl openmcl) (error "Unsupported implementation: ~A" (lisp-implementation-type)) @@ -35,7 +35,8 @@ #:p-car #:p-cdr #:p-list #:unwrap-persistent-list #:p-mapcar #:p-mapc #:p-maplist #:p-mapl - #:p-member-if + #:p-member-if + #:p-pop #:p-push #:p-make-array #:p-aref #:p-array-dimensions #:p-length #:p-find #:p-replace #:p-delete-if #:p-position @@ -65,9 +66,12 @@ #:rucksack-map-class-indexes #:rucksack-map-slot-indexes #:rucksack-maybe-index-changed-slot #:rucksack-maybe-index-new-object #:rucksack-map-class #:rucksack-map-slot + #:rucksack-do-class #:rucksack-do-slot + #:rucksack-delete-object ;; Transactions #:current-transaction + #:transaction-start #:transaction-commit #:transaction-rollback #:with-transaction #:*transaction* #:transaction #:standard-transaction --- /project/rucksack/cvsroot/rucksack/rucksack.asd 2008/02/03 12:32:16 1.17 +++ /project/rucksack/cvsroot/rucksack/rucksack.asd 2008/02/11 12:47:52 1.18 @@ -1,9 +1,9 @@ -;;; $Id: rucksack.asd,v 1.17 2008/02/03 12:32:16 alemmens Exp $ +;;; $Id: rucksack.asd,v 1.18 2008/02/11 12:47:52 alemmens Exp $ (in-package :cl-user) (asdf:defsystem :rucksack - :version "0.1.15" + :version "0.1.16" :serial t :components ((:file "queue") (:file "package") --- /project/rucksack/cvsroot/rucksack/rucksack.lisp 2008/01/31 20:26:08 1.23 +++ /project/rucksack/cvsroot/rucksack/rucksack.lisp 2008/02/11 12:47:52 1.24 @@ -1,4 +1,4 @@ -;; $Id: rucksack.lisp,v 1.23 2008/01/31 20:26:08 alemmens Exp $ +;; $Id: rucksack.lisp,v 1.24 2008/02/11 12:47:52 alemmens Exp $ (in-package :rucksack) @@ -154,6 +154,22 @@ do more filtering before actually loading objects from disk. INCLUDE-SUBCLASSES defaults to T.")) +(defmacro rucksack-do-class ((instance-var class + &key + (rucksack '*rucksack*) + id-only + (include-subclasses t)) + &body body) + "Evaluate BODY for each instance of CLASS, with INSTANCE-VAR +successively bound to each instance. See the documentation of +RUCKSACK-MAP-CLASS for more details." + (check-type instance-var symbol) + `(rucksack-map-class ,rucksack ,class + (lambda (,instance-var) , at body) + :id-only ,id-only + :include-subclasses ,include-subclasses)) + + (defgeneric rucksack-map-slot (rucksack class slot function &key equal min max include-min include-max order include-subclasses) @@ -170,6 +186,27 @@ do more filtering before actually loading objects from disk. INCLUDE-SUBCLASSES defaults to T.")) +(defmacro rucksack-do-slot ((instance-var class slot + &key (rucksack '*rucksack*) + equal min max include-min include-max + order include-subclasses) + &body body) + "Evaluate BODY for each instance of CLASS where SLOT has the +specified value. INSTANCE-VAR will be bound successively to each +instance. See the documentation of RUCKSACK-MAP-SLOT for more +details." + (check-type instance-var symbol) + `(rucksack-map-slot ,rucksack ,class ,slot + (lambda (,instance-var) , at body) + :equal ,equal + :min ,min + :max ,max + :include-min ,include-min + :include-max ,include-max + :order ,order + :include-subclasses ,include-subclasses)) + + #+later (defgeneric rucksack-map-objects (rucksack class-designator function @@ -369,7 +406,7 @@ (defmethod initialize-instance :after ((rucksack standard-rucksack) &key - (cache-class 'standard-cache) + (cache-class 'lazy-cache) (cache-args '()) &allow-other-keys) ;; Open cache. @@ -455,7 +492,7 @@ (class 'serial-transaction-rucksack) (if-exists :overwrite) (if-does-not-exist :create) - (cache-class 'standard-cache) + (cache-class 'lazy-cache) (cache-args '()) &allow-other-keys) "Opens the rucksack in the directory designated by DIRECTORY-DESIGNATOR. @@ -729,7 +766,7 @@ (simple-rucksack-error "Class index for ~S doesn't exist in ~A." class rucksack)))) - (btree-delete-key class + (btree-delete-key (class-index-table rucksack) class :if-does-not-exist (if errorp :error :ignore)))) @@ -877,18 +914,20 @@ class object slot old-value new-value old-boundp new-boundp) - (let ((index (rucksack-slot-index rucksack class slot - :errorp nil - :include-superclasses t))) - (when index - (when old-boundp - (index-delete index old-value object - :if-does-not-exist :ignore)) - (when new-boundp - (index-insert index new-value object - :if-exists (if (slot-unique slot) - :error - :overwrite)))))) + ;; SLOT is a slot-definition, not a slot name. + (when (slot-index slot) + (let ((index (rucksack-slot-index rucksack class slot + :errorp nil + :include-superclasses t))) + (when index + (when old-boundp + (index-delete index old-value object + :if-does-not-exist :ignore)) + (when new-boundp + (index-insert index new-value object + :if-exists (if (slot-unique slot) + :error + :overwrite))))))) (defmethod rucksack-slot-index ((rucksack standard-rucksack) class slot From alemmens at common-lisp.net Mon Feb 11 12:47:53 2008 From: alemmens at common-lisp.net (alemmens) Date: Mon, 11 Feb 2008 07:47:53 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack/tests Message-ID: <20080211124753.E8CE45F047@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack/tests In directory clnet:/tmp/cvs-serv22665/tests Modified Files: test.lisp Log Message: Version 0.1.16: improved performance by decreasing persistent consing for btrees and using a lazy-cache. Fixed some small bugs. Added a few handy functions and macros. In detail: Added P-PUSH and P-POP. Improved btree efficiency by switching to a different data structure for the bindings. Instead of using a persistent cons for each key/ value pair, we now put the keys and values directly into the bnode vector. This speeds up most btree operations because it reduces persistent consing when adding new values and it reduces indirections when searching for keys. Renamed BTREE-NODE to BNODE, BTREE-NODE-INDEX to BNODE-BINDINGS, BTREE-NODE-INDEX-COUNT to BNODE-NR-BINDINGS, FIND-BINDING-IN-NODE to FIND-KEY-IN-NODE. Fix a missing argument bug in REMOVE-CLASS-INDEX. Added a LAZY-CACHE which just clears the entire hash table whenever the cache gets full. This improves memory usage, because the normal cache queue kept track of a lot of objects that for some reason couldn't be cleaned up by the implementation's garbage collector. Added the convenience macros RUCKSACK-DO-CLASS and RUCKSACK-DO-SLOT. Made RUCKSACK-DELETE-OBJECT an exported symbol of the RUCKSACK package. Fix a bug in TEST-NON-UNIQUE-BTREE: it should call CHECK-NON-UNIQUE-CONTENTS instead of CHECK-CONTENTS. --- /project/rucksack/cvsroot/rucksack/tests/test.lisp 2008/01/23 15:49:07 1.1 +++ /project/rucksack/cvsroot/rucksack/tests/test.lisp 2008/02/11 12:47:53 1.2 @@ -1,4 +1,4 @@ -;; $Id: test.lisp,v 1.1 2008/01/23 15:49:07 alemmens Exp $ +;; $Id: test.lisp,v 1.2 2008/02/11 12:47:53 alemmens Exp $ (in-package :rucksack-test) @@ -363,7 +363,7 @@ (check-order btree) (check-size btree (- n delete)) (when check-contents - (check-contents btree)) + (check-non-unique-contents btree)) (format t "~&Reinserting~%") (shuffle array) (dotimes (i (floor delete nr-formats)) @@ -377,7 +377,7 @@ (check-order btree) (check-size btree n) (when check-contents - (check-contents btree))))))) + (check-non-unique-contents btree))))))) :ok) (defun btree-stress-test (&key (n 1000)) From alemmens at common-lisp.net Mon Feb 11 12:55:32 2008 From: alemmens at common-lisp.net (alemmens) Date: Mon, 11 Feb 2008 07:55:32 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack Message-ID: <20080211125532.B83774F038@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack In directory clnet:/tmp/cvs-serv23702 Modified Files: done.txt Log Message: Don't forget to mention Brad's tutorial. --- /project/rucksack/cvsroot/rucksack/done.txt 2008/02/11 12:47:52 1.17 +++ /project/rucksack/cvsroot/rucksack/done.txt 2008/02/11 12:55:32 1.18 @@ -1,5 +1,7 @@ * 2008-02-11 - version 0.1.16 +Created new doc directory and added tutorial by Brad Beveridge. + Added P-PUSH and P-POP. Improved btree efficiency by switching to a different data structure From alemmens at common-lisp.net Mon Feb 11 13:00:12 2008 From: alemmens at common-lisp.net (alemmens) Date: Mon, 11 Feb 2008 08:00:12 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack/doc Message-ID: <20080211130012.3B8B463036@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack/doc In directory clnet:/tmp/cvs-serv24432/doc Added Files: do.txt done.txt glossary.txt internals.txt notes.txt talk-eclm2006.txt Log Message: Moved documentation files to doc directory. --- /project/rucksack/cvsroot/rucksack/doc/do.txt 2008/02/11 13:00:12 NONE +++ /project/rucksack/cvsroot/rucksack/doc/do.txt 2008/02/11 13:00:12 1.1 DO: - Make Rucksack crash proof. (Use a copying GC?) - Make sure that the GC gets rid of all obsolete object versions. - Add export/import to s-expression format. This is necessary for migrating existing rucksacks to a new version of Rucksack. - Give each transaction its own commit file (the name can be generated from the transaction id). That's one step towards avoiding locks on transaction commit. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; * MAYBE LATER ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - Maybe signal a continuable error when the in-memory class definition does not correspond to the most recent schema. If the user decides to continue, UPDATE-PERSISTENT-INSTANCE-... will be called when necessary. - Think about non-persistent slots. Should we initialize them during LOAD-OBJECT? Do we need them at all? - I'm not sure that :INCLUDE-SUBCLASSES NIL makes sense for RUCKSACK-MAP-SLOT. Think about this. - Deal with CHANGE-CLASS: call UPDATE-PERSISTENT-INSTANCE-FOR-DIFFERENT-CLASS when necessary. (Maybe it's never necessary and we can just use the existing UPDATE-INSTANCE-FOR-DIFFERENT-CLASS mechanism?) --- /project/rucksack/cvsroot/rucksack/doc/done.txt 2008/02/11 13:00:12 NONE +++ /project/rucksack/cvsroot/rucksack/doc/done.txt 2008/02/11 13:00:12 1.1 * 2008-02-11 - version 0.1.16 Created new doc directory and added tutorial by Brad Beveridge. Added P-PUSH and P-POP. Improved btree efficiency by switching to a different data structure for the bindings. Instead of using a persistent cons for each key/ value pair, we now put the keys and values directly into the bnode vector. This speeds up most btree operations because it reduces persistent consing when adding new values and it reduces indirections when searching for keys. Renamed BTREE-NODE to BNODE, BTREE-NODE-INDEX to BNODE-BINDINGS, BTREE-NODE-INDEX-COUNT to BNODE-NR-BINDINGS, FIND-BINDING-IN-NODE to FIND-KEY-IN-NODE. Fix a missing argument bug in REMOVE-CLASS-INDEX. Added a LAZY-CACHE which just clears the entire hash table whenever the cache gets full. This improves memory usage, because the normal cache queue kept track of a lot of objects that for some reason couldn't be cleaned up by the implementation's garbage collector. Added the convenience macros RUCKSACK-DO-CLASS and RUCKSACK-DO-SLOT. Made RUCKSACK-DELETE-OBJECT an exported symbol of the RUCKSACK package. Fix a bug in TEST-NON-UNIQUE-BTREE: it should call CHECK-NON-UNIQUE-CONTENTS instead of CHECK-CONTENTS. * 2008-02-02 - version 0.1.15 Fixed a garbage collector bug reported by Sean Ross. When the garbage collector deletes object ids from the object table (because the objects are dead and we may want to reuse their ids later for other objects), it should also remove that object from the cache. If it doesn't, there's a possibility that the object id will be reused later for a new object and the cache wil still refer to the old in-memory object. * 2008-01-31 - version 0.1.14 Class and slot indexes now map directly to objects instead of object-ids. This fixes a bug where the garbage collector forgot to add all indexed objects to the root set. (Suggested by Sean Ross.) Increase default cache size to 100,000 objects. * 2008-01-23 - version 0.1.13 Add Brad Beveridge's basic unit test suite (modified to work with lisp-unit instead of 5am). Add Chris Riesbeck's lisp-unit library to help with creating unit test suites. Move all tests to their own directory. Add P-NREVERSE and P-POSITION for persistent lists. Fix bugs in P-REPLACE and P-MAPCAR. * 2008-01-22 - version 0.1.12 Use (ARRAY-DIMENSION buffer 0) instead of LENGTH in LOAD-BUFFER, because we want to ignore the fill pointer here. Thanks to Sean Ross. * 2008-01-22 - version 0.1.11 Fix bug caused by LEAF-DELETE-KEY. Reported and fixed by Brad Beveridge. Fix some typos (:VALUE should be :VALUE=) in index.lisp. * 2008-01-16 - version 0.1.10 When deleting a key from a btree, use the BTREE-KEY= function (not P-EQL) to determine the position of the key. Reported and fixed by Leonid Novikov. * 2007-08-12 - version 0.1.9 Fix btree bug during btree-delete: if we're deleting the biggest key from a leaf, we should update the parents so they'll use the key that has now become the biggest. (Henrik Hjelte.) Try to signal an error when an incompatible value is given to indexed slots, e.g. trying to put a string into a slot with a :symbol-index. (Takehiko Abe) Signal an error during when putting duplicate values into a slot for which duplicate values are not allowed. (Takehiko Abe) Use BTREE-VALUE-TYPE, not BTREE-KEY-TYPE, when type checking a value during BTREE-INSERT. (Takehiko Abe) Wrap COMPILE-FILE calls in a WITH-COMPILATION-UNIT to prevent superfluous warnings about undefined functions. * 2007-03-13 - version 0.1.8 Fix a bug in LEAF-DELETE-KEY (thanks to Henrik Hjelte). Add RUCKSACK-DELETE-OBJECT, RUCKSACK-DELETE-OBJECTS and RUCKSACK-ROOT-P (suggested by Henrik Hjelte). I haven't tested these functions yet. * 2007-01-22 - version 0.1.7 Get rid of two SBCL compiler warnings. (Reported by Cyrus Harmon.) * 2007-01-21 - version 0.1.6 Added serializing/deserializing of structures. Only works on SBCL. (Thanks to Levente M?sz?ros.) * 2006-11-30 - FLET MAP-INDEXES should be LABELS MAP-INDEXES (thanks to Cyrus Harmon). - The :EQUAL parameter for MAP-INDEX-DATA wasn't handled correctly for indexes with non-unique keys (reported by Cyrus Harmon). * 2006-09-04 - Take care of some differences between the MOP implementations of Lispworks and SBCL. Lispworks doesn't call (setf slot-value-using-class) in SHARED-INITIALIZE, but SBCL does. Lispworks calls FINALIZE-INHERITANCE after a class is redefined and a new instance is created, but SBCL doesn't. All tests now work for Lispworks (5.0) and SBCL (0.9.16). - Some work on a copying GC. * 2006-09-03 - Handle updates of in-memory persistent objects by writing a method for Lisp's UPDATE-INSTANCE-FOR-REDEFINED-CLASS that marks the object as dirty and calls Rucksack's UPDATE-PERSISTENT-INSTANCE-FOR-REDEFINED-CLASS. * 2006-09-01 - Get rid of the Lispworks-specific PROCESS-A-SLOT-OPTION stuff and handle the slot options in a way that's compatible with AMOP. - Removed INITARGS argument for UPDATE-PERSISTENT-INSTANCE-FOR-REDEFINED-CLASS, because it turns out not to be necessary (see details in notes.txt). - Add explanation to test-index-1a.lisp about the use of (eval-when (:compile-toplevel :load-toplevel :execute) ...) - Replace *RUCKSACK* by RS in test-*.lisp. * 2006-08-31 - Write test cases for schema updates and user defined methods for UPDATE-PERSISTENT-INSTANCE-FOR-REDEFINED-CLASS. - Indexing: compare the specified slot/class indexes to the indexes that exist in the Rucksack, *not* to the indexes specified in the previous version of the class definition. Otherwise we get inconsistencies when we recompile class definitions from scratch with a Rucksack that already exists. - Write test case for slots with redefined indexes. This also tests the default method for UPDATE-PERSISTENT-INSTANCE-FOR-REDEFINED-CLASS. * 2006-08-30 - FINALIZE-INHERITANCE: Compute slot diffs for obsolete schemas. - More work on UPDATE-PERSISTENT-INSTANCE-FOR-REDEFINED-CLASS. * 2006-08-29 - Partial implementation of UPDATE-PERSISTENT-INSTANCE-FOR-REDEFINED-CLASS & friends. * 2006-08-29 - Example-1: indexing should still work after recompiling. - RUCKSACK-UPDATE-SLOT-INDEXES: Remove indexes for old slots that don't exist anymore. - Some work on schema updates. - Compute persistent slots at the right moment. * 2006-08-26 - Make sure that indexing works correctly with subclasses. - Fix some more indexing bugs. * 2006-08 - The class and slot indexes were normal hash tables, but they should be persistent objects like everything else: I replaced them by btrees. - Get process-lock and process-unlock working on SBCL (thanks to Geoff Cant). * 2006-08 - Save and load the index tables when closing/opening a rucksack. - Implement the :UNIQUE slot option. - Improve predefined slot index specs. * 2006-08 - Add a SERIAL-TRANSACTION-RUCKSACK class that allows for only one transaction at a time (by using a transaction lock). This allows for a fast track towards a working Rucksack implementation. Then parallel transactions can be added later. - Don't do any GC at all while a transaction is writing objects to disk. Instead we keep track of the amount of disk space allocated by the committing transaction. Then we do a (partial) GC immediately after committing the transaction. --- /project/rucksack/cvsroot/rucksack/doc/glossary.txt 2008/02/11 13:00:12 NONE +++ /project/rucksack/cvsroot/rucksack/doc/glossary.txt 2008/02/11 13:00:12 1.1 ;; $Header: /project/rucksack/cvsroot/rucksack/doc/glossary.txt,v 1.1 2008/02/11 13:00:10 alemmens Exp $ * block A free list block on disk. Each block has a fixed size header (currently 8 octets). The header is followed by a serialized integer: if this integer is positive, it is the id of the object whose contents are serialized in this block. If the integer is negative, the block belongs to a free list and is not in use; the integer's absolute value is the size of the block (the sweep phase of the garbage collector needs this block size). Also used as an abbreviation for a block's heap position. * class designator Either a class name (i.e. a symbol) or a class. See the CLHS glossary. * compatible object version The object version that's compatible with a transaction T is the most recent version that's not younger than T. * index spec A non-keyword symbol (the name of an indexing class) or a list starting with a symbol (the name of an indexing class) followed by a plist of keywords and values (initargs for the indexing class). Examples: BTREE, (BTREE :KEY< < :VALUE= P-EQL). * index spec designator Either an index spec or the name (i.e. a keyword) of an index spec that has been defined with DEFINE-INDEX-SPEC. Example: :STRING-INDEX. * object version list The list with committed object versions. The list is ordered by transaction timestamp of the transaction that created/modified the object. The ordering is most recent transaction first. * open transaction A transaction that hasn't rolled back or committed yet. * partial transaction This is shorthand for 'partially committed transaction', i.e. a transaction that has started a commit operation but hasn't finished it yet. * root object An object that's part of the root set. * root set The root set for a garbage collector is the set of objects from which all other live objects can be reached. Any object that can not be reached from a root object is considered dead: its disk space may be reused by another object if necessary. * slot designator Either a symbol (a slot name) or a slot-definition metaobject. --- /project/rucksack/cvsroot/rucksack/doc/internals.txt 2008/02/11 13:00:12 NONE +++ /project/rucksack/cvsroot/rucksack/doc/internals.txt 2008/02/11 13:00:12 1.1 RUCKSACK INTERNALS * Free list heaps A free-list-heap starts with an 8-byte address ('disk pointer') that points to the end of the heap. This is followed by as many 'disk-pointers' as the heap has free lists: each disk pointer points to the first free block on that free list. * Object table The object table is a free-list-heap with exactly one free list, so it contains one free list pointer. * The real heap The 'real' heap contains 32 free lists, with a smallest block size of 16 (i.e. 2^4) and a largest block size of 2^(4+31), i.e. 32 GB. * Blocks and objects The heap contains blocks of different sizes (currently the block sizes are powers of 2; starting with blocks of 16 bytes). Each block starts with an 8-byte header. If the block is unoccupied, the header contains a pointer to the next block in the free list; otherwise it contains the size of the block. The header is followed by a serialized value which is either NIL, a positive integer or a negative integer. If it's NIL, the block is occupied by an object of which there is exactly one version. If it's a positive integer, the block is occupied by an object and the integer is a pointer to (the heap position of) the previously saved version of the object. If it's negative, the block belongs to a free list and is not in use; the integer's absolute value is the size of the block (the sweep phase of the garbage collector needs this block size). [OCCUPIED BLOCK]: 0- 8: block size 8-15: pointer to previous version (nil or an integer) .. : transaction id .. : object id .. : nr of slots .. : schema id ...: serialized slots ...: maybe some free space [FREE BLOCK]: 0- 8: pointer to next free block .. : the negative of the block size ... : free space [9 lines skipped] --- /project/rucksack/cvsroot/rucksack/doc/notes.txt 2008/02/11 13:00:12 NONE +++ /project/rucksack/cvsroot/rucksack/doc/notes.txt 2008/02/11 13:00:12 1.1 [102 lines skipped] --- /project/rucksack/cvsroot/rucksack/doc/talk-eclm2006.txt 2008/02/11 13:00:12 NONE +++ /project/rucksack/cvsroot/rucksack/doc/talk-eclm2006.txt 2008/02/11 13:00:12 1.1 [1293 lines skipped] From alemmens at common-lisp.net Mon Feb 11 13:00:21 2008 From: alemmens at common-lisp.net (alemmens) Date: Mon, 11 Feb 2008 08:00:21 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack Message-ID: <20080211130021.2DBB363036@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack In directory clnet:/tmp/cvs-serv24432 Removed Files: do.txt done.txt glossary.txt internals.txt notes.txt talk-eclm2006.txt Log Message: Moved documentation files to doc directory. From alemmens at common-lisp.net Mon Feb 11 14:21:57 2008 From: alemmens at common-lisp.net (alemmens) Date: Mon, 11 Feb 2008 09:21:57 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack Message-ID: <20080211142157.AB1D15F05D@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack In directory clnet:/tmp/cvs-serv19611 Modified Files: manual.txt Log Message: Mention the tutorial. --- /project/rucksack/cvsroot/rucksack/manual.txt 2008/01/24 12:38:52 1.2 +++ /project/rucksack/cvsroot/rucksack/manual.txt 2008/02/11 14:21:57 1.3 @@ -16,3 +16,7 @@ 6. (run-tests) +* The tutorial + +The tutorial by Brad Beveridge (in doc/rucksack-tutorial.lisp) is a +good next step. From alemmens at common-lisp.net Tue Feb 19 22:44:06 2008 From: alemmens at common-lisp.net (alemmens) Date: Tue, 19 Feb 2008 17:44:06 -0500 (EST) Subject: [rucksack-cvs] CVS rucksack Message-ID: <20080219224406.6D02574390@common-lisp.net> Update of /project/rucksack/cvsroot/rucksack In directory clnet:/tmp/cvs-serv14617 Modified Files: index.lisp objects.lisp p-btrees.lisp package.lisp rucksack.asd Log Message: Version 0.1.17: add some list functions and replace persistent lists by persistent btrees for non-unique slot indexes. --- /project/rucksack/cvsroot/rucksack/index.lisp 2008/01/22 15:59:24 1.11 +++ /project/rucksack/cvsroot/rucksack/index.lisp 2008/02/19 22:44:05 1.12 @@ -1,4 +1,4 @@ -;; $Id: index.lisp,v 1.11 2008/01/22 15:59:24 alemmens Exp $ +;; $Id: index.lisp,v 1.12 2008/02/19 22:44:05 alemmens Exp $ (in-package :rucksack) @@ -104,8 +104,13 @@ (funcall function equal value) ;; We have a persistent list of values: call FUNCTION for ;; each element of that list. - (p-mapc (lambda (elt) (funcall function equal elt)) - value)))) + (etypecase value + ((or null persistent-cons) + (p-mapc (lambda (elt) (funcall function equal elt)) + value)) + (persistent-object-set + (map-set-btree value + (lambda (elt) (funcall function equal elt)))))))) (apply #'map-btree index function :order order args))) @@ -193,16 +198,27 @@ (eval-when (:compile-toplevel :load-toplevel :execute) (define-index-spec :number-index - '(btree :key< < :value= p-eql)) + '(btree :key< < + :value= p-eql + :value-type persistent-object)) (define-index-spec :string-index - '(btree :key< string< :value= p-eql :key-type string)) + '(btree :key< string< + :value= p-eql + :value-type persistent-object + :key-type string)) (define-index-spec :symbol-index - '(btree :key< string< :value= p-eql :key-type symbol)) + '(btree :key< string< + :value= p-eql + :value-type persistent-object + :key-type symbol)) (define-index-spec :case-insensitive-string-index - '(btree :key< string-lessp :value= p-eql :key-type string)) + '(btree :key< string-lessp + :value= p-eql + :value-type persistent-object + :key-type string)) (define-index-spec :trimmed-string-index ;; Like :STRING-INDEX, but with whitespace trimmed left @@ -210,4 +226,5 @@ '(btree :key< string< :key-key trim-whitespace :value= p-eql + :value-type persistent-object :key-type string))) --- /project/rucksack/cvsroot/rucksack/objects.lisp 2008/02/11 12:47:52 1.21 +++ /project/rucksack/cvsroot/rucksack/objects.lisp 2008/02/19 22:44:06 1.22 @@ -1,4 +1,4 @@ -;; $Id: objects.lisp,v 1.21 2008/02/11 12:47:52 alemmens Exp $ +;; $Id: objects.lisp,v 1.22 2008/02/19 22:44:06 alemmens Exp $ (in-package :rucksack) @@ -181,6 +181,9 @@ (defmethod p-car ((cons persistent-cons)) (persistent-data-read #'car cons)) +(defmethod p-car ((x (eql nil))) + nil) + (defmethod (setf p-car) (value (cons persistent-cons)) (persistent-data-write (lambda (new-value contents) (setf (car contents) new-value)) @@ -190,6 +193,9 @@ (defmethod p-cdr ((cons persistent-cons)) (persistent-data-read #'cdr cons)) +(defmethod p-cdr ((x (eql nil))) + nil) + (defmethod (setf p-cdr) (value (cons persistent-cons)) (persistent-data-write (lambda (new-value contents) (setf (cdr contents) new-value)) @@ -212,6 +218,30 @@ ;; Other functions from chapter 14 of the spec. ;; +(defun p-caar (object) + "The persistent equivalent of CAAR." + (p-car (p-car object))) + +(defun p-cadr (object) + "The persistent equivalenet of CADR." + (p-car (p-cdr object))) + +(defun p-cdar (object) + "The persistent equivalent of CDAR." + (p-cdr (p-car object))) + +(defun p-cddr (object) + "The persistent equivalent of CDDR." + (p-cdr (p-cdr object))) + + +(defmethod p-consp ((object persistent-cons)) + t) + +(defmethod p-consp ((object t)) + nil) + + (defmethod p-endp ((object (eql nil))) t) @@ -223,8 +253,19 @@ :datum object :expected-type '(or null persistent-cons))) -(defmethod p-cddr ((cons persistent-cons)) - (p-cdr (p-cdr cons))) + +(defun p-last (list &optional (n 1)) + "Returns the last persistent cons cell of a persistent list (or +NIL if the list is empty)." + (unless (= n 1) + ;; DO: Implement this case. + (error "The optional argument for P-LAST isn't implemented yet.")) + (let ((result list) + (tail (p-cdr list))) + (loop until (p-endp tail) + do (shiftf result tail (p-cdr tail))) + result)) + (defun p-mapcar (function list) ;; DO: Accept more than one list argument. --- /project/rucksack/cvsroot/rucksack/p-btrees.lisp 2008/02/11 12:47:52 1.18 +++ /project/rucksack/cvsroot/rucksack/p-btrees.lisp 2008/02/19 22:44:06 1.19 @@ -1,4 +1,4 @@ -;; $Id: p-btrees.lisp,v 1.18 2008/02/11 12:47:52 alemmens Exp $ +;; $Id: p-btrees.lisp,v 1.19 2008/02/19 22:44:06 alemmens Exp $ (in-package :rucksack) @@ -11,6 +11,22 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; #| + +This is a modified version of the in-memory btrees. We use p-arrays, +p-conses and persistent-objects. + +Basically, a B-tree is a balanced multi-way tree. + +The reason for using multi-way trees instead of binary trees is that +the nodes are expected to be on disk; it would be inefficient to have +to execute a disk operation for each tree node if it contains only 2 +keys. + +The key property of B-trees is that each possible search path has the same +length, measured in terms of nodes. +|# + +#| ;; Btrees #:btree #:btree-key< #:btree-key<= #:btree-key= #:btree-key>= #:btree-key> @@ -127,25 +143,41 @@ ORDER is either :ASCENDING (default) or :DESCENDING.")) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;;; B-trees -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; +;; Set btrees +;; +;; A 'set btree' is a special kind of btree that's used to implement sets. +;; With set btrees, the 'value' part of a btree binding is irrelevant, because +;; all information is in the keys themselves. +;; -#| +(defgeneric set-btree-insert (set value) + (:documentation "Add a value to a set-btree. This will modify the +set-btree.")) -This is a modified version of the in-memory btrees. We use p-arrays, -p-conses and persistent-objects. +(defgeneric set-btree-delete (set value &key if-does-not-exist) + (:documentation "Removes a value from a set-btree and returns the +modified set-btree. If the value is not present in the set, this +function signals an error if IF-DOES-NOT-EXIST is :ERROR (if +IF-DOES-NOT-EXIST is :IGNORE, it returns nil).")) -Basically, a B-tree is a balanced multi-way tree. +(defgeneric set-btree-search (set value &key errorp default-value) + (:documentation + "Returns VALUE if it is present in the btree-set SET. Otherwise +the result depends on the ERRORP option: if ERRORP is true, a +btree-search-error is signalled; otherwise, DEFAULT-VALUE is +returned.")) -The reason for using multi-way trees instead of binary trees is that -the nodes are expected to be on disk; it would be inefficient to have -to execute a disk operation for each tree node if it contains only 2 -keys. +(defgeneric map-set-btree (set function) + (:documentation + "Calls a unary function for each value in a btree-set.")) + +(defgeneric set-btree-empty-p (set) + (:documentation "Returns true iff a btree-set is empty.")) + +(defgeneric set-count (set) + (:documentation "Returns the number of values in a btree-set.")) -The key property of B-trees is that each possible search path has the same -length, measured in terms of nodes. -|# ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Conditions @@ -213,7 +245,9 @@ :reader btree-unique-keys-p :initform t :documentation - "If false, one key can correspond to more than one value.") + "If false, one key can correspond to more than one value. +In that case, the values are assumed to be objects for which the function +OBJECT-ID is defined (and returns a unique integer).") (key-type :initarg :key-type :reader btree-key-type :initform t @@ -323,8 +357,65 @@ (:metaclass persistent-class)) +(defmethod initialize-instance :after ((node bnode) + &key btree &allow-other-keys) + (setf (bnode-bindings node) (p-make-array (* 2 (btree-max-node-size btree)) + :initial-element nil) + (bnode-nr-bindings node) 0)) + + +(defmethod print-object ((node bnode) stream) + (print-unreadable-object (node stream :type t :identity t) + (format stream "with ~D bindings" (bnode-nr-bindings node)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;; Set btrees +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defclass set-btree (btree) + () + (:default-initargs + ;; We use a special bnode class because we don't care about the binding + ;; values (so we can optimize them away later). + :node-class 'set-bnode + ;; We use small nodes, because we expect relatively many sets + ;; with only a few elements. + :max-node-size 8 + ;; The keys of a set-btree are unique (otherwise it wouldn't be a set + ;; but a bag). + :unique-keys-p t) + (:metaclass persistent-class) + (:documentation "A persistent set of objects, implemented as a btree.")) + +(defclass set-bnode (bnode) + () + (:metaclass persistent-class) + (:documentation "A special kind of btree node, used to implement set btrees.")) + + +;; Sets of persistent objects are implemented as set-btrees. They're +;; used to represent the values of a btree that maps slot values to +;; one or more persistent objects (i.e. they're used for non-unique +;; slot indexes). They can also be used separately. + +(defclass persistent-object-set (set-btree) + () + (:default-initargs + ;; For sets of persistent-objects we store the objects as keys, + ;; but we use the object-ids to compare keys. + :key-key 'object-id) + (:metaclass persistent-class) + (:documentation "A persistent set of persistent-objects, implemented +as a btree.")) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Some info functions +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;; -;; Info functions +;; Counting keys or values ;; (defmethod btree-nr-keys ((btree btree)) @@ -344,14 +435,71 @@ (btree-nr-keys btree) (let ((result 0)) (map-btree-keys btree - (lambda (key p-values) + (lambda (key set) (declare (ignore key)) - (incf result (p-length p-values)))) + (incf result + (etypecase set + (persistent-object-set (set-count set)) + (persistent-cons (p-length set)) + (null 0))))) result))) ;; +;; Depth and balance +;; + +(defmethod node-max-depth ((node bnode)) + (if (bnode-leaf-p node) + 0 + (loop for i below (bnode-nr-bindings node) + for binding = (node-binding node i) + maximize (1+ (node-max-depth (binding-value binding)))))) + +(defmethod node-min-depth ((node bnode)) + (if (bnode-leaf-p node) + 0 + (loop for i below (bnode-nr-bindings node) + for binding = (node-binding node i) + minimize (1+ (node-min-depth (binding-value binding)))))) + +(defmethod btree-depths ((btree btree)) + (if (slot-value btree 'root) + (values (node-min-depth (btree-root btree)) + (node-max-depth (btree-root btree))) + (values 0 0))) + +(defmethod btree-balanced-p ((btree btree)) + (multiple-value-bind (min max) + (btree-depths btree) + (<= (- max min) 1))) + + +;; +;; Debugging +;; + +(defun display-node (node) + (pprint (node-as-cons node))) + +(defun node-as-cons (node &optional (unique-keys t)) + (loop with leaf-p = (bnode-leaf-p node) + for i below (bnode-nr-bindings node) + for value = (node-binding-value node i) + collect (list (node-binding-key node i) + (if leaf-p + (if unique-keys + value + (unwrap-persistent-list value)) + (node-as-cons value))))) + +(defun btree-as-cons (btree) + (and (slot-value btree 'root) + (node-as-cons (btree-root btree) (btree-unique-keys-p btree)))) + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Bindings -;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defstruct binding key @@ -396,72 +544,39 @@ value (p-cons value '()))) -;; -;; - -(defmethod initialize-instance :after ((node bnode) - &key btree &allow-other-keys) - (setf (bnode-bindings node) (p-make-array (* 2 (btree-max-node-size btree)) - :initial-element nil) - (bnode-nr-bindings node) 0)) - - -(defmethod print-object ((node bnode) stream) - (print-unreadable-object (node stream :type t :identity t) - (format stream "with ~D bindings" (bnode-nr-bindings node)))) -;; -;; Debugging -;; - -(defun display-node (node) - (pprint (node-as-cons node))) - -(defun node-as-cons (node &optional (unique-keys t)) - (loop with leaf-p = (bnode-leaf-p node) - for i below (bnode-nr-bindings node) - for value = (node-binding-value node i) - collect (list (node-binding-key node i) - (if leaf-p - (if unique-keys - value - (unwrap-persistent-list value)) - (node-as-cons value))))) - -(defun btree-as-cons (btree) - (and (slot-value btree 'root) - (node-as-cons (btree-root btree) (btree-unique-keys-p btree)))) - - -;; -;; Depth and balance -;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Set btrees and persistent object sets: implementation +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -(defmethod node-max-depth ((node bnode)) - (if (bnode-leaf-p node) - 0 - (loop for i below (bnode-nr-bindings node) - for binding = (node-binding node i) - maximize (1+ (node-max-depth (binding-value binding)))))) +(defmethod set-btree-insert ((set set-btree) value) + (btree-insert set value nil :if-exists :overwrite)) -(defmethod node-min-depth ((node bnode)) - (if (bnode-leaf-p node) - 0 - (loop for i below (bnode-nr-bindings node) - for binding = (node-binding node i) - minimize (1+ (node-min-depth (binding-value binding)))))) +(defmethod set-btree-delete ((set set-btree) value &key (if-does-not-exist nil)) + (btree-delete-key set value :if-does-not-exist if-does-not-exist)) -(defmethod btree-depths ((btree btree)) - (if (slot-value btree 'root) - (values (node-min-depth (btree-root btree)) - (node-max-depth (btree-root btree))) - (values 0 0))) +(defmethod set-btree-search ((set set-btree) value &key errorp default-value) + (btree-search set value + :errorp errorp + :default-value default-value)) + +(defmethod map-set-btree ((set set-btree) function) + (map-btree-keys set + (lambda (key value) + (declare (ignore value)) + (funcall function key)))) + +(defmethod set-btree-empty-p ((set set-btree)) + (or (not (slot-boundp set 'root)) + (let ((root (slot-value set 'root))) + (and (bnode-leaf-p root) + (= 0 (bnode-nr-bindings root)))))) -(defmethod btree-balanced-p ((btree btree)) - (multiple-value-bind (min max) - (btree-depths btree) - (<= (- max min) 1))) +(defmethod set-count ((set set-btree)) + (btree-nr-values set)) +;; DO: Change the binding functions for SET-BTREES to optimize the values +;; away. ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Search @@ -829,12 +944,32 @@ :key key :value value)))) ;; For non-unique keys, we ignore the :IF-EXISTS option and - ;; just add value to the list of values (unless value is already + ;; just add value to the set of values (unless value is already ;; there). - (unless (p-find value (node-binding-value leaf position) - :test (btree-value= btree)) - (setf (node-binding-value leaf position) - (p-cons value (node-binding-value leaf position))))) + (let ((set (node-binding-value leaf position))) + (etypecase set + (persistent-object-set + (set-btree-insert set value)) + (persistent-cons + (if (eql (btree-value-type btree) 'persistent-object) + ;; The values are persistent objects, so we know we + ;; can put them in a persistent-object-set. Let's + ;; do that, now we know that there are at least two + ;; objects in the set. + (let ((new-set (make-instance 'persistent-object-set))) + (set-btree-insert new-set (p-car set)) + (set-btree-insert new-set value) + (setf (node-binding-value leaf position) new-set)) + ;; We don't know anything about the values, so we have to + ;; resort to a persistent list to store the values. This + ;; will lead to bad performance if few keys map to many + ;; values, but we don't have much choice. + ;; DO: Use set-btrees for other types for which we can come + ;; up with some kind of ordering (like strings, numbers, + ;; etcetera). + (unless (p-find value set :test (btree-value= btree)) + (setf (node-binding-value leaf position) + (p-cons value (node-binding-value leaf position))))))))) ;; The key doesn't exist yet. Create a new binding and add it to the ;; leaf index in the right position. (progn @@ -891,22 +1026,26 @@ ;; just delete the value from the list of values (unless it's ;; not there). (flet ((check (x) (funcall (btree-value= btree) x value))) - (let ((values (binding-value binding))) - ;; EFFICIENCY: We walk the list twice now, which is not - ;; necessary. Write a special purpose function for this - ;; instead of just using P-FIND and P-DELETE. - (if (p-find value values :test (btree-value= btree)) - (if (null (p-cdr values)) - ;; This is the last value in the list: remove the - ;; key. - (btree-delete-key btree key) - ;; There's more than one value in the list: delete the - ;; value that must be deleted and keep the other values. - (setf (node-binding-value node position) - (p-delete-if #'check (binding-value binding) - :count 1))) - ;; The value is not in the list: forget it. - (forget-it))))))))) + (let ((set (binding-value binding))) + (etypecase set [41 lines skipped] --- /project/rucksack/cvsroot/rucksack/package.lisp 2008/02/11 12:47:52 1.13 +++ /project/rucksack/cvsroot/rucksack/package.lisp 2008/02/19 22:44:06 1.14 @@ -1,4 +1,4 @@ -;; $Id: package.lisp,v 1.13 2008/02/11 12:47:52 alemmens Exp $ +;; $Id: package.lisp,v 1.14 2008/02/19 22:44:06 alemmens Exp $ #-(or allegro lispworks sbcl openmcl) (error "Unsupported implementation: ~A" (lisp-implementation-type)) @@ -32,7 +32,9 @@ #:object-id #:p-cons #:p-array #:p-eql - #:p-car #:p-cdr #:p-list + #:p-car #:p-cdr #:p-list #:p-last + #:p-endp #:p-consp + #:p-caar #:p-cadr #:p-cdar #:p-cddr #:unwrap-persistent-list #:p-mapcar #:p-mapc #:p-maplist #:p-mapl #:p-member-if --- /project/rucksack/cvsroot/rucksack/rucksack.asd 2008/02/11 12:47:52 1.18 +++ /project/rucksack/cvsroot/rucksack/rucksack.asd 2008/02/19 22:44:06 1.19 @@ -1,9 +1,9 @@ -;;; $Id: rucksack.asd,v 1.18 2008/02/11 12:47:52 alemmens Exp $ +;;; $Id: rucksack.asd,v 1.19 2008/02/19 22:44:06 alemmens Exp $ (in-package :cl-user) (asdf:defsystem :rucksack - :version "0.1.16" + :version "0.1.17" :serial t :components ((:file "queue") (:file "package")