[climacs-cvs] CVS update: climacs/base-test.lisp climacs/base.lisp climacs/buffer-test.lisp climacs/climacs.asd climacs/packages.lisp

Aleksandar Bakic abakic at common-lisp.net
Thu Aug 4 22:07:49 UTC 2005


Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv5255

Modified Files:
	base-test.lisp base.lisp buffer-test.lisp climacs.asd 
	packages.lisp 
Log Message:
Added cl-automaton module and support for regexp searches. Below are
some notes. Also modified one constituentp-related test.

Instead of having module "cl-automaton" within the :climacs defsystem,
the module could be turned into a dependence on :automaton, defined in
cl-automaton/automaton.asd. Similarly for cl-automaton/automaton-test.asd.

For slower buffer implementations, a buffer iterator is needed for
higher performance of regexp searches. Greedy matching should be
improved (see automaton::run-to-first-unmatch).

Perhaps, fast (tabular) automaton representation should be implemented,
unless it would be taking way too much space.

Incremental regexp search needs to be done.


Date: Fri Aug  5 00:07:45 2005
Author: abakic

Index: climacs/base-test.lisp
diff -u climacs/base-test.lisp:1.14 climacs/base-test.lisp:1.15
--- climacs/base-test.lisp:1.14	Sun Jul 17 19:20:27 2005
+++ climacs/base-test.lisp	Fri Aug  5 00:07:44 2005
@@ -457,7 +457,7 @@
    (constituentp #\Tab)
    (constituentp "a")
    (constituentp #\Null))
-  t nil nil nil nil nil)
+  t nil nil nil nil #-sbcl nil #+sbcl t)
 
 (defmultitest whitespacep.test-1
   (values
@@ -779,7 +779,7 @@
 (defmultitest tabify-buffer-region.test-1
   (let ((buffer (make-instance %%buffer)))
     (insert-buffer-sequence buffer 0 "c       l       im              acs")
-    (climacs-base::tabify-buffer-region buffer 0 (size buffer) 8)    
+    (climacs-base::tabify-buffer-region buffer 0 (size buffer) 8)
     (buffer-sequence buffer 0 (size buffer)))
   "c	l	im		acs")
 
@@ -1103,6 +1103,36 @@
      (buffer-search-backward buffer 1 "")))
   3 3 0 8 nil nil 0 1)
 
+(defmultitest buffer-re-search-forward.test-1
+  (let ((buffer (make-instance %%buffer))
+	(a1 (automaton::determinize
+	     (regexp-automaton (string-regexp "i[mac]+s"))))
+	(a2 (automaton::determinize
+	     (regexp-automaton (string-regexp "[^aeiou][aeiou]")))))
+    (insert-buffer-sequence buffer 0 "
+climacs")
+      (values
+       (buffer-re-search-forward a1 buffer 0)
+       (buffer-re-search-forward a2 buffer 1)
+       (buffer-re-search-forward a1 buffer 4)
+       (buffer-re-search-forward a2 buffer 6)))
+  3 2 nil nil)
+
+(defmultitest buffer-re-search-backward.test-1
+  (let ((buffer (make-instance %%buffer))
+	(a1 (climacs-base::reversed-deterministic-automaton
+	     (regexp-automaton (string-regexp "i[ma]+c"))))
+	(a2 (climacs-base::reversed-deterministic-automaton
+	     (regexp-automaton (string-regexp "[^aeiou][aeiou]")))))
+    (insert-buffer-sequence buffer 0 "
+climacs")
+      (values
+       (buffer-re-search-backward a1 buffer 7)
+       (buffer-re-search-backward a2 buffer 7)
+       (buffer-re-search-backward a1 buffer 5)
+       (buffer-re-search-backward a2 buffer 2)))
+  3 4 nil nil)
+
 (defmultitest search-forward.test-1
   (let ((buffer (make-instance %%buffer)))
     (insert-buffer-sequence buffer 0 "
@@ -1156,6 +1186,62 @@
     (let ((m (clone-mark (low-mark buffer) :right)))
       (setf (offset m) 3)
       (search-backward m "klimaks")
+      (offset m)))
+  3)
+
+(defmultitest re-search-forward.test-1
+  (let ((buffer (make-instance %%buffer)))
+    (insert-buffer-sequence buffer 0 "
+climacs")
+    (let ((m (clone-mark (low-mark buffer) :left)))
+      (setf (offset m) 0)
+      (re-search-forward m "[mac]{3}")
+      (offset m)))
+  7)
+
+(defmultitest re-search-forward.test-2
+  (let ((buffer (make-instance %%buffer)))
+    (insert-buffer-sequence buffer 0 "climacs")
+    (let ((m (clone-mark (low-mark buffer) :right)))
+      (setf (offset m) 3)
+      (re-search-forward m "[mac]{3}")
+      (offset m)))
+  6)
+
+(defmultitest re-search-forward.test-3
+  (let ((buffer (make-instance %%buffer)))
+    (insert-buffer-sequence buffer 0 "climacs")
+    (let ((m (clone-mark (low-mark buffer) :right)))
+      (setf (offset m) 3)
+      (re-search-forward m "klimaks")
+      (offset m)))
+  3)
+
+(defmultitest re-search-backward.test-1
+  (let ((buffer (make-instance %%buffer)))
+    (insert-buffer-sequence buffer 0 "climacs
+")
+    (let ((m (clone-mark (low-mark buffer) :left)))
+      (setf (offset m) 8)
+      (re-search-backward m "[mac]{3}")
+      (offset m)))
+  3)
+
+(defmultitest re-search-backward.test-2
+  (let ((buffer (make-instance %%buffer)))
+    (insert-buffer-sequence buffer 0 "climacs")
+    (let ((m (clone-mark (low-mark buffer) :right)))
+      (setf (offset m) 6)
+      (re-search-backward m "[mac]{3}")
+      (offset m)))
+  3)
+
+(defmultitest re-search-backward.test-3
+  (let ((buffer (make-instance %%buffer)))
+    (insert-buffer-sequence buffer 0 "climacs")
+    (let ((m (clone-mark (low-mark buffer) :right)))
+      (setf (offset m) 3)
+      (re-search-backward m "klimaks")
       (offset m)))
   3)
 


Index: climacs/base.lisp
diff -u climacs/base.lisp:1.39 climacs/base.lisp:1.40
--- climacs/base.lisp:1.39	Mon May 30 11:09:48 2005
+++ climacs/base.lisp	Fri Aug  5 00:07:44 2005
@@ -608,6 +608,62 @@
 	  return i
 	finally (return nil)))
 
+(defun non-greedy-match-forward (a buffer i)
+  (let ((p (automaton::initial a)))
+    (loop for j from i below (size buffer)
+       for q = (automaton::sstep p (buffer-object buffer j)) do
+	 (unless q
+	   (return nil))
+	 (if (automaton::accept q)
+	     (return (1+ j))
+	     (setq p q))
+       finally (return nil))))
+
+(defun buffer-re-search-forward (a buffer offset)
+  "Returns as the first value the smallest offset of BUFFER >= OFFSET
+with contents accepted by deterministic automaton A; otherwise,
+returns nil. If the first value is non-nil, the second value is the
+offset after the matched contents."
+  (if (automaton::singleton a)
+      (buffer-search-forward buffer offset (automaton::singleton a))
+      (loop for i from offset below (size buffer) do
+	 (let ((j (non-greedy-match-forward a buffer i)))
+	   (when j (return (values i j))))
+	 finally (return nil))))
+
+(defun reversed-deterministic-automaton (a)
+  "Reverses and determinizes A, then returns it."
+  (if (automaton::singleton a)
+      (progn
+	(setf (automaton::singleton a) (reverse (automaton::singleton a)))
+	a)
+      (automaton::determinize2
+       a
+       (make-instance 'automaton::state-set :ht (automaton::areverse a)))))
+
+(defun non-greedy-match-backward (a buffer i)
+  (let ((p (automaton::initial a)))
+    (loop for j downfrom i to 0
+       for q = (automaton::sstep p (buffer-object buffer j)) do
+	 (unless q
+	   (return nil))
+	 (if (automaton::accept q)
+	     (return j)
+	     (setq p q))
+       finally (return nil))))
+
+(defun buffer-re-search-backward (a buffer offset)
+  "Returns as the first value the largest offset of BUFFER <= OFFSET
+with contents accepted by (reversed) deterministic automaton A;
+otherwise, returns nil. If the first value is non-nil, the second
+value is the offset after the matched contents."
+  (if (automaton::singleton a)
+      (buffer-search-backward buffer offset (automaton::singleton a))
+      (loop for i downfrom (min offset (1- (size buffer))) to 0 do
+	 (let ((j (non-greedy-match-backward a buffer i)))
+	   (when j (return (values j i))))
+	 finally (return nil))))
+
 (defun search-forward (mark vector &key (test #'eql))
   "move MARK forward after the first occurence of VECTOR after MARK"
   (let ((offset (buffer-search-forward
@@ -621,6 +677,29 @@
 		 (buffer mark) (offset mark) vector :test test)))
     (when offset
       (setf (offset mark) offset))))
+
+(defun re-search-forward (mark re)
+  "move MARK forward after the first occurence of string matching RE
+after MARK"
+  (let ((a (automaton::determinize
+	     (automaton::regexp-automaton
+	      (automaton::string-regexp re)))))
+    (multiple-value-bind (i j)
+	(buffer-re-search-forward a (buffer mark) (offset mark))
+      (when i
+	(setf (offset mark) j)))))
+
+(defun re-search-backward (mark re)
+  "move MARK backward before the first occurence of string matching RE
+before MARK"
+  (let ((a (reversed-deterministic-automaton
+	    (automaton::regexp-automaton
+	     (automaton::string-regexp re)))))
+    (multiple-value-bind (i j)
+	(buffer-re-search-backward a (buffer mark) (offset mark))
+      (declare (ignorable j))
+    (when i
+      (setf (offset mark) i)))))
 
 (defun buffer-search-word-backward (buffer offset word &key (test #'eql))
   "return the largest offset of BUFFER <= (- OFFSET (length WORD))


Index: climacs/buffer-test.lisp
diff -u climacs/buffer-test.lisp:1.20 climacs/buffer-test.lisp:1.21
--- climacs/buffer-test.lisp:1.20	Tue Mar 15 19:41:18 2005
+++ climacs/buffer-test.lisp	Fri Aug  5 00:07:44 2005
@@ -4,7 +4,7 @@
 ;;; 
 
 (cl:defpackage :climacs-tests
-  (:use :cl :rtest :climacs-buffer :climacs-base))
+  (:use :cl :rtest :climacs-buffer :climacs-base :automaton))
 
 (cl:in-package :climacs-tests)
 


Index: climacs/climacs.asd
diff -u climacs/climacs.asd:1.35 climacs/climacs.asd:1.36
--- climacs/climacs.asd:1.35	Sun Jul 24 18:44:48 2005
+++ climacs/climacs.asd	Fri Aug  5 00:07:45 2005
@@ -30,13 +30,19 @@
 (defsystem :climacs
   :depends-on (:mcclim :flexichain)
   :components
-  ((:module "Persistent"
+  ((:module "cl-automaton"
+	    :components ((:file "automaton-package")
+			 (:file "eqv-hash" :depends-on ("automaton-package"))
+			 (:file "state-and-transition" :depends-on ("eqv-hash"))
+			 (:file "automaton" :depends-on ("state-and-transition" "eqv-hash"))
+			 (:file "regexp" :depends-on ("automaton"))))
+   (:module "Persistent"
             :components ((:file "binseq-package")
                          (:file "binseq" :depends-on ("binseq-package"))
                          (:file "obinseq" :depends-on ("binseq-package" "binseq"))
                          (:file "binseq2" :depends-on ("binseq-package" "obinseq" "binseq"))))
 
-   (:file "packages" :depends-on ("Persistent"))
+   (:file "packages" :depends-on ("cl-automaton" "Persistent"))
    (:file "buffer" :depends-on ("packages"))
    (:file "persistent-buffer"
           :pathname #p"Persistent/persistent-buffer.lisp"
@@ -74,7 +80,22 @@
   :components
   ((:file "rt" :pathname #p"testing/rt.lisp")
    (:file "buffer-test" :depends-on ("rt"))
-   (:file "base-test" :depends-on ("rt"))))
+   (:file "base-test" :depends-on ("rt"))
+   (:file "automaton-test-package"
+	  :pathname #P"cl-automaton/automaton-test-package.lisp"
+	  :depends-on ("rt"))
+   (:file "eqv-hash-test"
+	  :pathname #P"cl-automaton/eqv-hash-test.lisp"
+	  :depends-on ("rt" "automaton-test-package"))
+   (:file "state-and-transition-test"
+	  :pathname #P"cl-automaton/state-and-transition-test.lisp"
+	  :depends-on ("rt" "automaton-test-package"))
+   (:file "automaton-test"
+	  :pathname #P"cl-automaton/automaton-test.lisp"
+	  :depends-on ("rt" "automaton-test-package"))
+   (:file "regexp-test"
+	  :pathname #P"cl-automaton/regexp-test.lisp"
+	  :depends-on ("rt" "automaton-test-package"))))
 
 #+asdf
 (defmethod asdf:perform :around ((o asdf:compile-op)


Index: climacs/packages.lisp
diff -u climacs/packages.lisp:1.72 climacs/packages.lisp:1.73
--- climacs/packages.lisp:1.72	Thu Aug  4 03:10:45 2005
+++ climacs/packages.lisp	Fri Aug  5 00:07:45 2005
@@ -79,7 +79,9 @@
 	   #:name-mixin #:name
 	   #:buffer-looking-at #:looking-at
 	   #:buffer-search-forward #:buffer-search-backward
-	   #:search-forward #:search-backward))
+	   #:buffer-re-search-forward #:buffer-re-search-backward
+	   #:search-forward #:search-backward
+	   #:re-search-forward #:re-search-backward))
 
 (defpackage :climacs-abbrev
   (:use :clim-lisp :clim :climacs-buffer :climacs-base)




More information about the Climacs-cvs mailing list