[climacs-cvs] CVS update: climacs/gui.lisp climacs/html-syntax.lisp climacs/packages.lisp

Robert Strandh rstrandh at common-lisp.net
Mon Feb 28 08:51:40 UTC 2005


Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv23495

Modified Files:
	gui.lisp html-syntax.lisp packages.lisp 
Log Message:
Improvements to HTML syntax.  This syntax module now uses an
incremental lexer, and and incremental parser based on the existing
Earley parser in syntax.lisp.

Removed backward-to-error and forward-to-error, since I am not sure
that these are what we want. 


Date: Mon Feb 28 09:51:36 2005
Author: rstrandh

Index: climacs/gui.lisp
diff -u climacs/gui.lisp:1.125 climacs/gui.lisp:1.126
--- climacs/gui.lisp:1.125	Sun Feb 27 19:52:01 2005
+++ climacs/gui.lisp	Mon Feb 28 09:51:33 2005
@@ -1282,18 +1282,6 @@
 	 (syntax (syntax (buffer pane))))
     (end-of-paragraph point syntax)))
 
-(define-named-command com-backward-to-error ()
-  (let* ((pane (current-window))
-	 (point (point pane))
-	 (syntax (syntax (buffer pane))))
-    (display-message "~a" (backward-to-error point syntax))))
-
-(define-named-command com-forward-to-error ()
-  (let* ((pane (current-window))
-	 (point (point pane))
-	 (syntax (syntax (buffer pane))))
-    (display-message "~a" (forward-to-error point syntax))))
-
 (define-named-command com-eval-expression ((insertp 'boolean :prompt "Insert?"))
   (let* ((*package* (find-package :climacs-gui))
 	 (string (handler-case (accept 'string :prompt "Eval")


Index: climacs/html-syntax.lisp
diff -u climacs/html-syntax.lisp:1.3 climacs/html-syntax.lisp:1.4
--- climacs/html-syntax.lisp:1.3	Sat Feb  5 07:49:53 2005
+++ climacs/html-syntax.lisp	Mon Feb 28 09:51:34 2005
@@ -34,183 +34,237 @@
   (and (eq (class-of t1) (class-of t2))
        (< (badness t1) (badness t2))))
 
-(defclass html (html-sym) ())
-(defclass head (html-sym) ())
-(defclass title (html-sym) ())
-(defclass body (html-sym) ())
-(defclass h1 (html-sym) ())
-(defclass h2 (html-sym) ())
-(defclass h3 (html-sym) ())
-(defclass para (html-sym) ())
-(defclass ul (html-sym) ())
-(defclass li (html-sym) ())
-(defclass texts (html-sym) ())
-
-(defclass error-token (html-sym) ())
-(defclass text (html-sym) ())
-
-(defclass <html> (html-sym) ())
-(defclass </html> (html-sym) ())
-(defclass <head> (html-sym) ())
-(defclass </head> (html-sym) ())
-(defclass <title> (html-sym) ())
-(defclass </title> (html-sym) ())
-(defclass <body> (html-sym) ())
-(defclass </body> (html-sym) ())
-(defclass <h1> (html-sym) ())
-(defclass </h1> (html-sym) ())
-(defclass <h2> (html-sym) ())
-(defclass </h2> (html-sym) ())
-(defclass <h3> (html-sym) ())
-(defclass </h3> (html-sym) ())
-(defclass <p> (html-sym) ())
-(defclass </p> (html-sym) ())
-(defclass <ul> (html-sym) ())
-(defclass </ul> (html-sym) ())
-(defclass <li> (html-sym) ())
-(defclass </li> (html-sym) ())
+(defclass words (html-sym) ())
+
+(defclass empty-words (words) ())
+
+(defclass nonempty-words (words)
+  ((words :initarg :words)
+   (word :initarg :word)))
+
+(defclass html-balanced (html-sym)
+  ((start :initarg :start)
+   (end :initarg :end)))
+
+(defclass html (html-balanced)
+  ((head :initarg :head)
+   (body :initarg :body)))
+
+(defclass head (html-balanced)
+  ((title :initarg :title)))
+
+(defclass html-words (html-balanced)
+  ((words :initarg :words)))
+
+(defclass title (html-words) ())
+(defclass body (html-words) ())
+(defclass h1 (html-words) ())
+(defclass h2 (html-words) ())
+(defclass h3 (html-words) ())
+(defclass para (html-words) ())
+
+(defclass html-token (html-sym)
+  ((start-mark :initarg :start-mark :reader start-mark)
+   (size :initarg :size)))
+
+(defgeneric end-offset (html-token))
+
+(defmethod end-offset ((token html-token))
+  (with-slots (start-mark size) token
+     (+ (offset start-mark) size)))
+
+(defgeneric start-offset (html-token))
+
+(defmethod start-offset ((token html-token))
+  (offset (start-mark token)))
+
+(defclass <html> (html-token) () (:default-initargs :size 6))
+(defclass </html> (html-token) ()(:default-initargs :size 7))
+(defclass <head> (html-token) () (:default-initargs :size 6))
+(defclass </head> (html-token) () (:default-initargs :size 7))
+(defclass <title> (html-token) () (:default-initargs :size 7))
+(defclass </title> (html-token) () (:default-initargs :size 8))
+(defclass <body> (html-token) () (:default-initargs :size 6))
+(defclass </body> (html-token) () (:default-initargs :size 7))
+(defclass <h1> (html-token) () (:default-initargs :size 4))
+(defclass </h1> (html-token) () (:default-initargs :size 5))
+(defclass <h2> (html-token) () (:default-initargs :size 4))
+(defclass </h2> (html-token) () (:default-initargs :size 5))
+(defclass <h3> (html-token) () (:default-initargs :size 4))
+(defclass </h3> (html-token) () (:default-initargs :size 5))
+(defclass <p> (html-token) () (:default-initargs :size 3))
+(defclass </p> (html-token) () (:default-initargs :size 4))
+(defclass <ul> (html-token) () (:default-initargs :size 4))
+(defclass </ul> (html-token) () (:default-initargs :size 5))
+(defclass <li> (html-token) () (:default-initargs :size 4))
+(defclass </li> (html-token) () (:default-initargs :size 5))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;
 ;;; lexer
 
-(defparameter *token-table*
-	      '(("<html>" . <html>)
-		("</html>" . </html>)
-		("<head>" . <head>)
-		("</head>" . </head>)
-		("<title>" . <title>)
-		("</title>" . </title>)
-		("<body>" . <body>)
-		("</body>" . </body>)
-		("<h1>" . <h1>)
-		("</h1>" . </h1>)
-		("<h2>" . <h2>)
-		("</h2>" . </h2>)
-		("<h3>" . <h3>)
-		("</h3>" . </h3>)
-		("<p>" . <p>)
-		("</p>" . </p>)
-		("<ul>" . <ul>)
-		("</ul>" . </ul>)
-		("<li>" . <li>)
-		("</li>" . </li>)))
-
-(defclass html-lexer (lexer)
-  ((mark :initarg :mark)))
-
-(defmethod lex ((lexer html-lexer))
-  (with-slots (mark) lexer
-     (assert (not (end-of-buffer-p mark)))
-     (cond ((or (end-of-line-p mark)
-		(not (eql (object-after mark) #\<)))
-	    (when (end-of-line-p mark)
-	      (forward-object mark))
-	    (loop until (or (end-of-line-p mark)
-			    (eql (object-after mark) #\<))
-		  do (forward-object mark))
-	    (make-instance 'text))
-	   (t
-	    (let ((offset (offset mark)))
-	      (forward-object mark)
-	      (loop until (or (end-of-line-p mark)
-			      (whitespacep (object-after mark))
-			      (eql (object-before mark) #\>))
-		    do (forward-object mark))
-	      (let* ((word (region-to-sequence offset mark))
-		     (class-name (cdr (assoc word *token-table* :test #'equalp))))
-		(make-instance (or class-name 'error-token))))))))
+(defclass html-element (html-token)
+  ((state :initarg :state)))
+
+(defclass start-element (html-element) ())
+(defclass tag-start (html-element) ())
+(defclass tag-end (html-element) ())
+(defclass slash (html-element) ())
+(defclass word (html-element) ())
+(defclass delimiter (html-element) ())
+
+(defun next-token (scan)
+  (let ((start-mark (clone-mark scan)))
+    (flet ((fo () (forward-object scan)))
+      (macrolet ((make-entry (type)
+		   `(return-from next-token
+		      (make-instance ,type :start-mark start-mark
+				     :size (- (offset scan) (offset start-mark))))))
+	(loop with object = (object-after scan)
+	      until (end-of-buffer-p scan)
+	      do (case object
+		   (#\< (fo) (make-entry 'tag-start))
+		   (#\> (fo) (make-entry 'tag-end))
+		   (#\/ (fo) (make-entry 'slash))
+		   (t (cond ((alphanumericp object)
+			     (loop until (end-of-buffer-p scan)
+				   while (alphanumericp (object-after scan))
+				   do (fo))
+			     (make-entry 'word))
+			    (t
+			     (fo) (make-entry 'delimiter))))))))))
+
+(define-syntax html-syntax ("HTML" (basic-syntax))
+  ((tokens :initform (make-instance 'standard-flexichain))
+   (guess-pos :initform 1)
+   (valid-parse :initform 1)
+   (parser)))
 
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;;
 ;;; parser
 
+(defun word-is (word string)
+  (string-equal (coerce (region-to-sequence (start-mark word) (end-offset word)) 'string)
+		string))
+
 (defparameter *html-grammar*
   (grammar
-   (html -> (<html> head body </html>))
-   (head -> (<head> title </head>))
-   (title -> (<title> texts </title>))
-   (body -> (<body> texts </body>))
-   (texts -> ())
-   (texts -> (texts text))))
-
-(define-syntax html-syntax ("HTML" (basic-syntax))
-  ((parser)
-   (states)))
+    (<html> -> (tag-start
+		(word (and (= (end-offset tag-start) (start-offset word))
+			   (word-is word "html")))
+		(tag-end (= (end-offset word) (start-offset tag-end))))
+	    :start-mark (start-mark tag-start))
+    (</html> -> (tag-start
+		 (slash (= (end-offset tag-start) (start-offset slash)))
+		 (word (and (= (end-offset slash) (start-offset word))
+			    (word-is word "html")))
+		 (tag-end (= (end-offset word) (start-offset tag-end))))
+	     :start-mark (start-mark tag-start))
+    (<head> -> (tag-start
+		(word (and (= (end-offset tag-start) (start-offset word))
+			   (word-is word "head")))
+		(tag-end (= (end-offset word) (start-offset tag-end))))
+	    :start-mark (start-mark tag-start))
+    (</head> -> (tag-start
+		 (slash (= (end-offset tag-start) (start-offset slash)))
+		 (word (and (= (end-offset slash) (start-offset word))
+			    (word-is word "head")))
+		 (tag-end (= (end-offset word) (start-offset tag-end))))
+	     :start-mark (start-mark tag-start))
+    (<title> -> (tag-start
+		 (word (and (= (end-offset tag-start) (start-offset word))
+			    (word-is word "title")))
+		 (tag-end (= (end-offset word) (start-offset tag-end))))
+	     :start-mark (start-mark tag-start))
+    (</title> -> (tag-start
+		  (slash (= (end-offset tag-start) (start-offset slash)))
+		  (word (and (= (end-offset slash) (start-offset word))
+			     (word-is word "title")))
+		  (tag-end (= (end-offset word) (start-offset tag-end))))
+	      :start-mark (start-mark tag-start))
+    (<body> -> (tag-start
+		(word (and (= (end-offset tag-start) (start-offset word))
+			   (word-is word "body")))
+		(tag-end (= (end-offset word) (start-offset tag-end))))
+	    :start-mark (start-mark tag-start))
+    (</body> -> (tag-start
+		 (slash (= (end-offset tag-start) (start-offset slash)))
+		 (word (and (= (end-offset slash) (start-offset word))
+			    (word-is word "body")))
+		 (tag-end (= (end-offset word) (start-offset tag-end))))
+	     :start-mark (start-mark tag-start))
+    (html -> (<html> head body </html>)
+	  :start <html> :head head :body body :end </html>)
+    (head -> (<head> title </head>)
+	  :start <head> :title title :end </head>)
+    (title -> (<title> words </title>)
+	   :start <title> :words words :end </title>)
+    (body -> (<body> words </body>)
+	  :start <body> :words words :end </body>)
+    (words -> ()
+	   (make-instance 'empty-words))
+    (words -> (words word)
+	   (make-instance 'nonempty-words :words words :word word))))
 
 (defmethod initialize-instance :after ((syntax html-syntax) &rest args)
   (declare (ignore args))
-  (with-slots (parser states buffer) syntax
+  (with-slots (parser tokens buffer) syntax
      (setf parser (make-instance 'parser
 		     :grammar *html-grammar*
-		     :lexer (make-instance 'html-lexer
-			       :mark (make-instance 'standard-left-sticky-mark :buffer buffer))
 		     :target 'html))
-     (setf states (list (cons (make-instance 'standard-left-sticky-mark :buffer buffer)
-			      (initial-state parser))))))
+     (insert* tokens 0 (make-instance 'start-element
+			  :start-mark (make-instance 'standard-left-sticky-mark
+					 :buffer buffer
+					 :offset 0)
+			  :size 0
+			  :state (initial-state parser)))))
+
+(defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
+  (with-slots (parser tokens valid-parse) syntax
+     (loop until (= valid-parse (nb-elements tokens))
+	   while (mark< (end-offset (element* tokens valid-parse)) bot)
+	   do (let ((current-token (element* tokens (1- valid-parse)))
+		    (next-token (element* tokens valid-parse)))
+		(setf (slot-value next-token 'state)
+		      (advance-parse parser (list next-token) (slot-value current-token 'state))))
+	      (incf valid-parse))))
 
 (defmethod update-syntax (buffer (syntax html-syntax))
-  (let ((low-mark (low-mark buffer)))
-    (with-slots (parser states) syntax
-       (with-slots (lexer) parser
-	  (with-slots (mark) lexer
-	     (loop until (or (null (cdr states))
-			     (< (offset (caar states)) (offset low-mark)))
-		   do (pop states))
-	     (setf (offset mark) (offset (caar states)))
-	     (loop until (end-of-buffer-p mark)
-		   do (let ((token (lex lexer)))
-			(push (cons (clone-mark mark)
-				    (advance-parse parser (list token) (cdar states)))
-			      states)))))
-       (print (find 'html (gethash (initial-state parser) (parse-trees (cdar states)))
-		    :key #'type-of)
-	      *query-io*)
-       (finish-output *query-io*))))
-
-(defgeneric forward-to-error (point syntax))
-(defgeneric backward-to-error (point syntax))
-
-(defun find-bad-parse-tree (state)
-  (maphash (lambda (key parse-trees)
-	     (declare (ignore key))
-	     (let ((parse-tree (find-if (lambda (parse-tree)
-					  (plusp (badness parse-tree)))
-					parse-trees)))
-	       (when parse-tree
-		 (return-from find-bad-parse-tree parse-tree))))
-	   (parse-trees state)))
-
-(defgeneric empty-state-p (state))
-
-(defmethod empty-state-p (state)
-  (maphash (lambda (key val)
-	     (declare (ignore key))
-	     (loop for parse-tree in val
-		   do (return-from empty-state-p nil)))
-	   (parse-trees state))
-  (maphash (lambda (key val)
-	     (declare (ignore key))
-	     (loop for parse-tree in val
-		   do (return-from empty-state-p nil)))
-	   (incomplete-items state)))
-
-(defmethod backward-to-error (point (syntax html-syntax))
-  (let ((states (slot-value syntax 'states)))
-    ;; find the last state before point
-    (loop until (or (null states)
-		    (mark< (caar states) point))
-	  do (pop states))
-    (when (null states)
-      (return-from backward-to-error "no more errors"))
-    (when (empty-state-p (cdar states))
-      (loop for ((m1 . s1) (m2 . s2)) on states
-	    until (not (empty-state-p s2))
-	    finally (setf (offset point) (offset m1)))
-      (return-from backward-to-error "no valid parse from this point"))
-    (loop for (mark . state) in states
-	  for tree = (find-bad-parse-tree state)
-	  when tree
-	    do (setf (offset point) (offset mark))
-	       (return (message tree))
-	  finally (return "no more errors"))))
+  (let ((low-mark (low-mark buffer))
+	(high-mark (high-mark buffer))
+	(scan))
+    (with-slots (tokens guess-pos valid-parse) syntax
+       (when (mark<= low-mark high-mark)
+	 ;; go back to a position before low-mark
+	 (loop until (or (= guess-pos 1)
+			 (mark< (end-offset (element* tokens (1- guess-pos))) low-mark))
+	       do (decf guess-pos))
+	 ;; go forward to the last position before low-mark
+	 (loop with nb-elements = (nb-elements tokens)
+	       until (or (= guess-pos nb-elements)
+			 (mark>= (end-offset (element* tokens guess-pos)) low-mark))
+	       do (incf guess-pos))
+	 ;; mark valid parse
+	 (setf valid-parse guess-pos)
+	 ;; delete entries that must be reparsed
+	 (loop until (or (= guess-pos (nb-elements tokens))
+			 (mark> (start-mark (element* tokens guess-pos)) high-mark))
+	       do (delete* tokens guess-pos))
+	 (setf scan (make-instance 'standard-left-sticky-mark
+		       :buffer buffer
+		       :offset (if (zerop guess-pos)
+				   0
+				   (end-offset (element* tokens (1- guess-pos))))))
+	 ;; scan
+	 (loop with start-mark = nil
+	       do (loop until (end-of-buffer-p scan)
+			while (whitespacep (object-after scan))
+			do (forward-object scan))
+	       until (if (end-of-buffer-p high-mark)
+			 (end-of-buffer-p scan)
+			 (mark> scan high-mark))
+	       do (setf start-mark (clone-mark scan))
+		  (insert* tokens guess-pos (next-token scan))
+		  (incf guess-pos))))))
+


Index: climacs/packages.lisp
diff -u climacs/packages.lisp:1.51 climacs/packages.lisp:1.52
--- climacs/packages.lisp:1.51	Sun Feb 27 19:52:01 2005
+++ climacs/packages.lisp	Mon Feb 28 09:51:35 2005
@@ -91,8 +91,7 @@
 	   #:basic-syntax
 	   #:update-syntax #:update-syntax-for-display
            #:syntax-line-indentation
-	   #:beginning-of-paragraph #:end-of-paragraph
-	   #:forward-to-error #:backward-to-error))
+	   #:beginning-of-paragraph #:end-of-paragraph))
 
 (defpackage :climacs-cl-syntax
   (:use :clim-lisp :clim :climacs-buffer :climacs-base :flexichain :climacs-syntax)




More information about the Climacs-cvs mailing list