[climacs-cvs] CVS update: climacs/gui.lisp climacs/html-syntax.lisp climacs/packages.lisp
Robert Strandh
rstrandh at common-lisp.net
Mon Feb 28 08:51:40 UTC 2005
Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv23495
Modified Files:
gui.lisp html-syntax.lisp packages.lisp
Log Message:
Improvements to HTML syntax. This syntax module now uses an
incremental lexer, and and incremental parser based on the existing
Earley parser in syntax.lisp.
Removed backward-to-error and forward-to-error, since I am not sure
that these are what we want.
Date: Mon Feb 28 09:51:36 2005
Author: rstrandh
Index: climacs/gui.lisp
diff -u climacs/gui.lisp:1.125 climacs/gui.lisp:1.126
--- climacs/gui.lisp:1.125 Sun Feb 27 19:52:01 2005
+++ climacs/gui.lisp Mon Feb 28 09:51:33 2005
@@ -1282,18 +1282,6 @@
(syntax (syntax (buffer pane))))
(end-of-paragraph point syntax)))
-(define-named-command com-backward-to-error ()
- (let* ((pane (current-window))
- (point (point pane))
- (syntax (syntax (buffer pane))))
- (display-message "~a" (backward-to-error point syntax))))
-
-(define-named-command com-forward-to-error ()
- (let* ((pane (current-window))
- (point (point pane))
- (syntax (syntax (buffer pane))))
- (display-message "~a" (forward-to-error point syntax))))
-
(define-named-command com-eval-expression ((insertp 'boolean :prompt "Insert?"))
(let* ((*package* (find-package :climacs-gui))
(string (handler-case (accept 'string :prompt "Eval")
Index: climacs/html-syntax.lisp
diff -u climacs/html-syntax.lisp:1.3 climacs/html-syntax.lisp:1.4
--- climacs/html-syntax.lisp:1.3 Sat Feb 5 07:49:53 2005
+++ climacs/html-syntax.lisp Mon Feb 28 09:51:34 2005
@@ -34,183 +34,237 @@
(and (eq (class-of t1) (class-of t2))
(< (badness t1) (badness t2))))
-(defclass html (html-sym) ())
-(defclass head (html-sym) ())
-(defclass title (html-sym) ())
-(defclass body (html-sym) ())
-(defclass h1 (html-sym) ())
-(defclass h2 (html-sym) ())
-(defclass h3 (html-sym) ())
-(defclass para (html-sym) ())
-(defclass ul (html-sym) ())
-(defclass li (html-sym) ())
-(defclass texts (html-sym) ())
-
-(defclass error-token (html-sym) ())
-(defclass text (html-sym) ())
-
-(defclass <html> (html-sym) ())
-(defclass </html> (html-sym) ())
-(defclass <head> (html-sym) ())
-(defclass </head> (html-sym) ())
-(defclass <title> (html-sym) ())
-(defclass </title> (html-sym) ())
-(defclass <body> (html-sym) ())
-(defclass </body> (html-sym) ())
-(defclass <h1> (html-sym) ())
-(defclass </h1> (html-sym) ())
-(defclass <h2> (html-sym) ())
-(defclass </h2> (html-sym) ())
-(defclass <h3> (html-sym) ())
-(defclass </h3> (html-sym) ())
-(defclass <p> (html-sym) ())
-(defclass </p> (html-sym) ())
-(defclass <ul> (html-sym) ())
-(defclass </ul> (html-sym) ())
-(defclass <li> (html-sym) ())
-(defclass </li> (html-sym) ())
+(defclass words (html-sym) ())
+
+(defclass empty-words (words) ())
+
+(defclass nonempty-words (words)
+ ((words :initarg :words)
+ (word :initarg :word)))
+
+(defclass html-balanced (html-sym)
+ ((start :initarg :start)
+ (end :initarg :end)))
+
+(defclass html (html-balanced)
+ ((head :initarg :head)
+ (body :initarg :body)))
+
+(defclass head (html-balanced)
+ ((title :initarg :title)))
+
+(defclass html-words (html-balanced)
+ ((words :initarg :words)))
+
+(defclass title (html-words) ())
+(defclass body (html-words) ())
+(defclass h1 (html-words) ())
+(defclass h2 (html-words) ())
+(defclass h3 (html-words) ())
+(defclass para (html-words) ())
+
+(defclass html-token (html-sym)
+ ((start-mark :initarg :start-mark :reader start-mark)
+ (size :initarg :size)))
+
+(defgeneric end-offset (html-token))
+
+(defmethod end-offset ((token html-token))
+ (with-slots (start-mark size) token
+ (+ (offset start-mark) size)))
+
+(defgeneric start-offset (html-token))
+
+(defmethod start-offset ((token html-token))
+ (offset (start-mark token)))
+
+(defclass <html> (html-token) () (:default-initargs :size 6))
+(defclass </html> (html-token) ()(:default-initargs :size 7))
+(defclass <head> (html-token) () (:default-initargs :size 6))
+(defclass </head> (html-token) () (:default-initargs :size 7))
+(defclass <title> (html-token) () (:default-initargs :size 7))
+(defclass </title> (html-token) () (:default-initargs :size 8))
+(defclass <body> (html-token) () (:default-initargs :size 6))
+(defclass </body> (html-token) () (:default-initargs :size 7))
+(defclass <h1> (html-token) () (:default-initargs :size 4))
+(defclass </h1> (html-token) () (:default-initargs :size 5))
+(defclass <h2> (html-token) () (:default-initargs :size 4))
+(defclass </h2> (html-token) () (:default-initargs :size 5))
+(defclass <h3> (html-token) () (:default-initargs :size 4))
+(defclass </h3> (html-token) () (:default-initargs :size 5))
+(defclass <p> (html-token) () (:default-initargs :size 3))
+(defclass </p> (html-token) () (:default-initargs :size 4))
+(defclass <ul> (html-token) () (:default-initargs :size 4))
+(defclass </ul> (html-token) () (:default-initargs :size 5))
+(defclass <li> (html-token) () (:default-initargs :size 4))
+(defclass </li> (html-token) () (:default-initargs :size 5))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; lexer
-(defparameter *token-table*
- '(("<html>" . <html>)
- ("</html>" . </html>)
- ("<head>" . <head>)
- ("</head>" . </head>)
- ("<title>" . <title>)
- ("</title>" . </title>)
- ("<body>" . <body>)
- ("</body>" . </body>)
- ("<h1>" . <h1>)
- ("</h1>" . </h1>)
- ("<h2>" . <h2>)
- ("</h2>" . </h2>)
- ("<h3>" . <h3>)
- ("</h3>" . </h3>)
- ("<p>" . <p>)
- ("</p>" . </p>)
- ("<ul>" . <ul>)
- ("</ul>" . </ul>)
- ("<li>" . <li>)
- ("</li>" . </li>)))
-
-(defclass html-lexer (lexer)
- ((mark :initarg :mark)))
-
-(defmethod lex ((lexer html-lexer))
- (with-slots (mark) lexer
- (assert (not (end-of-buffer-p mark)))
- (cond ((or (end-of-line-p mark)
- (not (eql (object-after mark) #\<)))
- (when (end-of-line-p mark)
- (forward-object mark))
- (loop until (or (end-of-line-p mark)
- (eql (object-after mark) #\<))
- do (forward-object mark))
- (make-instance 'text))
- (t
- (let ((offset (offset mark)))
- (forward-object mark)
- (loop until (or (end-of-line-p mark)
- (whitespacep (object-after mark))
- (eql (object-before mark) #\>))
- do (forward-object mark))
- (let* ((word (region-to-sequence offset mark))
- (class-name (cdr (assoc word *token-table* :test #'equalp))))
- (make-instance (or class-name 'error-token))))))))
+(defclass html-element (html-token)
+ ((state :initarg :state)))
+
+(defclass start-element (html-element) ())
+(defclass tag-start (html-element) ())
+(defclass tag-end (html-element) ())
+(defclass slash (html-element) ())
+(defclass word (html-element) ())
+(defclass delimiter (html-element) ())
+
+(defun next-token (scan)
+ (let ((start-mark (clone-mark scan)))
+ (flet ((fo () (forward-object scan)))
+ (macrolet ((make-entry (type)
+ `(return-from next-token
+ (make-instance ,type :start-mark start-mark
+ :size (- (offset scan) (offset start-mark))))))
+ (loop with object = (object-after scan)
+ until (end-of-buffer-p scan)
+ do (case object
+ (#\< (fo) (make-entry 'tag-start))
+ (#\> (fo) (make-entry 'tag-end))
+ (#\/ (fo) (make-entry 'slash))
+ (t (cond ((alphanumericp object)
+ (loop until (end-of-buffer-p scan)
+ while (alphanumericp (object-after scan))
+ do (fo))
+ (make-entry 'word))
+ (t
+ (fo) (make-entry 'delimiter))))))))))
+
+(define-syntax html-syntax ("HTML" (basic-syntax))
+ ((tokens :initform (make-instance 'standard-flexichain))
+ (guess-pos :initform 1)
+ (valid-parse :initform 1)
+ (parser)))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;
;;; parser
+(defun word-is (word string)
+ (string-equal (coerce (region-to-sequence (start-mark word) (end-offset word)) 'string)
+ string))
+
(defparameter *html-grammar*
(grammar
- (html -> (<html> head body </html>))
- (head -> (<head> title </head>))
- (title -> (<title> texts </title>))
- (body -> (<body> texts </body>))
- (texts -> ())
- (texts -> (texts text))))
-
-(define-syntax html-syntax ("HTML" (basic-syntax))
- ((parser)
- (states)))
+ (<html> -> (tag-start
+ (word (and (= (end-offset tag-start) (start-offset word))
+ (word-is word "html")))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start-mark (start-mark tag-start))
+ (</html> -> (tag-start
+ (slash (= (end-offset tag-start) (start-offset slash)))
+ (word (and (= (end-offset slash) (start-offset word))
+ (word-is word "html")))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start-mark (start-mark tag-start))
+ (<head> -> (tag-start
+ (word (and (= (end-offset tag-start) (start-offset word))
+ (word-is word "head")))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start-mark (start-mark tag-start))
+ (</head> -> (tag-start
+ (slash (= (end-offset tag-start) (start-offset slash)))
+ (word (and (= (end-offset slash) (start-offset word))
+ (word-is word "head")))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start-mark (start-mark tag-start))
+ (<title> -> (tag-start
+ (word (and (= (end-offset tag-start) (start-offset word))
+ (word-is word "title")))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start-mark (start-mark tag-start))
+ (</title> -> (tag-start
+ (slash (= (end-offset tag-start) (start-offset slash)))
+ (word (and (= (end-offset slash) (start-offset word))
+ (word-is word "title")))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start-mark (start-mark tag-start))
+ (<body> -> (tag-start
+ (word (and (= (end-offset tag-start) (start-offset word))
+ (word-is word "body")))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start-mark (start-mark tag-start))
+ (</body> -> (tag-start
+ (slash (= (end-offset tag-start) (start-offset slash)))
+ (word (and (= (end-offset slash) (start-offset word))
+ (word-is word "body")))
+ (tag-end (= (end-offset word) (start-offset tag-end))))
+ :start-mark (start-mark tag-start))
+ (html -> (<html> head body </html>)
+ :start <html> :head head :body body :end </html>)
+ (head -> (<head> title </head>)
+ :start <head> :title title :end </head>)
+ (title -> (<title> words </title>)
+ :start <title> :words words :end </title>)
+ (body -> (<body> words </body>)
+ :start <body> :words words :end </body>)
+ (words -> ()
+ (make-instance 'empty-words))
+ (words -> (words word)
+ (make-instance 'nonempty-words :words words :word word))))
(defmethod initialize-instance :after ((syntax html-syntax) &rest args)
(declare (ignore args))
- (with-slots (parser states buffer) syntax
+ (with-slots (parser tokens buffer) syntax
(setf parser (make-instance 'parser
:grammar *html-grammar*
- :lexer (make-instance 'html-lexer
- :mark (make-instance 'standard-left-sticky-mark :buffer buffer))
:target 'html))
- (setf states (list (cons (make-instance 'standard-left-sticky-mark :buffer buffer)
- (initial-state parser))))))
+ (insert* tokens 0 (make-instance 'start-element
+ :start-mark (make-instance 'standard-left-sticky-mark
+ :buffer buffer
+ :offset 0)
+ :size 0
+ :state (initial-state parser)))))
+
+(defmethod update-syntax-for-display (buffer (syntax html-syntax) top bot)
+ (with-slots (parser tokens valid-parse) syntax
+ (loop until (= valid-parse (nb-elements tokens))
+ while (mark< (end-offset (element* tokens valid-parse)) bot)
+ do (let ((current-token (element* tokens (1- valid-parse)))
+ (next-token (element* tokens valid-parse)))
+ (setf (slot-value next-token 'state)
+ (advance-parse parser (list next-token) (slot-value current-token 'state))))
+ (incf valid-parse))))
(defmethod update-syntax (buffer (syntax html-syntax))
- (let ((low-mark (low-mark buffer)))
- (with-slots (parser states) syntax
- (with-slots (lexer) parser
- (with-slots (mark) lexer
- (loop until (or (null (cdr states))
- (< (offset (caar states)) (offset low-mark)))
- do (pop states))
- (setf (offset mark) (offset (caar states)))
- (loop until (end-of-buffer-p mark)
- do (let ((token (lex lexer)))
- (push (cons (clone-mark mark)
- (advance-parse parser (list token) (cdar states)))
- states)))))
- (print (find 'html (gethash (initial-state parser) (parse-trees (cdar states)))
- :key #'type-of)
- *query-io*)
- (finish-output *query-io*))))
-
-(defgeneric forward-to-error (point syntax))
-(defgeneric backward-to-error (point syntax))
-
-(defun find-bad-parse-tree (state)
- (maphash (lambda (key parse-trees)
- (declare (ignore key))
- (let ((parse-tree (find-if (lambda (parse-tree)
- (plusp (badness parse-tree)))
- parse-trees)))
- (when parse-tree
- (return-from find-bad-parse-tree parse-tree))))
- (parse-trees state)))
-
-(defgeneric empty-state-p (state))
-
-(defmethod empty-state-p (state)
- (maphash (lambda (key val)
- (declare (ignore key))
- (loop for parse-tree in val
- do (return-from empty-state-p nil)))
- (parse-trees state))
- (maphash (lambda (key val)
- (declare (ignore key))
- (loop for parse-tree in val
- do (return-from empty-state-p nil)))
- (incomplete-items state)))
-
-(defmethod backward-to-error (point (syntax html-syntax))
- (let ((states (slot-value syntax 'states)))
- ;; find the last state before point
- (loop until (or (null states)
- (mark< (caar states) point))
- do (pop states))
- (when (null states)
- (return-from backward-to-error "no more errors"))
- (when (empty-state-p (cdar states))
- (loop for ((m1 . s1) (m2 . s2)) on states
- until (not (empty-state-p s2))
- finally (setf (offset point) (offset m1)))
- (return-from backward-to-error "no valid parse from this point"))
- (loop for (mark . state) in states
- for tree = (find-bad-parse-tree state)
- when tree
- do (setf (offset point) (offset mark))
- (return (message tree))
- finally (return "no more errors"))))
+ (let ((low-mark (low-mark buffer))
+ (high-mark (high-mark buffer))
+ (scan))
+ (with-slots (tokens guess-pos valid-parse) syntax
+ (when (mark<= low-mark high-mark)
+ ;; go back to a position before low-mark
+ (loop until (or (= guess-pos 1)
+ (mark< (end-offset (element* tokens (1- guess-pos))) low-mark))
+ do (decf guess-pos))
+ ;; go forward to the last position before low-mark
+ (loop with nb-elements = (nb-elements tokens)
+ until (or (= guess-pos nb-elements)
+ (mark>= (end-offset (element* tokens guess-pos)) low-mark))
+ do (incf guess-pos))
+ ;; mark valid parse
+ (setf valid-parse guess-pos)
+ ;; delete entries that must be reparsed
+ (loop until (or (= guess-pos (nb-elements tokens))
+ (mark> (start-mark (element* tokens guess-pos)) high-mark))
+ do (delete* tokens guess-pos))
+ (setf scan (make-instance 'standard-left-sticky-mark
+ :buffer buffer
+ :offset (if (zerop guess-pos)
+ 0
+ (end-offset (element* tokens (1- guess-pos))))))
+ ;; scan
+ (loop with start-mark = nil
+ do (loop until (end-of-buffer-p scan)
+ while (whitespacep (object-after scan))
+ do (forward-object scan))
+ until (if (end-of-buffer-p high-mark)
+ (end-of-buffer-p scan)
+ (mark> scan high-mark))
+ do (setf start-mark (clone-mark scan))
+ (insert* tokens guess-pos (next-token scan))
+ (incf guess-pos))))))
+
Index: climacs/packages.lisp
diff -u climacs/packages.lisp:1.51 climacs/packages.lisp:1.52
--- climacs/packages.lisp:1.51 Sun Feb 27 19:52:01 2005
+++ climacs/packages.lisp Mon Feb 28 09:51:35 2005
@@ -91,8 +91,7 @@
#:basic-syntax
#:update-syntax #:update-syntax-for-display
#:syntax-line-indentation
- #:beginning-of-paragraph #:end-of-paragraph
- #:forward-to-error #:backward-to-error))
+ #:beginning-of-paragraph #:end-of-paragraph))
(defpackage :climacs-cl-syntax
(:use :clim-lisp :clim :climacs-buffer :climacs-base :flexichain :climacs-syntax)
More information about the Climacs-cvs
mailing list