[climacs-cvs] CVS update: climacs/prolog-syntax.lisp
Christophe Rhodes
crhodes at common-lisp.net
Sat Apr 2 22:13:43 UTC 2005
Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv8882
Modified Files:
prolog-syntax.lisp
Log Message:
Prolog syntax improvements:
* PRINT-OBJECT for PROLOG-LEXEMEs, for less pain while debugging;
* OPEN-CT production rule from OPEN-CT-LEXEME, because we can;
* rework the lexer a bit. Now UPDATE-SYNTAX just invalidates the lex
as it invalidates the parse, and UPDATE-SYNTAX-FOR-DISPLAY relexes as
far as it needs to;
* we need operator-compound-lterm and subclasses, because we cannot
create multiple nonterminals from one rule: returning
(make-instance 'lterm :term (make-instance 'foo ...))
from a production rule leaves some slots in the FOO unfilled;
* note my own bafflement as to why an apparently infinitely-recursive
production doesn't recurse infinitely. It can be fixed when needed,
but why isn't it triggering?
This version still gets various aspects of multiline lexemes wrong, but
it's a lot better than before.
Date: Sun Apr 3 00:13:27 2005
Author: crhodes
Index: climacs/prolog-syntax.lisp
diff -u climacs/prolog-syntax.lisp:1.5 climacs/prolog-syntax.lisp:1.6
--- climacs/prolog-syntax.lisp:1.5 Thu Mar 31 12:16:23 2005
+++ climacs/prolog-syntax.lisp Sun Apr 3 00:13:26 2005
@@ -65,6 +65,9 @@
(defclass prolog-lexeme (prolog-token)
((state :initarg :state)))
+(defmethod print-object ((o prolog-lexeme) s)
+ (print-unreadable-object (o s :type t)
+ (format s (lexeme-string o))))
(defclass start-lexeme (prolog-lexeme) ())
@@ -127,13 +130,25 @@
(def (error)))
+;;; open-ct is a special case: by 6.5.1 it cannot be preceded by
+;;; layout text. We could elide this and its grammar rules, but this
+;;; way we get a clearer relationship between the standard and its
+;;; expression here.
+(defclass open-ct (prolog-nonterminal)
+ ((syntactic-lexeme :initarg :syntactic-lexeme :accessor syntactic-lexeme)))
+(defmethod display-parse-tree ((entity open-ct) (syntax prolog-syntax) pane)
+ (display-parse-tree (syntactic-lexeme entity) syntax pane))
+(define-prolog-rule (open-ct -> (open-ct-lexeme))
+ (make-instance 'open-ct :syntactic-lexeme open-ct-lexeme))
+
;;; 6.4.1
(define-prolog-rule (layout-text -> (comment-lexeme layout-text))
(make-instance 'layout-text :comment comment-lexeme :cont layout-text))
(define-prolog-rule (layout-text -> ())
(make-instance 'layout-text :cont nil))
-(defclass prolog-lexer (incremental-lexer) ())
+(defclass prolog-lexer (incremental-lexer)
+ ((valid-lex :initarg :valid-lex :accessor valid-lex :initform 1)))
(defmethod next-lexeme ((lexer prolog-lexer) scan)
(let ((string (make-array 0 :element-type 'character
@@ -180,10 +195,10 @@
(t (fo) (return (make-instance 'error-lexeme))))
IDENTIFIER
(loop until (end-of-buffer-p scan)
- while (let ((object (object-after scan)))
- (or (alphanumericp object)
- (eql object #\_)))
- do (fo))
+ while (let ((object (object-after scan)))
+ (or (alphanumericp object)
+ (eql object #\_)))
+ do (fo))
(return (make-instance 'identifier-lexeme))
LINE-COMMENT
(loop until (end-of-buffer-p scan)
@@ -429,7 +444,6 @@
(defclass atom (prolog-nonterminal)
((value :initarg :value :accessor value)))
(defmethod syntactic-lexeme ((thing atom))
- ;; FIXME: wrong for empty-list atom and curly-brackets atom
(syntactic-lexeme (value thing)))
(defclass empty-list (prolog-nonterminal)
(([ :initarg :[ :accessor [)
@@ -484,6 +498,42 @@
(defmethod display-parse-tree ((entity lterm) (syntax prolog-syntax) pane)
(display-parse-tree (term entity) syntax pane))
+;;; FIXME: the need for these is because it is a protocol violation to
+;;; create nested nonterminals from one rule.
+(defclass operator-compound-lterm (lterm)
+ ((operator :initarg :operator :accessor operator)))
+(defmethod compound-term-p ((l operator-compound-lterm))
+ t)
+(defmethod functor ((l operator-compound-lterm))
+ (operator l))
+(defclass binary-operator-compound-lterm (operator-compound-lterm)
+ ((left :initarg :left :accessor left)
+ (right :initarg :right :accessor right)))
+(defmethod arity ((l binary-operator-compound-lterm))
+ 2)
+(defclass prefix-operator-compound-lterm (operator-compound-lterm)
+ ((right :initarg :right :accessor right)))
+(defmethod arity ((l prefix-operator-compound-lterm))
+ 1)
+(defclass postfix-operator-compound-lterm (operator-compound-lterm)
+ ((left :initarg :left :accessor left)))
+(defmethod arity ((l postfix-operator-compound-lterm))
+ 1)
+
+(defmethod display-parse-tree
+ ((entity binary-operator-compound-lterm) (syntax prolog-syntax) pane)
+ (display-parse-tree (left entity) syntax pane)
+ (display-parse-tree (operator entity) syntax pane)
+ (display-parse-tree (right entity) syntax pane))
+(defmethod display-parse-tree
+ ((entity prefix-operator-compound-lterm) (syntax prolog-syntax) pane)
+ (display-parse-tree (operator entity) syntax pane)
+ (display-parse-tree (right entity) syntax pane))
+(defmethod display-parse-tree
+ ((entity postfix-operator-compound-lterm) (syntax prolog-syntax) pane)
+ (display-parse-tree (left entity) syntax pane)
+ (display-parse-tree (operator entity) syntax pane))
+
(defclass op (prolog-nonterminal)
((name :initarg :name :accessor name)
(priority :initarg :priority :accessor priority)
@@ -579,9 +629,9 @@
(make-instance 'variable-term :priority 0 :name variable))
;;; 6.3.3
-(define-prolog-rule (term -> (atom open-ct-lexeme arg-list close))
+(define-prolog-rule (term -> (atom open-ct arg-list close))
(make-instance 'functional-compound-term :priority 0 :functor atom
- :arg-list arg-list :open-ct open-ct-lexeme :close close))
+ :arg-list arg-list :open-ct open-ct :close close))
(define-prolog-rule (arg-list -> (exp))
(make-instance 'arg-list :exp exp))
(define-prolog-rule (arg-list -> (exp comma arg-list))
@@ -613,17 +663,21 @@
;;; term would be, by explicitly writing the second production rule
;;; out here, and by using inegality tests rather than equalities for
;;; priorities elsewhere. LTERMs act as containers for terms.
+;;;
+;;; FIXME: why on earth doesn't this cause infinite recursion? If
+;;; LTERM is a subtype of TERM, as it is, this rule should surely be
+;;; always applicable.
(define-prolog-rule (lterm -> (term))
(make-instance 'lterm :term term :priority (1+ (priority term))))
(define-prolog-rule (term -> (open (term (<= (priority term) 1201)) close))
(make-instance 'bracketed-term :priority 0
:open open :term term :close close))
-(define-prolog-rule (term -> (open-ct-lexeme
+(define-prolog-rule (term -> (open-ct
(term (<= (priority term) 1201))
close))
(make-instance 'bracketed-term :priority 0
- :open open-ct-lexeme :term term :close close))
+ :open open-ct :term term :close close))
;;; 6.3.4.2
;;;
@@ -636,17 +690,15 @@
(right term)))
(when (and (< (priority left) (priority op))
(< (priority right) (priority op)))
- (make-instance 'lterm :priority (priority op) :term
- (make-instance 'binary-operator-compound-term
- :left left :operator op :right right))))
+ (make-instance 'binary-operator-compound-lterm :priority (priority op)
+ :left left :operator op :right right)))
(define-prolog-rule (lterm -> ((left lterm)
(op (eql (specifier op) :yfx))
(right term)))
(when (and (<= (priority left) (priority op))
(< (priority right) (priority op)))
- (make-instance 'lterm :priority (priority op) :term
- (make-instance 'binary-operator-compound-term
- :left left :operator op :right right))))
+ (make-instance 'binary-operator-compound-lterm :priority (priority op)
+ :left left :operator op :right right)))
(define-prolog-rule (term -> ((left term)
(op (eql (specifier op) :xfy))
(right term)))
@@ -656,14 +708,12 @@
:left left :operator op :right right)))
(define-prolog-rule (lterm -> (lterm (op (eql (specifier op) :yf))))
(when (<= (priority lterm) (priority op))
- (make-instance 'lterm :priority (priority op) :term
- (make-instance 'postfix-operator-compound-term
- :left lterm :operator op))))
+ (make-instance 'postfix-operator-compound-lterm :priority (priority op)
+ :left lterm :operator op)))
(define-prolog-rule (lterm -> (term (op (eql (specifier op) :xf))))
(when (< (priority term) (priority op))
- (make-instance 'lterm :priority (priority op) :term
- (make-instance 'postfix-operator-compound-term
- :left term :operator op))))
+ (make-instance 'postfix-operator-compound-lterm :priority (priority op)
+ :left term :operator op)))
(define-prolog-rule (term -> ((op (eql (specifier op) :fy)) term))
(when (and (or (not (string= (lexeme-string (syntactic-lexeme op)) "-"))
(not (numeric-constant-p term)))
@@ -676,9 +726,8 @@
(not (numeric-constant-p term)))
(not (typep (first-lexeme term) 'open-ct-lexeme))
(< (priority term) (priority op)))
- (make-instance 'lterm :priority (priority op) :term
- (make-instance 'prefix-operator-compound-term
- :right term :operator op))))
+ (make-instance 'prefix-operator-compound-lterm :priority (priority op)
+ :right term :operator op)))
;;; 6.3.4.3
(macrolet ((def (class &rest specifiers)
@@ -782,7 +831,7 @@
(and (consp value)
(typep (car value) 'atom)
(typep (cadr value) 'integer))))))
-
+
(defun first-lexeme (thing)
;; FIXME: we'll need to implement this.
(declare (ignore thing))
@@ -792,25 +841,66 @@
(defmethod update-syntax-for-display (buffer (syntax prolog-syntax) top bot)
(with-slots (parser lexer valid-parse) syntax
- (loop until (= valid-parse (nb-lexemes lexer))
- while (mark<= (end-offset (lexeme lexer valid-parse)) bot)
- do (let ((current-token (lexeme lexer (1- valid-parse)))
- (next-lexeme (lexeme lexer valid-parse)))
- (setf (slot-value next-lexeme 'state)
- (advance-parse parser (list next-lexeme) (slot-value current-token 'state))))
- (incf valid-parse))))
+ (with-slots (climacs-syntax::lexemes valid-lex) lexer
+ (let ((scan (clone-mark (low-mark buffer) :left)))
+ (setf (offset scan)
+ (end-offset (lexeme lexer (1- valid-lex))))
+ ;; lex as far as we need. We actually win quite a lot if we
+ ;; can implement the splicing described in the FIXME note,
+ ;; below, because there's then a good chance that CLIM's
+ ;; incremental redisplay will Do The Right Thing (on the EQ
+ ;; lexemes)
+ (loop do (skip-inter-lexeme-objects lexer scan)
+ ;; FIXME: are we allowed to mix DO and UNTIL like this?
+ ;; I doubt it.
+ until (end-of-buffer-p scan)
+ until (mark< bot (start-offset (lexeme lexer (1- valid-lex))))
+ ;; FIXME: a further criterion is when scan matches the
+ ;; start-offset of an element in lexemes, at which point
+ ;; we know that the entirety of the rest of the old lex
+ ;; is valid without doing any further work.
+ do (let* ((start-mark (clone-mark scan))
+ (lexeme (next-lexeme lexer scan))
+ (size (- (offset scan) (offset start-mark))))
+ (setf (slot-value lexeme 'climacs-syntax::start-mark) start-mark
+ (slot-value lexeme 'climacs-syntax::size) size)
+ (insert-lexeme lexer valid-lex lexeme)
+ (incf valid-lex)))
+ ;; remove lexemes which we know to be invalid
+ (let ((end (end-offset (lexeme lexer (1- valid-lex)))))
+ (loop until (= (nb-lexemes lexer) valid-lex)
+ while (< (start-offset (lexeme lexer valid-lex)) end)
+ do (delete* climacs-syntax::lexemes valid-lex))))
+ ;; parse up to the limit of validity imposed by the lexer, or
+ ;; the bottom of the visible area
+ (loop until (= valid-parse valid-lex)
+ while (mark<= (end-offset (lexeme lexer valid-parse)) bot)
+ do (let ((current-token (lexeme lexer (1- valid-parse)))
+ (next-lexeme (lexeme lexer valid-parse)))
+ (setf (slot-value next-lexeme 'state)
+ (advance-parse parser (list next-lexeme)
+ (slot-value current-token 'state)))
+ (incf valid-parse))))))
(defmethod inter-lexeme-object-p ((lexer prolog-lexer) object)
(member object '(#\Space #\Newline)))
(defmethod update-syntax (buffer (syntax prolog-syntax))
(with-slots (lexer valid-parse) syntax
- (let* ((low-mark (low-mark buffer))
- (high-mark (high-mark buffer)))
- (when (mark<= low-mark high-mark)
- (let ((first-invalid-position (delete-invalid-lexemes lexer low-mark high-mark)))
- (setf valid-parse first-invalid-position)
- (update-lex lexer first-invalid-position high-mark))))))
+ (let* ((low-mark (low-mark buffer))
+ (high-mark (high-mark buffer)))
+ (when (mark<= low-mark high-mark)
+ (with-slots (climacs-syntax::lexemes valid-lex) lexer
+ (let ((start 1)
+ (end (nb-elements climacs-syntax::lexemes)))
+ (loop while (< start end)
+ do (let ((middle (floor (+ start end) 2)))
+ (if (mark< (end-offset (element* climacs-syntax::lexemes middle))
+ low-mark)
+ (setf start (1+ middle))
+ (setf end middle))))
+ (setf valid-lex start)
+ (setf valid-parse start)))))))
;;; display
@@ -866,10 +956,7 @@
'string
:stream pane))))
-;;; KLUDGE: below this line, this is just s/html/prolog/ on the
-;;; definitions in html-syntax.lisp
-
-(defmethod display-parse-tree :before ((entity prolog-token) (syntax prolog-syntax) pane)
+(defmethod display-parse-tree :before ((entity prolog-lexeme) (syntax prolog-syntax) pane)
(handle-whitespace pane (buffer pane) *white-space-start* (start-offset entity))
(setf *white-space-start* (end-offset entity)))
@@ -888,13 +975,16 @@
(display-parse-stack (parse-stack-symbol top) top syntax pane)
(display-parse-tree (target-parse-tree state) syntax pane))))
+(defun nb-valid-lexemes (lexer)
+ (slot-value lexer 'valid-lex))
+
(defmethod redisplay-pane-with-syntax ((pane climacs-pane) (syntax prolog-syntax) current-p)
(with-slots (top bot) pane
(setf *cursor-positions* (make-array (1+ (number-of-lines-in-region top bot)))
*current-line* 0
(aref *cursor-positions* 0) (stream-cursor-position pane))
(with-slots (lexer) syntax
- (let ((average-token-size (max (float (/ (size (buffer pane)) (nb-lexemes lexer)))
+ (let ((average-token-size (max (float (/ (size (buffer pane)) (nb-valid-lexemes lexer)))
1.0)))
;; find the last token before bot
(let ((end-token-index (max (floor (/ (offset bot) average-token-size)) 1)))
@@ -902,7 +992,7 @@
(loop until (mark<= (end-offset (lexeme lexer (1- end-token-index))) bot)
do (decf end-token-index))
;; go forward to the last token before bot
- (loop until (or (= end-token-index (nb-lexemes lexer))
+ (loop until (or (= end-token-index (nb-valid-lexemes lexer))
(mark> (start-offset (lexeme lexer end-token-index)) bot))
do (incf end-token-index))
(let ((start-token-index end-token-index))
More information about the Climacs-cvs
mailing list