[climacs-cvs] CVS update: climacs/prolog-syntax.lisp
Christophe Rhodes
crhodes at common-lisp.net
Sat May 7 16:41:04 UTC 2005
Update of /project/climacs/cvsroot/climacs
In directory common-lisp.net:/tmp/cvs-serv14244
Modified Files:
prolog-syntax.lisp
Log Message:
Improve the Prolog tokenizer. We now recognize
* binary constants: 0b...
* octal constants: 0o...
* hexadecimal constants: 0x...
* char-code constants: 0'<quoted-char>
* escaped characters in quoted strings:
** meta escapes such as \"
** control escapes such as \a
** numeric escapes such as \0177\ and \xabcd\
** "" (within a char-code-string) and '' (within a quoted-atom)
Date: Sat May 7 18:41:03 2005
Author: crhodes
Index: climacs/prolog-syntax.lisp
diff -u climacs/prolog-syntax.lisp:1.17 climacs/prolog-syntax.lisp:1.18
--- climacs/prolog-syntax.lisp:1.17 Sun Apr 17 17:44:39 2005
+++ climacs/prolog-syntax.lisp Sat May 7 18:41:03 2005
@@ -111,7 +111,8 @@
(def (name t) identifier graphic quoted semicolon cut)
(def (variable t) anonymous named)
- (def (integer t))
+ (def (integer t) integer-constant character-code-constant binary-constant
+ octal-constant hexadecimal-constant)
(def (float-number t))
(def (char-code-list t))
(def (open-ct))
@@ -157,6 +158,58 @@
(bo ()
(vector-pop string)
(backward-object scan)))
+ (macrolet ((read-quoted-char (char)
+ `(block read-quoted-char
+ (let ((o (object-after scan)))
+ (tagbody
+ START
+ (cond
+ ((eql o #\\) (fo) (go ESCAPE))
+ ((eql o ,char) (fo) (go QUOTE))
+ (t (fo) (return-from read-quoted-char t)))
+ QUOTE
+ (if (end-of-buffer-p scan)
+ (return-from read-quoted-char nil)
+ (let ((o (object-after scan)))
+ (cond
+ ((eql o ,char) (fo) (return-from read-quoted-char t))
+ (t (return-from read-quoted-char nil)))))
+ ESCAPE
+ (if (end-of-buffer-p scan)
+ (return (make-instance 'error-lexeme))
+ (let ((o (object-after scan)))
+ (cond
+ ;; meta (6.5.5)
+ ((position o "\\'\"`") (fo) (return-from read-quoted-char t))
+ ;; symbolic (6.4.2.1)
+ ((position o "abfnrtv") (fo) (return-from read-quoted-char t))
+ ;; octal
+ ((digit-char-p o 8) (fo)
+ (tagbody
+ LOOP
+ (when (end-of-buffer-p scan)
+ (return (make-instance 'error-lexeme)))
+ (let ((o (object-after scan)))
+ (cond
+ ((eql o #\\) (fo) (return-from read-quoted-char t))
+ ((digit-char-p o 8) (fo) (go LOOP))
+ (t (return (make-instance 'error-lexeme)))))))
+ ((eql o #\x) (fo)
+ (if (or (end-of-buffer-p scan)
+ (not (digit-char-p (object-after scan) 16)))
+ (return (make-instance 'error-lexeme))
+ (progn
+ (fo)
+ (tagbody
+ LOOP
+ (when (end-of-buffer-p scan)
+ (return (make-instance 'error-lexeme)))
+ (let ((o (object-after scan)))
+ (cond
+ ((eql o #\\) (fo) (return-from read-quoted-char t))
+ ((digit-char-p o 16) (fo) (go LOOP))
+ (t (return (make-instance 'error-lexeme)))))))))
+ (t (return (make-instance 'error-lexeme)))))))))))
(let ((object (object-after scan)))
(block nil
(tagbody
@@ -173,6 +226,7 @@
(fo) (return (make-instance 'cut-lexeme)))
((eql object #\_) (fo) (go VARIABLE))
((upper-case-p object) (fo) (go NAMED-VARIABLE))
+ ((eql object #\0) (fo) (go NUMBER-OR-INTEGER))
((digit-char-p object) (fo) (go NUMBER))
((eql object #\") (fo) (go CHAR-CODE-LIST))
((eql object #\()
@@ -243,14 +297,10 @@
(return (make-instance 'end-lexeme)))
(t (return (make-instance 'graphic-lexeme))))))
QUOTED-TOKEN
- (loop until (end-of-buffer-p scan)
- ;; FIXME
- until (eql (object-after scan) #\')
- do (fo))
- (if (end-of-buffer-p scan)
- (return (make-instance 'error-lexeme))
- (progn (fo)
- (return (make-instance 'quoted-lexeme))))
+ (loop named #:mu
+ until (end-of-buffer-p scan)
+ while (read-quoted-char #\'))
+ (return (make-instance 'quoted-lexeme))
VARIABLE
(if (or (end-of-buffer-p scan)
(let ((object (object-after scan)))
@@ -265,20 +315,47 @@
(eql object #\_)))
do (fo))
(return (make-instance 'named-lexeme))
+ NUMBER-OR-INTEGER
+ (if (end-of-buffer-p scan)
+ (return (make-instance 'integer-lexeme))
+ (let ((object (object-after scan)))
+ (cond
+ ((eql object #\') (fo) (go CHARACTER-CODE-CONSTANT))
+ ((eql object #\b) (fo) (go BINARY-CONSTANT))
+ ((eql object #\o) (fo) (go OCTAL-CONSTANT))
+ ((eql object #\x) (fo) (go HEXADECIMAL-CONSTANT))
+ ((digit-char-p object) (fo) (go NUMBER))
+ ;; FIXME: floats
+ (t (return (make-instance 'integer-lexeme))))))
+ CHARACTER-CODE-CONSTANT
+ (if (read-quoted-char #\')
+ (return (make-instance 'character-code-constant-lexeme))
+ (return (make-instance 'error-lexeme)))
+ BINARY-CONSTANT
+ (loop until (end-of-buffer-p scan)
+ while (digit-char-p (object-after scan) 2)
+ do (fo))
+ (return (make-instance 'binary-constant-lexeme))
+ OCTAL-CONSTANT
+ (loop until (end-of-buffer-p scan)
+ while (digit-char-p (object-after scan) 8)
+ do (fo))
+ (return (make-instance 'octal-constant-lexeme))
+ HEXADECIMAL-CONSTANT
+ (loop until (end-of-buffer-p scan)
+ while (digit-char-p (object-after scan) 16)
+ do (fo))
+ (return (make-instance 'hexadecimal-constant-lexeme))
NUMBER
(loop until (end-of-buffer-p scan)
- while (digit-char-p (object-after scan))
- do (fo))
- (return (make-instance 'integer-lexeme))
+ while (digit-char-p (object-after scan))
+ do (fo))
+ (return (make-instance 'integer-constant-lexeme))
CHAR-CODE-LIST
- (loop until (end-of-buffer-p scan)
- ;; FIXME
- until (eql (object-after scan) #\")
- do (fo))
- (if (end-of-buffer-p scan)
- (return (make-instance 'error-lexeme))
- (progn (fo)
- (return (make-instance 'char-code-list-lexeme))))))))))
+ (loop named #:mu
+ until (end-of-buffer-p scan)
+ while (read-quoted-char #\"))
+ (return (make-instance 'char-code-list-lexeme)))))))))
;;; parser
More information about the Climacs-cvs
mailing list