[cxml-cvs] CVS update: cxml/xml/xml-parse.lisp

David Lichteblau dlichteblau at common-lisp.net
Sun Nov 27 12:24:41 UTC 2005


Update of /project/cxml/cvsroot/cxml/xml
In directory common-lisp.net:/tmp/cvs-serv14010/xml

Modified Files:
	xml-parse.lisp 
Log Message:
eof in character references

Date: Sun Nov 27 13:24:40 2005
Author: dlichteblau

Index: cxml/xml/xml-parse.lisp
diff -u cxml/xml/xml-parse.lisp:1.23 cxml/xml/xml-parse.lisp:1.24
--- cxml/xml/xml-parse.lisp:1.23	Sun Nov 27 13:13:52 2005
+++ cxml/xml/xml-parse.lisp	Sun Nov 27 13:24:40 2005
@@ -647,12 +647,12 @@
 
 (defun validity-error (x &rest args)
   (error 'validity-error
-         :format-control "Validity constraint violated: ~?"
+         :format-control "Document not valid: ~?"
          :format-arguments (list x args)))
 
 (defun wf-error (x &rest args)
   (error 'well-formedness-violation
-         :format-control "Well-formedness violated: ~?"
+         :format-control "Document not well-formed: ~?"
          :format-arguments (list x args)))
 
 (defun eox (stream &optional x &rest args)
@@ -1208,10 +1208,10 @@
           (:DOC
            (cond
             ((rune= c #/&)
-             (multiple-value-bind (kind data) (read-entity-ref input)
-               (cond ((eq kind :NAMED)
-                      (values :ENTITY-REF data) )
-                     ((eq kind :NUMERIC)
+             (multiple-value-bind (kind data) (read-entity-like input)
+               (cond ((eq kind :ENTITY-REFERENCE)
+                      (values :ENTITY-REF data))
+                     ((eq kind :CHARACTER-REFERENCE)
                       (values :CDATA
                               (with-rune-collector (collect)
                                 (%put-unicode-char data collect)))))))
@@ -1309,16 +1309,16 @@
         (t
          nil)))
 
-(defun read-entity-ref (input)
+(defun read-entity-like (input)
   "Read an entity reference off the xstream `input'. Returns two values:
-   either :NAMED <interned-rod> in case of a named entity
-   or     :NUMERIC <integer> in case of numeric entities.
+   either :ENTITY-REFERENCE <interned-rod> in case of a named entity
+   or     :CHARACTER-REFERENCE <integer> in case of character references.
    The initial #\\& is considered to be consumed already."
   (let ((c (peek-rune input)))
     (cond ((eq c :eof)
            (eox input "EOF after '&'"))
           ((rune= c #/#)
-           (values :NUMERIC (read-numeric-entity input)))
+           (values :CHARACTER-REFERENCE (read-character-reference input)))
           (t
            (unless (name-start-rune-p (peek-rune input))
              (wf-error "Expecting name after &."))
@@ -1326,7 +1326,7 @@
              (setf c (read-rune input))
              (unless (rune= c #/\;)
                (perror input "Expected \";\"."))
-             (values :NAMED name))))))
+             (values :ENTITY-REFERENCE name))))))
 
 (defun read-tag-2 (zinput input kind)
   (let ((name (read-name-token input))
@@ -1420,7 +1420,7 @@
                          ((rune= c #/&)
                           (setf c (peek-rune input))
                           (cond ((rune= c #/#)
-                                 (let ((c (read-numeric-entity input)))
+                                 (let ((c (read-character-reference input)))
                                    (%put-unicode-char c collect)))
                                 (t
                                  (unless (name-start-rune-p (peek-rune input))
@@ -1476,17 +1476,25 @@
                           (assert (member delim '(#/\" #/\')))
                           delim))))))
 
-(defun read-numeric-entity (input)
+(defun check-rune (input actual expected)
+  (declare (ignore input))
+  (unless (eql actual expected)
+    (wf-error "expected #/~A but found #/~A"
+	      (rune-char expected)
+	      (rune-char actual))))
+
+(defun read-character-reference (input)
   ;; xxx eof handling
   ;; The #/& is already read
   (let ((res
          (let ((c (read-rune input)))
-           (assert (rune= c #/#))
+           (check-rune input c #/#)
            (setq c (read-rune input))
-           (cond ((rune= c #/x)
+           (cond ((eql c #/x)
                   ;; hexadecimal
                   (setq c (read-rune input))
-                  (assert (digit-rune-p c 16))
+                  (unless (digit-rune-p c 16)
+		    (wf-error "garbage in character reference"))
                   (prog1
                       (parse-integer
                        (with-output-to-string (sink)
@@ -1494,8 +1502,7 @@
                          (while (digit-rune-p (setq c (read-rune input)) 16)
                            (write-char (rune-char c) sink)))
                        :radix 16)
-                    (assert (rune= c #/\;)))
-                  )
+                    (check-rune input c #/\;)))
                  ((rune<= #/0 c #/9)
                   ;; decimal
                   (prog1
@@ -1505,7 +1512,7 @@
                          (while (rune<= #/0 (setq c (read-rune input)) #/9)
                            (write-char (rune-char c) sink)))
                        :radix 10)
-                    (assert (rune= c #/\;))) )
+                    (check-rune input c #/\;)))
                  (t
                   (wf-error "Bad char in numeric character entity.") )))))
     (unless (code-data-char-p res)
@@ -3185,7 +3192,7 @@
                            ((rune= c #/&)
                             (setf c (peek-rune input))
                             (cond ((rune= c #/#)
-                                   (let ((c (read-numeric-entity input)))
+                                   (let ((c (read-character-reference input)))
                                      (%put-unicode-char c collect)))
                                   (t
                                    (unless (name-start-rune-p (peek-rune input))
@@ -3248,11 +3255,11 @@
                 ((rune= c #/<)
 		 (wf-error "'<' not allowed in attribute values"))
                 ((rune= #/& c)
-                 (multiple-value-bind (kind sem) (read-entity-ref input)
+                 (multiple-value-bind (kind sem) (read-entity-like input)
                    (ecase kind
-                     (:NUMERIC
+                     (:CHARACTER-REFERENCE
                       (%put-unicode-char sem collect))
-                     (:NAMED
+                     (:ENTITY-REFERENCE
                       (let* ((exp (internal-entity-expansion sem))
                              (n (length exp)))
                         (declare (type (simple-array rune (*)) exp))




More information about the Cxml-cvs mailing list