[cxml-cvs] CVS update: cxml/xml/xml-parse.lisp
David Lichteblau
dlichteblau at common-lisp.net
Sun Nov 27 12:24:41 UTC 2005
Update of /project/cxml/cvsroot/cxml/xml
In directory common-lisp.net:/tmp/cvs-serv14010/xml
Modified Files:
xml-parse.lisp
Log Message:
eof in character references
Date: Sun Nov 27 13:24:40 2005
Author: dlichteblau
Index: cxml/xml/xml-parse.lisp
diff -u cxml/xml/xml-parse.lisp:1.23 cxml/xml/xml-parse.lisp:1.24
--- cxml/xml/xml-parse.lisp:1.23 Sun Nov 27 13:13:52 2005
+++ cxml/xml/xml-parse.lisp Sun Nov 27 13:24:40 2005
@@ -647,12 +647,12 @@
(defun validity-error (x &rest args)
(error 'validity-error
- :format-control "Validity constraint violated: ~?"
+ :format-control "Document not valid: ~?"
:format-arguments (list x args)))
(defun wf-error (x &rest args)
(error 'well-formedness-violation
- :format-control "Well-formedness violated: ~?"
+ :format-control "Document not well-formed: ~?"
:format-arguments (list x args)))
(defun eox (stream &optional x &rest args)
@@ -1208,10 +1208,10 @@
(:DOC
(cond
((rune= c #/&)
- (multiple-value-bind (kind data) (read-entity-ref input)
- (cond ((eq kind :NAMED)
- (values :ENTITY-REF data) )
- ((eq kind :NUMERIC)
+ (multiple-value-bind (kind data) (read-entity-like input)
+ (cond ((eq kind :ENTITY-REFERENCE)
+ (values :ENTITY-REF data))
+ ((eq kind :CHARACTER-REFERENCE)
(values :CDATA
(with-rune-collector (collect)
(%put-unicode-char data collect)))))))
@@ -1309,16 +1309,16 @@
(t
nil)))
-(defun read-entity-ref (input)
+(defun read-entity-like (input)
"Read an entity reference off the xstream `input'. Returns two values:
- either :NAMED <interned-rod> in case of a named entity
- or :NUMERIC <integer> in case of numeric entities.
+ either :ENTITY-REFERENCE <interned-rod> in case of a named entity
+ or :CHARACTER-REFERENCE <integer> in case of character references.
The initial #\\& is considered to be consumed already."
(let ((c (peek-rune input)))
(cond ((eq c :eof)
(eox input "EOF after '&'"))
((rune= c #/#)
- (values :NUMERIC (read-numeric-entity input)))
+ (values :CHARACTER-REFERENCE (read-character-reference input)))
(t
(unless (name-start-rune-p (peek-rune input))
(wf-error "Expecting name after &."))
@@ -1326,7 +1326,7 @@
(setf c (read-rune input))
(unless (rune= c #/\;)
(perror input "Expected \";\"."))
- (values :NAMED name))))))
+ (values :ENTITY-REFERENCE name))))))
(defun read-tag-2 (zinput input kind)
(let ((name (read-name-token input))
@@ -1420,7 +1420,7 @@
((rune= c #/&)
(setf c (peek-rune input))
(cond ((rune= c #/#)
- (let ((c (read-numeric-entity input)))
+ (let ((c (read-character-reference input)))
(%put-unicode-char c collect)))
(t
(unless (name-start-rune-p (peek-rune input))
@@ -1476,17 +1476,25 @@
(assert (member delim '(#/\" #/\')))
delim))))))
-(defun read-numeric-entity (input)
+(defun check-rune (input actual expected)
+ (declare (ignore input))
+ (unless (eql actual expected)
+ (wf-error "expected #/~A but found #/~A"
+ (rune-char expected)
+ (rune-char actual))))
+
+(defun read-character-reference (input)
;; xxx eof handling
;; The #/& is already read
(let ((res
(let ((c (read-rune input)))
- (assert (rune= c #/#))
+ (check-rune input c #/#)
(setq c (read-rune input))
- (cond ((rune= c #/x)
+ (cond ((eql c #/x)
;; hexadecimal
(setq c (read-rune input))
- (assert (digit-rune-p c 16))
+ (unless (digit-rune-p c 16)
+ (wf-error "garbage in character reference"))
(prog1
(parse-integer
(with-output-to-string (sink)
@@ -1494,8 +1502,7 @@
(while (digit-rune-p (setq c (read-rune input)) 16)
(write-char (rune-char c) sink)))
:radix 16)
- (assert (rune= c #/\;)))
- )
+ (check-rune input c #/\;)))
((rune<= #/0 c #/9)
;; decimal
(prog1
@@ -1505,7 +1512,7 @@
(while (rune<= #/0 (setq c (read-rune input)) #/9)
(write-char (rune-char c) sink)))
:radix 10)
- (assert (rune= c #/\;))) )
+ (check-rune input c #/\;)))
(t
(wf-error "Bad char in numeric character entity.") )))))
(unless (code-data-char-p res)
@@ -3185,7 +3192,7 @@
((rune= c #/&)
(setf c (peek-rune input))
(cond ((rune= c #/#)
- (let ((c (read-numeric-entity input)))
+ (let ((c (read-character-reference input)))
(%put-unicode-char c collect)))
(t
(unless (name-start-rune-p (peek-rune input))
@@ -3248,11 +3255,11 @@
((rune= c #/<)
(wf-error "'<' not allowed in attribute values"))
((rune= #/& c)
- (multiple-value-bind (kind sem) (read-entity-ref input)
+ (multiple-value-bind (kind sem) (read-entity-like input)
(ecase kind
- (:NUMERIC
+ (:CHARACTER-REFERENCE
(%put-unicode-char sem collect))
- (:NAMED
+ (:ENTITY-REFERENCE
(let* ((exp (internal-entity-expansion sem))
(n (length exp)))
(declare (type (simple-array rune (*)) exp))
More information about the Cxml-cvs
mailing list