[cxml-cvs] CVS cxml/xml
dlichteblau
dlichteblau at common-lisp.net
Sat Dec 22 15:24:53 UTC 2007
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv4227/xml
Modified Files:
xml-parse.lisp
Log Message:
Use 21-bit characters on Lisps that offer them.
--- /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/11/24 00:04:16 1.75
+++ /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/12/22 15:24:52 1.76
@@ -458,7 +458,8 @@
(defmacro %put-unicode-char (code-var put)
`(progn
- (cond ((%> ,code-var #xFFFF)
+ (cond #+rune-is-utf-16
+ ((%> ,code-var #xFFFF)
(,put (the rune (code-rune (%+ #xD7C0 (%ash ,code-var -10)))))
(,put (the rune (code-rune (%ior #xDC00 (%and ,code-var #x03FF))))))
(t
@@ -1489,19 +1490,14 @@
value))))
(definline data-rune-p (rune)
- ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
- ;;
- ;; FIXME: das halte ich fuer verkehrt. Surrogates als Unicode-Zeichen
- ;; sind verboten. Das liegt hier aber nicht vor, denn wir arbeiten
- ;; ja tatsaechlich mit UTF-16. Verboten ist es nur, wenn wir ein
- ;; solches Zeichen beim Dekodieren finden, das wird aber eben
- ;; in encodings.lisp bereits geprueft. --david
+ ;; Any Unicode character, excluding FFFE, and FFFF.
+ ;; Allow surrogates if using UTF-16, else allow >= 0x10000.
(let ((c (rune-code rune)))
(or (= c #x9) (= c #xA) (= c #xD)
(<= #x20 c #xD7FF)
+ #+rune-is-utf-16 (<= #xD800 c #xDFFF)
(<= #xE000 c #xFFFD)
- (<= #xD800 c #xDBFF)
- (<= #xDC00 c #xDFFF))))
+ #-rune-is-utf-16 (<= #x10000 c #x10FFFF))))
(defun read-att-value (zinput input mode &optional canon-space-p (delim nil))
(with-rune-collector-2 (collect)
@@ -1761,11 +1757,13 @@
(rune= rune #/U+000D)))
(defun code-data-char-p (c)
- ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
+ ;; Any Unicode character, excluding FFFE, and FFFF.
+ ;; Allow surrogates if using UTF-16, else allow >= 0x10000.
(or (= c #x9) (= c #xA) (= c #xD)
(<= #x20 c #xD7FF)
+ #+rune-is-utf-16 (<= #xD800 c #xDFFF)
(<= #xE000 c #xFFFD)
- (<= #x10000 c #x10FFFF)))
+ #-rune-is-utf-16 (<= #x10000 c #x10FFFF)))
(defun pubid-char-p (c)
(or (rune= c #/u+0020) (rune= c #/u+000D) (rune= c #/u+000A)
More information about the Cxml-cvs
mailing list