[cxml-cvs] CVS cxml/xml

dlichteblau dlichteblau at common-lisp.net
Sat Dec 22 15:24:53 UTC 2007


Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv4227/xml

Modified Files:
	xml-parse.lisp 
Log Message:
Use 21-bit characters on Lisps offering them.

--- /project/cxml/cvsroot/cxml/xml/xml-parse.lisp	2007/11/24 00:04:16	1.75
+++ /project/cxml/cvsroot/cxml/xml/xml-parse.lisp	2007/12/22 15:24:52	1.76
@@ -458,7 +458,8 @@
 
 (defmacro %put-unicode-char (code-var put)
   `(progn
-     (cond ((%> ,code-var #xFFFF)
+     (cond #+rune-is-utf-16
+           ((%> ,code-var #xFFFF)
           (,put (the rune (code-rune (%+ #xD7C0 (%ash ,code-var -10)))))
           (,put (the rune (code-rune (%ior #xDC00 (%and ,code-var #x03FF))))))
          (t
@@ -1489,19 +1490,14 @@
            value))))
 
 (definline data-rune-p (rune)
-  ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
-  ;;
-  ;; FIXME: das halte ich fuer verkehrt.  Surrogates als Unicode-Zeichen
-  ;; sind verboten.  Das liegt hier aber nicht vor, denn wir arbeiten
-  ;; ja tatsaechlich mit UTF-16.  Verboten ist es nur, wenn wir ein
-  ;; solches Zeichen beim Dekodieren finden, das wird aber eben
-  ;; in encodings.lisp bereits geprueft.  --david
+  ;; Any Unicode character, excluding FFFE, and FFFF.
+  ;; Allow surrogates if using UTF-16, else allow >= 0x10000.
   (let ((c (rune-code rune)))
     (or (= c #x9) (= c #xA) (= c #xD)
         (<= #x20 c #xD7FF)
+	#+rune-is-utf-16 (<= #xD800 c #xDFFF)
         (<= #xE000 c #xFFFD)
-        (<= #xD800 c #xDBFF)
-        (<= #xDC00 c #xDFFF))))
+	#-rune-is-utf-16 (<= #x10000 c #x10FFFF))))
 
 (defun read-att-value (zinput input mode &optional canon-space-p (delim nil))
   (with-rune-collector-2 (collect)
@@ -1761,11 +1757,13 @@
       (rune= rune #/U+000D)))
 
 (defun code-data-char-p (c)
-  ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
+  ;; Any Unicode character, excluding FFFE, and FFFF.
+  ;; Allow surrogates if using UTF-16, else allow >= 0x10000.
   (or (= c #x9) (= c #xA) (= c #xD)
       (<= #x20 c #xD7FF)
+      #+rune-is-utf-16 (<= #xD800 c #xDFFF)
       (<= #xE000 c #xFFFD)
-      (<= #x10000 c #x10FFFF)))
+      #-rune-is-utf-16 (<= #x10000 c #x10FFFF)))
 
 (defun pubid-char-p (c)
   (or (rune= c #/u+0020) (rune= c #/u+000D) (rune= c #/u+000A)




More information about the Cxml-cvs mailing list