[cxml-cvs] CVS update: cxml/xml/xml-parse.lisp

Sun Nov 27 18:20:13 UTC 2005

Update of /project/cxml/cvsroot/cxml/xml
In directory common-lisp.net:/tmp/cvs-serv8375/xml

Modified Files:
	xml-parse.lisp 
Log Message:
Hmm.  Auf U+ffff und dergleichen wurde durch data-rune-p geprueft,
das aber eben nicht ueberall benutzt wurde.  Ich habe die Pruefung
jetzt mal direkt im Decoding eingebaut.

-xmltest/not-wf/sa/171.xml [not validating:] FAILED:
-  well-formedness violation not detected
-[
-    Character FFFF is not legal anywhere in an XML document. ]

Date: Sun Nov 27 19:20:11 2005
Author: dlichteblau

Index: cxml/xml/xml-parse.lisp
diff -u cxml/xml/xml-parse.lisp:1.35 cxml/xml/xml-parse.lisp:1.36

--- cxml/xml/xml-parse.lisp:1.35	Sun Nov 27 18:19:14 2005
+++ cxml/xml/xml-parse.lisp	Sun Nov 27 19:20:11 2005
@@ -1413,15 +1413,18 @@
 
 (definline data-rune-p (rune)
   ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
+  ;;
+  ;; FIXME: das halte ich fuer verkehrt.  Surrogates als Unicode-Zeichen
+  ;; sind verboten.  Das liegt hier aber nicht vor, denn wir arbeiten
+  ;; ja tatsaechlich mit UTF-16.  Verboten ist es nur, wenn wir ein
+  ;; solches Zeichen beim Dekodieren finden, das wird aber eben
+  ;; in encodings.lisp bereits geprueft.  --david
   (let ((c (rune-code rune)))
     (or (= c #x9) (= c #xA) (= c #xD)
         (<= #x20 c #xD7FF)
         (<= #xE000 c #xFFFD)
-        ;;
         (<= #xD800 c #xDBFF)
-        (<= #xDC00 c #xDFFF)
-        ;;
-        )))
+        (<= #xDC00 c #xDFFF))))
 
 (defun read-att-value (zinput input mode &optional canon-space-p (delim nil))
   (with-rune-collector-2 (collect)
@@ -2686,7 +2689,7 @@
       ((:ENTITY-REF)
        (let ((name sem))
          (consume-token input)
-         (append ;; nil  #+(OR)
+         (append
           (recurse-on-entity input name :general
                              (lambda (input)
                                (prog1
@@ -3190,17 +3193,10 @@
 (defun read-cdata (input)
   (read-data-until* ((lambda (rune)
                        (declare (type rune rune))
-		       (when (or (and (%rune< rune #/U+0020)
-				      (not (or (%rune= rune #/U+0009)
-					       (%rune= rune #/U+000a)
-					       (%rune= rune #/U+000d))))
-				 ;; Surrogates nicht ausschliessen, denn wir
-				 ;; haben ja UTF-16 Runen.
-				 #+(or)
-				 (and (%rune<= #/U+D800 rune)
-				      (%rune< rune #/U+E000))
-				 (%rune= rune #/U+FFFE)
-				 (%rune= rune #/U+FFFF))
+		       (when (and (%rune< rune #/U+0020)
+				  (not (or (%rune= rune #/U+0009)
+					   (%rune= rune #/U+000a)
+					   (%rune= rune #/U+000d))))
 			 (wf-error "code point invalid: ~A" rune))
                        (or (%rune= rune #/<) (%rune= rune #/&)))
                      input