[bknr-cvs] r1845 - / vendor vendor/cxml vendor/cxml/CVS vendor/cxml/sax-tests vendor/cxml/sax-tests/CVS
bknr at bknr.net
bknr at bknr.net
Sat Feb 18 09:34:19 UTC 2006
Author: hhubner
Date: 2006-02-18 03:34:15 -0600 (Sat, 18 Feb 2006)
New Revision: 1845
Added:
vendor/
vendor/cxml/
vendor/cxml/CVS/
vendor/cxml/CVS/Entries
vendor/cxml/CVS/Entries.Log
vendor/cxml/CVS/Repository
vendor/cxml/CVS/Root
vendor/cxml/CVS/Template
vendor/cxml/catalog.lisp
vendor/cxml/characters.lisp
vendor/cxml/package.lisp
vendor/cxml/recoder.lisp
vendor/cxml/sax-handler.lisp
vendor/cxml/sax-proxy.lisp
vendor/cxml/sax-tests/
vendor/cxml/sax-tests/CVS/
vendor/cxml/sax-tests/CVS/Entries
vendor/cxml/sax-tests/CVS/Repository
vendor/cxml/sax-tests/CVS/Root
vendor/cxml/sax-tests/CVS/Template
vendor/cxml/sax-tests/event-collecting-handler.lisp
vendor/cxml/sax-tests/package.lisp
vendor/cxml/sax-tests/tests.lisp
vendor/cxml/space-normalizer.lisp
vendor/cxml/split-sequence.lisp
vendor/cxml/unparse.lisp
vendor/cxml/util.lisp
vendor/cxml/xml-name-rune-p.lisp
vendor/cxml/xml-parse.lisp
vendor/cxml/xmlns-normalizer.lisp
vendor/cxml/xmls-compat.lisp
Log:
importing current cxml
Added: vendor/cxml/CVS/Entries
===================================================================
--- vendor/cxml/CVS/Entries 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/CVS/Entries 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,15 @@
+/catalog.lisp/1.4/Mon Jan 23 21:49:42 2006//
+/characters.lisp/1.2/Mon Nov 28 22:33:47 2005//
+/package.lisp/1.11/Thu Dec 29 00:31:36 2005//
+/recoder.lisp/1.5/Thu Dec 29 00:31:36 2005//
+/sax-handler.lisp/1.4/Thu Dec 29 00:31:36 2005//
+/sax-proxy.lisp/1.4/Thu Dec 29 00:31:36 2005//
+/space-normalizer.lisp/1.2/Thu Dec 29 00:39:25 2005//
+/split-sequence.lisp/1.1.1.1/Sun Mar 13 18:02:35 2005//
+/unparse.lisp/1.9/Fri Feb 17 12:53:19 2006//
+/util.lisp/1.2/Mon Nov 28 22:33:47 2005//
+/xml-name-rune-p.lisp/1.6/Mon Nov 28 22:33:47 2005//
+/xml-parse.lisp/1.59/Mon Jan 23 21:45:48 2006//
+/xmlns-normalizer.lisp/1.2/Tue Dec 27 20:01:32 2005//
+/xmls-compat.lisp/1.2/Mon Nov 28 22:33:47 2005//
+D
Added: vendor/cxml/CVS/Entries.Log
===================================================================
--- vendor/cxml/CVS/Entries.Log 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/CVS/Entries.Log 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1 @@
+A D/sax-tests////
Added: vendor/cxml/CVS/Repository
===================================================================
--- vendor/cxml/CVS/Repository 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/CVS/Repository 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1 @@
+cxml/xml
Added: vendor/cxml/CVS/Root
===================================================================
--- vendor/cxml/CVS/Root 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/CVS/Root 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1 @@
+:pserver:anonymous at common-lisp.net:/project/cxml/cvsroot
Added: vendor/cxml/CVS/Template
===================================================================
Added: vendor/cxml/catalog.lisp
===================================================================
--- vendor/cxml/catalog.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/catalog.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,348 @@
+;;;; catalogs.lisp -- XML Catalogs -*- Mode: Lisp; readtable: runes -*-
+;;;;
+;;;; This file is part of the CXML parser, released under Lisp-LGPL.
+;;;; See file COPYING for details.
+;;;;
+;;;; Developed 2004 for headcraft - http://headcraft.de/
+;;;; Copyright: David Lichteblau
+
+(in-package :cxml)
+
+;;; http://www.oasis-open.org/committees/entity/spec.html
+;;;
+;;; Bugs:
+;;; - We validate using the Catalog DTD while parsing, which is too strict
+;;; and will will fail to parse files using other parser's extensions.
+;;; (Jedenfalls behauptet das die Spec.)
+;;; A long-term solution might be an XML Schema validator.
+
+(defvar *prefer* :public)
+(defvar *default-catalog*
+ '(;; libxml standard
+ "/etc/xml/catalog"
+ ;; FreeBSD
+ "/usr/local/share/xml/catalog.ports"))
+
+(defstruct (catalog (:constructor %make-catalog ()))
+ main-files
+ (dtd-cache (make-dtd-cache))
+ (file-table (puri:make-uri-space)))
+
+(defstruct (entry-file (:conc-name ""))
+ (system-entries) ;extid 2
+ (rewrite-system-entries) ; 3
+ (delegate-system-entries) ; 4
+ (public-entries) ; 5
+ (delegate-public-entries) ; 6
+ (uri-entries) ;uri 2
+ (rewrite-uri-entries) ; 3
+ (delegate-uri-entries) ; 4
+ (next-catalog-entries) ; 5/7
+ )
+
+(defun starts-with-p (string prefix)
+ (let ((mismatch (mismatch string prefix)))
+ (or (null mismatch) (= mismatch (length prefix)))))
+
+(defun normalize-public (str)
+ (setf str (rod-to-utf8-string (rod str)))
+ (flet ((whitespacep (c)
+ (find c #.(map 'string #'code-char '(#x9 #xa #xd #x20)))))
+ (let ((start (position-if-not #'whitespacep str))
+ (end (position-if-not #'whitespacep str :from-end t))
+ (spacep nil))
+ (with-output-to-string (out)
+ (when start
+ (loop for i from start to end do
+ (let ((c (char str i)))
+ (cond
+ ((whitespacep c)
+ (unless spacep
+ (setf spacep t)
+ (write-char #\space out)))
+ (t
+ (setf spacep nil)
+ (write-char c out))))))))))
+
+(defun normalize-uri (str)
+ (when (typep str 'puri:uri)
+ (setf str (puri:render-uri str nil)))
+ (setf str (rod-to-utf8-string (rod str)))
+ (with-output-to-string (out)
+ (loop for ch across str do
+ (let ((c (char-code ch)))
+ (if (< c 15)
+ (write-string (string-upcase (format nil "%~2,'0X" c)) out)
+ (write-char ch out))))))
+
+(defun unwrap-publicid (str)
+ (normalize-public
+ (with-output-to-string (out)
+ (let ((i (length "urn:publicid:"))
+ (n (length str)))
+ (while (< i n)
+ (let ((c (char str i)))
+ (case c
+ (#\+ (write-char #\space out))
+ (#\: (write-string "//" out))
+ (#\; (write-string "::" out))
+ (#\%
+ (let ((code
+ (parse-integer str
+ :start (+ i 1)
+ :end (+ i 3)
+ :radix 16)))
+ (write-char (code-char code) out))
+ (incf i 2))
+ (t (write-char c out))))
+ (incf i))))))
+
+(defun match-exact (key table &optional check-prefer)
+ (dolist (pair table)
+ (destructuring-bind (from to &optional prefer) pair
+ (when (and (equal key from) (or (not check-prefer) (eq prefer :public)))
+ (return to)))))
+
+(defun match-prefix/rewrite (key table &optional check-prefer)
+ (let ((match nil)
+ (match-length -1))
+ (dolist (pair table)
+ (destructuring-bind (from to &optional prefer) pair
+ (when (and (or (not check-prefer) (eq prefer :public))
+ (starts-with-p key from)
+ (> (length from) match-length))
+ (setf match-length (length from))
+ (setf match to))))
+ (if match
+ (concatenate 'string
+ match
+ (subseq key match-length))
+ nil)))
+
+(defun match-prefix/sorted (key table &optional check-prefer)
+ (let ((result '()))
+ (dolist (pair table)
+ (destructuring-bind (from to &optional prefer) pair
+ (when (and (or (not check-prefer) (eq prefer :public))
+ (starts-with-p key from))
+ (push (cons (length from) to) result))))
+ (mapcar #'cdr (sort result #'> :key #'car))))
+
+(defun resolve-extid (public system catalog)
+ (when public (setf public (normalize-public public)))
+ (when system (setf system (normalize-uri system)))
+ (when (and system (starts-with-p system "urn:publicid:"))
+ (let ((new-public (unwrap-publicid system)))
+ (assert (or (null public) (equal public new-public)))
+ (setf public new-public
+ system nil)))
+ (let ((files (catalog-main-files catalog))
+ (seen '()))
+ (while files
+ (let ((file (pop files))
+ (delegates nil))
+ (unless (typep file 'entry-file)
+ (setf file (find-catalog-file file catalog)))
+ (unless (or (null file) (member file seen))
+ (push file seen)
+ (when system
+ (let ((result
+ (or (match-exact system (system-entries file))
+ (match-prefix/rewrite
+ system
+ (rewrite-system-entries file)))))
+ (when result
+ (return result))
+ (setf delegates
+ (match-prefix/sorted
+ system
+ (delegate-system-entries file)))))
+ (when (and public (not delegates))
+ (let* ((check-prefer (and system t))
+ (result
+ (match-exact public
+ (public-entries file)
+ check-prefer)))
+ (when result
+ (return result))
+ (setf delegates
+ (match-prefix/sorted
+ public
+ (delegate-public-entries file)
+ check-prefer))))
+ (if delegates
+ (setf files delegates)
+ (setf files (append (next-catalog-entries file) files))))))))
+
+(defun resolve-uri (uri catalog)
+ (setf uri (normalize-uri uri))
+ (when (starts-with-p uri "urn:publicid:")
+ (return-from resolve-uri
+ (resolve-extid (unwrap-publicid uri) nil catalog)))
+ (let ((files (catalog-main-files catalog))
+ (seen '()))
+ (while files
+ (let ((file (pop files)))
+ (unless (typep file 'entry-file)
+ (setf file (find-catalog-file file catalog)))
+ (unless (or (null file) (member file seen))
+ (push file seen)
+ (let ((result
+ (or (match-exact uri (uri-entries file))
+ (match-prefix/rewrite uri (rewrite-uri-entries file)))))
+ (when result
+ (return result))
+ (let* ((delegate-entries
+ (delegate-uri-entries file))
+ (delegates
+ (match-prefix/sorted uri delegate-entries)))
+ (if delegates
+ (setf files delegates)
+ (setf files (append (next-catalog-entries file) files))))))))))
+
+(defun find-catalog-file (uri catalog)
+ (setf uri (if (stringp uri) (safe-parse-uri uri) uri))
+ (let* ((*dtd-cache* (catalog-dtd-cache catalog))
+ (*cache-all-dtds* t)
+ (file (parse-catalog-file uri)))
+ (when file
+ (let ((interned (puri:intern-uri uri (catalog-file-table catalog))))
+ (setf (getf (puri:uri-plist interned) 'catalog) file)))
+ file))
+
+(defun make-catalog (&optional (uris *default-catalog*))
+ (let ((result (%make-catalog)))
+ (setf (catalog-main-files result)
+ (loop
+ for uri in uris
+ for file = (find-catalog-file uri result)
+ when file collect file))
+ result))
+
+(defun parse-catalog-file (uri)
+ (handler-case
+ (parse-catalog-file/strict uri)
+ ((or file-error xml-parse-error) (c)
+ (warn "ignoring catalog error: ~A" c))))
+
+(defparameter *catalog-dtd*
+ (let* ((cxml
+ (slot-value (asdf:find-system :cxml) 'asdf::relative-pathname))
+ (dtd (merge-pathnames "catalog.dtd" cxml)))
+ (with-open-file (s dtd :element-type '(unsigned-byte 8))
+ (let ((bytes
+ (make-array (file-length s) :element-type '(unsigned-byte 8))))
+ (read-sequence bytes s)
+ bytes))))
+
+(defun parse-catalog-file/strict (uri)
+ (let* ((*catalog* nil)
+ (dtd-sysid
+ (puri:parse-uri "http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd")))
+ (flet ((entity-resolver (public system)
+ (declare (ignore public))
+ (if (puri:uri= system dtd-sysid)
+ (make-octet-input-stream *catalog-dtd*)
+ nil)))
+ (with-open-stream (s (open (uri-to-pathname uri)
+ :element-type '(unsigned-byte 8)
+ :direction :input))
+ (parse-stream s
+ (make-instance 'catalog-parser :uri uri)
+ :validate nil
+ :dtd (make-extid nil dtd-sysid)
+ :root #"catalog"
+ :entity-resolver #'entity-resolver)))))
+
+(defclass catalog-parser ()
+ ((result :initform (make-entry-file) :accessor result)
+ (next :initform '() :accessor next)
+ (prefer-stack :initform (list *prefer*) :accessor prefer-stack)
+ (base-stack :accessor base-stack)))
+
+(defmethod initialize-instance :after
+ ((instance catalog-parser) &key uri)
+ (setf (base-stack instance) (list uri)))
+
+(defmethod prefer ((handler catalog-parser))
+ (car (prefer-stack handler)))
+
+(defmethod base ((handler catalog-parser))
+ (car (base-stack handler)))
+
+(defun get-attribute/lname (name attributes)
+ (let ((a (find name attributes
+ :key (lambda (a)
+ (or (sax:attribute-local-name a)
+ (sax:attribute-qname a)))
+ :test #'string=)))
+ (and a (sax:attribute-value a))))
+
+(defmethod sax:start-element ((handler catalog-parser) uri lname qname attrs)
+ (declare (ignore uri))
+ (setf lname (or lname qname))
+ ;; we can dispatch on lnames only because we validate against the DTD,
+ ;; which disallows other namespaces.
+ (push (let ((new (get-attribute/lname "prefer" attrs)))
+ (cond
+ ((equal new "public") :public)
+ ((equal new "system") :system)
+ ((null new) (prefer handler))))
+ (prefer-stack handler))
+ (push (string-or (get-attribute/lname "base" attrs) (base handler))
+ (base-stack handler))
+ (flet ((geturi (lname)
+ (puri:merge-uris
+ (safe-parse-uri (get-attribute/lname lname attrs))
+ (base handler))))
+ (cond
+ ((string= lname "public")
+ (push (list (normalize-public (get-attribute/lname "publicId" attrs))
+ (geturi "uri")
+ (prefer handler))
+ (public-entries (result handler))))
+ ((string= lname "system")
+ (push (list (normalize-uri (get-attribute/lname "systemId" attrs))
+ (geturi "uri"))
+ (system-entries (result handler))))
+ ((string= lname "uri")
+ (push (list (normalize-uri (get-attribute/lname "name" attrs))
+ (geturi "uri"))
+ (uri-entries (result handler))))
+ ((string= lname "rewriteSystem")
+ (push (list (normalize-uri
+ (get-attribute/lname "systemIdStartString" attrs))
+ (get-attribute/lname "rewritePrefix" attrs))
+ (rewrite-system-entries (result handler))))
+ ((string= lname "rewriteURI")
+ (push (list (normalize-uri
+ (get-attribute/lname "uriStartString" attrs))
+ (get-attribute/lname "rewritePrefix" attrs))
+ (rewrite-uri-entries (result handler))))
+ ((string= lname "delegatePublic")
+ (push (list (normalize-public
+ (get-attribute/lname "publicIdStartString" attrs))
+ (geturi "catalog")
+ (prefer handler))
+ (delegate-public-entries (result handler))))
+ ((string= lname "delegateSystem")
+ (push (list (normalize-uri
+ (get-attribute/lname "systemIdStartString" attrs))
+ (geturi "catalog"))
+ (delegate-system-entries (result handler))))
+ ((string= lname "delegateURI")
+ (push (list (normalize-uri
+ (get-attribute/lname "uriStartString" attrs))
+ (geturi "catalog"))
+ (delegate-uri-entries (result handler))))
+ ((string= lname "nextCatalog")
+ (push (geturi "catalog")
+ (next-catalog-entries (result handler)))))))
+
+(defmethod sax:end-element ((handler catalog-parser) uri lname qname)
+ (declare (ignore uri lname qname))
+ (pop (base-stack handler))
+ (pop (prefer-stack handler)))
+
+(defmethod sax:end-document ((handler catalog-parser))
+ (result handler))
Added: vendor/cxml/characters.lisp
===================================================================
--- vendor/cxml/characters.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/characters.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,145 @@
+;;;; characters.lisp -- character class definitions
+;;;;
+;;;; This file is part of the CXML parser, released under Lisp-LGPL.
+;;;; See file COPYING for details.
+;;;;
+;;;; Author: David Lichteblau
+;;;; Copyright (C) 2004 knowledgeTools Int. GmbH
+
+;;; XXX xml-name-rune-p.lisp habe ich erst nach dem Schreiben dieses
+;;; Files gefunden...
+
+;;; XXX wird derzeit in DOM:CREATE-ATTRIBUTE verwendet. Muesste aber
+;;; wohl vom Parser auch schon geprueft werden (oder tut der das
+;;; schon?). Vorher sollte man allerdings die Geschwindigkeit der Sache
+;;; mal untersuchen.
+
+(in-package :cxml)
+
+(defparameter *base-char-ranges*
+ #((#x0041 #x005A) (#x0061 #x007A) (#x00C0 #x00D6) (#x00D8 #x00F6)
+ (#x00F8 #x00FF) (#x0100 #x0131) (#x0134 #x013E) (#x0141 #x0148)
+ (#x014A #x017E) (#x0180 #x01C3) (#x01CD #x01F0) (#x01F4 #x01F5)
+ (#x01FA #x0217) (#x0250 #x02A8) (#x02BB #x02C1) (#x0386 #x0386)
+ (#x0388 #x038A) (#x038C #x038C) (#x038E #x03A1) (#x03A3 #x03CE)
+ (#x03D0 #x03D6) (#x03DA #x03DA) (#x03DC #x03DC) (#x03DE #x03DE)
+ (#x03E0 #x03E0) (#x03E2 #x03F3) (#x0401 #x040C) (#x040E #x044F)
+ (#x0451 #x045C) (#x045E #x0481) (#x0490 #x04C4) (#x04C7 #x04C8)
+ (#x04CB #x04CC) (#x04D0 #x04EB) (#x04EE #x04F5) (#x04F8 #x04F9)
+ (#x0531 #x0556) (#x0559 #x0559) (#x0561 #x0586) (#x05D0 #x05EA)
+ (#x05F0 #x05F2) (#x0621 #x063A) (#x0641 #x064A) (#x0671 #x06B7)
+ (#x06BA #x06BE) (#x06C0 #x06CE) (#x06D0 #x06D3) (#x06D5 #x06D5)
+ (#x06E5 #x06E6) (#x0905 #x0939) (#x093D #x093D) (#x0958 #x0961)
+ (#x0985 #x098C) (#x098F #x0990) (#x0993 #x09A8) (#x09AA #x09B0)
+ (#x09B2 #x09B2) (#x09B6 #x09B9) (#x09DC #x09DD) (#x09DF #x09E1)
+ (#x09F0 #x09F1) (#x0A05 #x0A0A) (#x0A0F #x0A10) (#x0A13 #x0A28)
+ (#x0A2A #x0A30) (#x0A32 #x0A33) (#x0A35 #x0A36) (#x0A38 #x0A39)
+ (#x0A59 #x0A5C) (#x0A5E #x0A5E) (#x0A72 #x0A74) (#x0A85 #x0A8B)
+ (#x0A8D #x0A8D) (#x0A8F #x0A91) (#x0A93 #x0AA8) (#x0AAA #x0AB0)
+ (#x0AB2 #x0AB3) (#x0AB5 #x0AB9) (#x0ABD #x0ABD) (#x0AE0 #x0AE0)
+ (#x0B05 #x0B0C) (#x0B0F #x0B10) (#x0B13 #x0B28) (#x0B2A #x0B30)
+ (#x0B32 #x0B33) (#x0B36 #x0B39) (#x0B3D #x0B3D) (#x0B5C #x0B5D)
+ (#x0B5F #x0B61) (#x0B85 #x0B8A) (#x0B8E #x0B90) (#x0B92 #x0B95)
+ (#x0B99 #x0B9A) (#x0B9C #x0B9C) (#x0B9E #x0B9F) (#x0BA3 #x0BA4)
+ (#x0BA8 #x0BAA) (#x0BAE #x0BB5) (#x0BB7 #x0BB9) (#x0C05 #x0C0C)
+ (#x0C0E #x0C10) (#x0C12 #x0C28) (#x0C2A #x0C33) (#x0C35 #x0C39)
+ (#x0C60 #x0C61) (#x0C85 #x0C8C) (#x0C8E #x0C90) (#x0C92 #x0CA8)
+ (#x0CAA #x0CB3) (#x0CB5 #x0CB9) (#x0CDE #x0CDE) (#x0CE0 #x0CE1)
+ (#x0D05 #x0D0C) (#x0D0E #x0D10) (#x0D12 #x0D28) (#x0D2A #x0D39)
+ (#x0D60 #x0D61) (#x0E01 #x0E2E) (#x0E30 #x0E30) (#x0E32 #x0E33)
+ (#x0E40 #x0E45) (#x0E81 #x0E82) (#x0E84 #x0E84) (#x0E87 #x0E88)
+ (#x0E8A #x0E8A) (#x0E8D #x0E8D) (#x0E94 #x0E97) (#x0E99 #x0E9F)
+ (#x0EA1 #x0EA3) (#x0EA5 #x0EA5) (#x0EA7 #x0EA7) (#x0EAA #x0EAB)
+ (#x0EAD #x0EAE) (#x0EB0 #x0EB0) (#x0EB2 #x0EB3) (#x0EBD #x0EBD)
+ (#x0EC0 #x0EC4) (#x0F40 #x0F47) (#x0F49 #x0F69) (#x10A0 #x10C5)
+ (#x10D0 #x10F6) (#x1100 #x1100) (#x1102 #x1103) (#x1105 #x1107)
+ (#x1109 #x1109) (#x110B #x110C) (#x110E #x1112) (#x113C #x113C)
+ (#x113E #x113E) (#x1140 #x1140) (#x114C #x114C) (#x114E #x114E)
+ (#x1150 #x1150) (#x1154 #x1155) (#x1159 #x1159) (#x115F #x1161)
+ (#x1163 #x1163) (#x1165 #x1165) (#x1167 #x1167) (#x1169 #x1169)
+ (#x116D #x116E) (#x1172 #x1173) (#x1175 #x1175) (#x119E #x119E)
+ (#x11A8 #x11A8) (#x11AB #x11AB) (#x11AE #x11AF) (#x11B7 #x11B8)
+ (#x11BA #x11BA) (#x11BC #x11C2) (#x11EB #x11EB) (#x11F0 #x11F0)
+ (#x11F9 #x11F9) (#x1E00 #x1E9B) (#x1EA0 #x1EF9) (#x1F00 #x1F15)
+ (#x1F18 #x1F1D) (#x1F20 #x1F45) (#x1F48 #x1F4D) (#x1F50 #x1F57)
+ (#x1F59 #x1F59) (#x1F5B #x1F5B) (#x1F5D #x1F5D) (#x1F5F #x1F7D)
+ (#x1F80 #x1FB4) (#x1FB6 #x1FBC) (#x1FBE #x1FBE) (#x1FC2 #x1FC4)
+ (#x1FC6 #x1FCC) (#x1FD0 #x1FD3) (#x1FD6 #x1FDB) (#x1FE0 #x1FEC)
+ (#x1FF2 #x1FF4) (#x1FF6 #x1FFC) (#x2126 #x2126) (#x212A #x212B)
+ (#x212E #x212E) (#x2180 #x2182) (#x3041 #x3094) (#x30A1 #x30FA)
+ (#x3105 #x312C) (#xAC00 #xD7A3)))
+
+(defparameter *ideographic-ranges*
+ #((#x3007 #x3007) (#x3021 #x3029)(#x4E00 #x9FA5)))
+
+(defparameter *combining-char-ranges*
+ #((#x0300 #x0345) (#x0360 #x0361) (#x0483 #x0486) (#x0591 #x05A1)
+ (#x05A3 #x05B9) (#x05BB #x05BD) (#x05BF #x05BF) (#x05C1 #x05C2)
+ (#x05C4 #x05C4) (#x064B #x0652) (#x0670 #x0670) (#x06D6 #x06DC)
+ (#x06DD #x06DF) (#x06E0 #x06E4) (#x06E7 #x06E8) (#x06EA #x06ED)
+ (#x0901 #x0903) (#x093C #x093C) (#x093E #x094C) (#x094D #x094D)
+ (#x0951 #x0954) (#x0962 #x0963) (#x0981 #x0983) (#x09BC #x09BC)
+ (#x09BE #x09BE) (#x09BF #x09BF) (#x09C0 #x09C4) (#x09C7 #x09C8)
+ (#x09CB #x09CD) (#x09D7 #x09D7) (#x09E2 #x09E3) (#x0A02 #x0A02)
+ (#x0A3C #x0A3C) (#x0A3E #x0A3E) (#x0A3F #x0A3F) (#x0A40 #x0A42)
+ (#x0A47 #x0A48) (#x0A4B #x0A4D) (#x0A70 #x0A71) (#x0A81 #x0A83)
+ (#x0ABC #x0ABC) (#x0ABE #x0AC5) (#x0AC7 #x0AC9) (#x0ACB #x0ACD)
+ (#x0B01 #x0B03) (#x0B3C #x0B3C) (#x0B3E #x0B43) (#x0B47 #x0B48)
+ (#x0B4B #x0B4D) (#x0B56 #x0B57) (#x0B82 #x0B83) (#x0BBE #x0BC2)
+ (#x0BC6 #x0BC8) (#x0BCA #x0BCD) (#x0BD7 #x0BD7) (#x0C01 #x0C03)
+ (#x0C3E #x0C44) (#x0C46 #x0C48) (#x0C4A #x0C4D) (#x0C55 #x0C56)
+ (#x0C82 #x0C83) (#x0CBE #x0CC4) (#x0CC6 #x0CC8) (#x0CCA #x0CCD)
+ (#x0CD5 #x0CD6) (#x0D02 #x0D03) (#x0D3E #x0D43) (#x0D46 #x0D48)
+ (#x0D4A #x0D4D) (#x0D57 #x0D57) (#x0E31 #x0E31) (#x0E34 #x0E3A)
+ (#x0E47 #x0E4E) (#x0EB1 #x0EB1) (#x0EB4 #x0EB9) (#x0EBB #x0EBC)
+ (#x0EC8 #x0ECD) (#x0F18 #x0F19) (#x0F35 #x0F35) (#x0F37 #x0F37)
+ (#x0F39 #x0F39) (#x0F3E #x0F3E) (#x0F3F #x0F3F) (#x0F71 #x0F84)
+ (#x0F86 #x0F8B) (#x0F90 #x0F95) (#x0F97 #x0F97) (#x0F99 #x0FAD)
+ (#x0FB1 #x0FB7) (#x0FB9 #x0FB9) (#x20D0 #x20DC) (#x20E1 #x20E1)
+ (#x302A #x302F) (#x3099 #x3099) (#x309A #x309A)))
+
+(defparameter *digit-ranges*
+ #((#x0030 #x0039) (#x0660 #x0669) (#x06F0 #x06F9) (#x0966 #x096F)
+ (#x09E6 #x09EF) (#x0A66 #x0A6F) (#x0AE6 #x0AEF) (#x0B66 #x0B6F)
+ (#x0BE7 #x0BEF) (#x0C66 #x0C6F) (#x0CE6 #x0CEF) (#x0D66 #x0D6F)
+ (#x0E50 #x0E59) (#x0ED0 #x0ED9) (#x0F20 #x0F29)))
+
+(defparameter *extender-ranges*
+ #((#x00B7 #x00B7) (#x02D0 #x02D0) (#x02D1 #x02D1) (#x0387 #x0387)
+ (#x0640 #x0640) (#x0E46 #x0E46) (#x0EC6 #x0EC6) (#x3005 #x3005)
+ (#x3031 #x3035) (#x309D #x309E) (#x30FC #x30FE)))
+
+(defun valid-name-p (rod)
+ (and (not (zerop (length rod)))
+ (let ((initial (elt rod 0)))
+ (or (rune-in-range-p initial *base-char-ranges*)
+ (rune-in-range-p initial *ideographic-ranges*)
+ (rune= initial #/_)
+ (eql initial #/:)))
+ (every #'rune-name-char-p rod)))
+
+(defun valid-nmtoken-p (rod)
+ (and (not (zerop (length rod)))
+ (every #'rune-name-char-p rod)))
+
+(defun rune-name-char-p (rune)
+ (or (rune-in-range-p rune *base-char-ranges*)
+ (rune-in-range-p rune *ideographic-ranges*)
+ (rune-in-range-p rune *digit-ranges*)
+ (eql rune #/.)
+ (eql rune #/-)
+ (eql rune #/_)
+ (eql rune #/:)
+ (rune-in-range-p rune *combining-char-ranges*)
+ (rune-in-range-p rune *extender-ranges*)))
+
+(defun rune-in-range-p (rune range)
+ ;; XXX FIXME, das geht doch besser
+ (let ((code (rune-code rune)))
+ (block nil
+ (map nil (lambda (range)
+ (when (< code (car range))
+ (return nil))
+ (when (<= code (cadr range))
+ (return t)))
+ range))))
Added: vendor/cxml/package.lisp
===================================================================
--- vendor/cxml/package.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/package.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,82 @@
+;;;; package.lisp -- Paketdefinition
+;;;;
+;;;; This file is part of the CXML parser, released under Lisp-LGPL.
+;;;; See file COPYING for details.
+
+(in-package :cl-user)
+
+(defpackage :cxml
+ (:use :cl :runes :runes-encoding :trivial-gray-streams)
+ (:export
+ ;; xstreams
+ #:make-xstream
+ #:make-rod-xstream
+ #:close-xstream
+ #:read-rune
+ #:peek-rune
+ #:unread-rune
+ #:fread-rune
+ #:fpeek-rune
+ #:xstream-position
+ #:xstream-line-number
+ #:xstream-column-number
+ #:xstream-plist
+ #:xstream-encoding
+
+ ;; xstream controller protocol
+ #:read-octects
+ #:xstream/close
+
+ #:attribute-namespace-uri
+ #:attribute-local-name
+ #:attribute-qname
+ #:attribute-value
+
+ #:parse-file
+ #:parse-stream
+ #:parse-rod
+ #:parse-octets
+
+ #:make-octet-vector-sink
+ #:make-octet-stream-sink
+ #:make-rod-sink
+ #+rune-is-character #:make-string-sink
+ #+rune-is-character #:make-character-stream-sink
+ #-rune-is-character #:make-string-sink/utf8
+ #-rune-is-character #:make-character-stream-sink/utf8
+
+ #:with-xml-output
+ #:with-element
+ #:attribute
+ #:cdata
+ #:text
+
+ #:xml-parse-error
+ #:well-formedness-violation
+ #:validity-error
+
+ #:parse-dtd-file
+ #:parse-dtd-stream
+ #:make-validator
+
+ #:*cache-all-dtds*
+ #:*dtd-cache*
+ #:getdtd
+ #:remdtd
+ #:make-dtd-cache
+ #:clear-dtd-cache
+ #:make-extid
+
+ #:*catalog*
+ #:*prefer*
+ #:make-catalog
+ #:resolve-uri
+ #:resolve-extid
+
+ #:make-recoder
+ #:sax-proxy
+ #:proxy-chained-handler
+ #:make-namespace-normalizer
+ #:make-whitespace-normalizer
+ #:rod-to-utf8-string
+ #:utf8-string-to-rod))
Added: vendor/cxml/recoder.lisp
===================================================================
--- vendor/cxml/recoder.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/recoder.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,125 @@
+;;;; recoder.lisp -- SAX handler for string conversion
+;;;;
+;;;; This file is part of the CXML parser, released under Lisp-LGPL.
+;;;; See file COPYING for details.
+;;;;
+;;;; Developed 2004 for headcraft - http://headcraft.de/
+;;;; Copyright: David Lichteblau
+
+(in-package :cxml)
+
+(defclass recoder ()
+ ((recoder :initarg :recoder :accessor recoder)
+ (chained-handler :initarg :chained-handler :accessor chained-handler)))
+
+(defun make-recoder (chained-handler recoder-fn)
+ (make-instance 'recoder
+ :recoder recoder-fn
+ :chained-handler chained-handler))
+
+(macrolet ((%string (rod)
+ `(let ((rod ,rod))
+ (if (typep rod '(or rod string))
+ (funcall (recoder handler) rod)
+ rod)))
+ (defwrapper (name (&rest args) &rest forms)
+ `(defmethod ,name ((handler recoder) , at args)
+ (,name (chained-handler handler) , at forms))))
+ (defwrapper sax:start-document ())
+
+ (defwrapper sax:start-element
+ (namespace-uri local-name qname attributes)
+ (%string namespace-uri)
+ (%string local-name)
+ (%string qname)
+ (mapcar (lambda (attr)
+ (sax:make-attribute
+ :namespace-uri (%string (sax:attribute-namespace-uri attr))
+ :local-name (%string (sax:attribute-local-name attr))
+ :qname (%string (sax:attribute-qname attr))
+ :value (%string (sax:attribute-value attr))
+ :specified-p (sax:attribute-specified-p attr)))
+ attributes))
+
+ (defwrapper sax:start-prefix-mapping (prefix uri)
+ (%string prefix)
+ (%string uri))
+
+ (defwrapper sax:characters (data)
+ (%string data))
+
+ (defwrapper sax:processing-instruction (target data)
+ (%string target)
+ (%string data))
+
+ (defwrapper sax:end-prefix-mapping (prefix)
+ (%string prefix))
+
+ (defwrapper sax:end-element (namespace-uri local-name qname)
+ (%string namespace-uri)
+ (%string local-name)
+ (%string qname))
+
+ (defwrapper sax:end-document ())
+
+ (defwrapper sax:comment (data)
+ (%string data))
+
+ (defwrapper sax:start-cdata ())
+
+ (defwrapper sax:end-cdata ())
+
+ (defwrapper sax:start-dtd (name public-id system-id)
+ (%string name)
+ (%string public-id)
+ (%string system-id))
+
+ (defwrapper sax:start-internal-subset ())
+ (defwrapper sax:end-internal-subset ())
+
+ (defwrapper sax:end-dtd ())
+
+ (defwrapper sax:unparsed-entity-declaration
+ (name public-id system-id notation-name)
+ (%string name)
+ (%string public-id)
+ (%string system-id)
+ (%string notation-name))
+
+ (defwrapper sax:external-entity-declaration
+ (kind name public-id system-id)
+ (%string kind)
+ (%string name)
+ (%string public-id)
+ (%string system-id))
+
+ (defwrapper sax:internal-entity-declaration
+ (kind name value)
+ kind
+ (%string name)
+ (%string value))
+
+ (defwrapper sax:notation-declaration
+ (name public-id system-id)
+ (%string name)
+ (%string public-id)
+ (%string system-id))
+
+ (defwrapper sax:element-declaration (name model)
+ (%string name)
+ model)
+
+ (defwrapper sax:attribute-declaration
+ (element-name attribute-name type default)
+ (%string element-name)
+ (%string attribute-name)
+ (%string type)
+ (%string default))
+
+ (defwrapper sax:entity-resolver
+ (resolver)
+ resolver)
+
+ (defwrapper sax::dtd
+ (dtd)
+ dtd))
Added: vendor/cxml/sax-handler.lisp
===================================================================
--- vendor/cxml/sax-handler.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/sax-handler.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,354 @@
+;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: SAX; readtable: runes; Encoding: utf-8; -*-
+;;; ---------------------------------------------------------------------------
+;;; Title: A SAX2-like API for the xml parser
+;;; Created: 2003-06-30
+;;; Author: Henrik Motakef <hmot at henrik-motakef.de>
+;;; Author: David Lichteblau (DTD-related changes)
+;;; License: BSD
+;;; ---------------------------------------------------------------------------
+;;; © copyright 2003 by Henrik Motakef
+;;; © copyright 2004 knowledgeTools Int. GmbH
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions are
+;;; met:
+;;;
+;;; 1. Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+;;;
+;;; 2. Redistributions in binary form must reproduce the above copyright
+;;; notice, this list of conditions and the following disclaimer in the
+;;; documentation and/or other materials provided with the distribution
+;;;
+;;; THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+;;; WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+;;; MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+;;; IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+;;; INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+;;; (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+;;; SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+;;; HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+;;; STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
+;;; IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+;;; POSSIBILITY OF SUCH DAMAGE.
+
+;;; TODO/ Open Questions:
+
+;; o Should there be a predefined "handler" class, or even several
+;; (like Java SAX' ContentHandler, DTDHandler, LexicalHandler etc? I
+;; don't really see why.
+;; o Missing stuff from Java SAX2:
+;; * ignorable-whitespace
+;; * document-locator/(setf document-locator)
+;; (probably implies a handler class with an appropriate slot)
+;; * skipped-entity
+;; * The whole ErrorHandler class, this is better handled using
+;; conditions (but isn't yet)
+;; * The LexicalHandler (start-cdata etc) would be nice [-- partly done]
+
+(defpackage :sax
+ (:use :common-lisp)
+ (:export #:*namespace-processing*
+ #:*include-xmlns-attributes*
+ #:*use-xmlns-namespace*
+
+ #:make-attribute
+ #:find-attribute
+ #:find-attribute-ns
+ #:attribute-namespace-uri
+ #:attribute-local-name
+ #:attribute-qname
+ #:attribute-value
+ #:attribute-specified-p
+
+ #:start-document
+ #:start-prefix-mapping
+ #:start-element
+ #:characters
+ #:processing-instruction
+ #:end-element
+ #:end-prefix-mapping
+ #:end-document
+ #:comment
+ #:start-cdata
+ #:end-cdata
+ #:start-dtd
+ #:end-dtd
+ #:start-internal-subset
+ #:end-internal-subset
+ #:unparsed-entity-declaration
+ #:external-entity-declaration
+ #:internal-entity-declaration
+ #:notation-declaration
+ #:element-declaration
+ #:attribute-declaration
+ #:entity-resolver))
+
+(in-package :sax)
+
+;; The http://xml.org/sax/features/namespaces property
+(defvar *namespace-processing* t
+ "If non-nil (the default), namespace processing is enabled.
+
+See also `start-element' and `end-element' for a detailed description
+of the consequences of modifying this variable, and
+`*include-xmlns-attributes*' and `*use-xmlns-namespace*' for further
+related options.")
+
+;; The http://xml.org/sax/features/namespace-prefixes property.
+(defvar *include-xmlns-attributes* t
+ "If non-nil, namespace declarations are reported as normal
+attributes.
+
+This variable has no effect unless `*namespace-processing*' is
+non-nil.
+
+See also `*use-xmlns-namespace*', and `start-element' for a detailed
+description of the consequences of setting this variable.")
+
+(defvar *use-xmlns-namespace* t
+ "If this variable is nil (the default), attributes with a name like
+'xmlns:x' are not considered to be in a namespace, following the
+'Namespaces in XML' specification.
+
+If it is non-nil, such attributes are considered to be in a namespace
+with the URI 'http://www.w3.org/2000/xmlns/', following an
+incompatible change silently introduced in the errata to that spec,
+and adopted by some W3C standards.
+
+For example, an attribute like xmlns:ex='http://example.com' would be
+reported like this:
+
+*use-xmlns-namespace*: nil
+namespace-uri: nil
+local-name: nil
+qname: #\"xmlns:ex\"
+
+*use-xmlns-namespace*: t
+namespace-uri: #\"http://www.w3.org/2000/xmlns/\"
+local-name: #\"ex\"
+qname: #\"xmlns:ex\"
+
+Setting this variable has no effect unless both
+`*namespace-processing*' and `*include-xmlns-attributes*' are non-nil.")
+
+(defstruct attribute
+ namespace-uri
+ local-name
+ qname
+ value
+ specified-p)
+
+(defun %rod= (x y)
+ ;; allow rods *and* strings *and* null
+ (cond
+ ((zerop (length x)) (zerop (length y)))
+ ((zerop (length y)) nil)
+ ((stringp x) (string= x y))
+ (t (runes:rod= x y))))
+
+(defun find-attribute (qname attrs)
+ (find qname attrs :key #'attribute-qname :test #'%rod=))
+
+(defun find-attribute-ns (uri lname attrs)
+ (find-if (lambda (attr)
+ (and (%rod= uri (sax:attribute-namespace-uri attr))
+ (%rod= lname (sax:attribute-local-name attr))))
+ attrs))
+
+(defgeneric start-document (handler)
+ (:documentation "Called at the beginning of the parsing process,
+before any element, processing instruction or comment is reported.
+
+Handlers that need to maintain internal state may use this to perform
+any neccessary initializations.")
+ (:method ((handler t)) nil))
+
+(defgeneric start-element (handler namespace-uri local-name qname attributes)
+ (:documentation "Called to report the beginning of an element.
+
+There will always be a corresponding call to end-element, even in the
+case of an empty element (i.e. <foo/>).
+
+If the value of *namespaces* is non-nil, namespace-uri, local-name and
+qname are rods. If it is nil, namespace-uri and local-name are always
+nil, and it is not an error if the qname is not a well-formed
+qualified element name (for example, if it contains more than one
+colon).
+
+The attributes parameter is a list (in arbitrary order) of instances
+of the `attribute' structure class. The for their namespace-uri and
+local-name properties, the same rules as for the element name
+apply. Additionally, namespace-declaring attributes (those whose name
+is \"xmlns\" or starts with \"xmlns:\") are only included if
+*namespace-prefixes* is non-nil.")
+ (:method ((handler t) namespace-uri local-name qname attributes)
+ (declare (ignore namespace-uri local-name qname attributes))
+ nil))
+
+(defgeneric start-prefix-mapping (handler prefix uri)
+ (:documentation "Called when the scope of a new prefix -> namespace-uri mapping begins.
+
+This will always be called immediatly before the `start-element' event
+for the element on which the namespaces are declared.
+
+Clients don't usually have to implement this except under special
+circumstances, for example when they have to deal with qualified names
+in textual content. The parser will handle namespaces of elements and
+attributes on its own.")
+ (:method ((handler t) prefix uri) (declare (ignore prefix uri)) nil))
+
+(defgeneric characters (handler data)
+ (:documentation "Called for textual element content.
+
+The data is passed as a rod, with all entity references resolved.
+It is possible that the character content of an element is reported
+via multiple subsequent calls to this generic function.")
+ (:method ((handler t) data) (declare (ignore data)) nil))
+
+(defgeneric processing-instruction (handler target data)
+ (:documentation "Called when a processing instruction is read.
+
+Both target and data are rods.")
+ (:method ((handler t) target data) (declare (ignore target data)) nil))
+
+(defgeneric end-prefix-mapping (handler prefix)
+ (:documentation "Called when a prefix -> namespace-uri mapping goes out of scope.
+
+This will always be called immediatly after the `end-element' event
+for the element on which the namespace is declared. The order of the
+end-prefix-mapping events is otherwise not guaranteed.
+
+Clients don't usually have to implement this except under special
+circumstances, for example when they have to deal with qualified names
+in textual content. The parser will handle namespaces of elements and
+attributes on its own.")
+ (:method ((handler t) prefix) prefix nil))
+
+(defgeneric end-element (handler namespace-uri local-name qname)
+ (:documentation "Called to report the end of an element.
+
+See the documentation for `start-element' for a description of the
+parameters.")
+ (:method ((handler t) namespace-uri local-name qname)
+ (declare (ignore namespace-uri local-name qname))
+ nil))
+
+(defgeneric end-document (handler)
+ (:documentation "Called at the end of parsing a document.
+This is always the last function called in the parsing process.
+
+In contrast to all of the other methods, the return value of this gf
+is significant, it will be returned by the parse-file/stream/string function.")
+ (:method ((handler t)) nil))
+
+;; LexicalHandler
+
+(defgeneric comment (handler data)
+ (:method ((handler t) data) data nil))
+
+(defgeneric start-cdata (handler)
+ (:documentation "Called at the beginning of parsing a CDATA section.
+
+Handlers only have to implement this if they are interested in the
+lexical structure of the parsed document. The content of the CDATA
+section is reported via the `characters' generic function like all
+other textual content.")
+ (:method ((handler t)) nil))
+
+(defgeneric end-cdata (handler)
+ (:documentation "Called at the end of parsing a CDATA section.
+
+Handlers only have to implement this if they are interested in the
+lexical structure of the parsed document. The content of the CDATA
+section is reported via the `characters' generic function like all
+other textual content.")
+ (:method ((handler t)) nil))
+
+(defgeneric start-dtd (handler name public-id system-id)
+ (:documentation "Called at the beginning of parsing a DTD.")
+ (:method ((handler t) name public-id system-id)
+ (declare (ignore name public-id system-id))
+ nil))
+
+(defgeneric end-dtd (handler)
+ (:documentation "Called at the end of parsing a DTD.")
+ (:method ((handler t)) nil))
+
+(defgeneric start-internal-subset (handler)
+ (:documentation "Reports that an internal subset is present. Called before
+any definition from the internal subset is reported.")
+ (:method ((handler t)) nil))
+
+(defgeneric end-internal-subset (handler)
+ (:documentation "Called after processing of the internal subset has
+finished, if present.")
+ (:method ((handler t)) nil))
+
+(defgeneric unparsed-entity-declaration
+ (handler name public-id system-id notation-name)
+ (:documentation
+ "Called when an unparsed entity declaration is seen in a DTD.")
+ (:method ((handler t) name public-id system-id notation-name)
+ (declare (ignore name public-id system-id notation-name))
+ nil))
+
+(defgeneric external-entity-declaration
+ (handler kind name public-id system-id)
+ (:documentation
+ "Called when a parsed external entity declaration is seen in a DTD.")
+ (:method ((handler t) kind name public-id system-id)
+ (declare (ignore kind name public-id system-id))
+ nil))
+
+(defgeneric internal-entity-declaration
+ (handler kind name value)
+ (:documentation
+ "Called when an internal entity declaration is seen in a DTD.")
+ (:method ((handler t) kind name value)
+ (declare (ignore kind name value))
+ nil))
+
+(defgeneric notation-declaration
+ (handler name public-id system-id)
+ (:documentation
+ "Called when a notation declaration is seen while parsing a DTD.")
+ (:method ((handler t) name public-id system-id)
+ (declare (ignore name public-id system-id))
+ nil))
+
+(defgeneric element-declaration (handler name model)
+ (:documentation
+ "Called when a element declaration is seen in a DTD. Model is not a string,
+ but a nested list, with *, ?, +, OR, and AND being the operators, rods
+ as names, :EMPTY and :PCDATA as special tokens. (AND represents
+ sequences.)")
+ (:method ((handler t) name model)
+ (declare (ignore name model))
+ nil))
+
+(defgeneric attribute-declaration
+ (handler element-name attribute-name type default)
+ (:documentation
+ "Called when an attribute declaration is seen in a DTD.
+ type one of :CDATA, :ID, :IDREF, :IDREFS,
+ :ENTITY, :ENTITIES, :NMTOKEN, :NMTOKENS,
+ (:NOTATION <name>*), or (:ENUMERATION <name>*)
+ default :REQUIRED, :IMPLIED, (:FIXED content), or (:DEFAULT content)")
+ (:method ((handler t) element-name attribute-name type value)
+ (declare (ignore element-name attribute-name type value))
+ nil))
+
+(defgeneric entity-resolver
+ (handler resolver)
+ (:documentation
+ "Called between sax:end-dtd and sax:end-document to register an entity
+ resolver, a function of two arguments: An entity name and SAX handler.
+ When called, the resolver function will parse the named entity's data.")
+ (:method ((handler t) resolver)
+ (declare (ignore resolver))
+ nil))
+
+;; internal for now
+(defgeneric dtd (handler dtd)
+ (:method ((handler t) dtd) (declare (ignore dtd)) nil))
Added: vendor/cxml/sax-proxy.lisp
===================================================================
--- vendor/cxml/sax-proxy.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/sax-proxy.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,41 @@
+;;;; sax-proxy.lisp
+;;;;
+;;;; This file is part of the CXML parser, released under Lisp-LGPL.
+;;;; See file COPYING for details.
+;;;;
+;;;; Copyright (c) 2004 David Lichteblau
+;;;; Author: David Lichteblau
+
+(in-package :cxml)
+
+(defclass sax-proxy ()
+ ((chained-handler :initform nil
+ :initarg :chained-handler
+ :accessor proxy-chained-handler)))
+
+(macrolet ((define-proxy-method (name (&rest args))
+ `(defmethod ,name ((handler sax-proxy) , at args)
+ (,name (proxy-chained-handler handler) , at args))))
+ (define-proxy-method sax:start-document ())
+ (define-proxy-method sax:start-element (uri lname qname attributes))
+ (define-proxy-method sax:start-prefix-mapping (prefix uri))
+ (define-proxy-method sax:characters (data))
+ (define-proxy-method sax:processing-instruction (target data))
+ (define-proxy-method sax:end-prefix-mapping (prefix))
+ (define-proxy-method sax:end-element (namespace-uri local-name qname))
+ (define-proxy-method sax:end-document ())
+ (define-proxy-method sax:comment (data))
+ (define-proxy-method sax:start-cdata ())
+ (define-proxy-method sax:end-cdata ())
+ (define-proxy-method sax:start-dtd (name public-id system-id))
+ (define-proxy-method sax:end-dtd ())
+ (define-proxy-method sax:start-internal-subset ())
+ (define-proxy-method sax:end-internal-subset ())
+ (define-proxy-method sax:unparsed-entity-declaration (name pub sys not))
+ (define-proxy-method sax:external-entity-declaration (kind name pub sys))
+ (define-proxy-method sax:internal-entity-declaration (kind name value))
+ (define-proxy-method sax:notation-declaration (name public-id system-id))
+ (define-proxy-method sax:element-declaration (name model))
+ (define-proxy-method sax:attribute-declaration (elt attr type default))
+ (define-proxy-method sax:entity-resolver (resolver))
+ (define-proxy-method sax::dtd (dtd)))
Added: vendor/cxml/sax-tests/CVS/Entries
===================================================================
--- vendor/cxml/sax-tests/CVS/Entries 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/sax-tests/CVS/Entries 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,4 @@
+/event-collecting-handler.lisp/1.1.1.1/Sun Mar 13 18:02:10 2005//
+/package.lisp/1.1.1.1/Sun Mar 13 18:02:10 2005//
+/tests.lisp/1.2/Wed Dec 28 23:18:07 2005//
+D
Added: vendor/cxml/sax-tests/CVS/Repository
===================================================================
--- vendor/cxml/sax-tests/CVS/Repository 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/sax-tests/CVS/Repository 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1 @@
+cxml/xml/sax-tests
Added: vendor/cxml/sax-tests/CVS/Root
===================================================================
--- vendor/cxml/sax-tests/CVS/Root 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/sax-tests/CVS/Root 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1 @@
+:pserver:anonymous at common-lisp.net:/project/cxml/cvsroot
Added: vendor/cxml/sax-tests/CVS/Template
===================================================================
Added: vendor/cxml/sax-tests/event-collecting-handler.lisp
===================================================================
--- vendor/cxml/sax-tests/event-collecting-handler.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/sax-tests/event-collecting-handler.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,37 @@
+(in-package :sax-tests)
+
+(defclass event-collecting-handler ()
+ ((event-list :initform '() :accessor event-list)))
+
+(defmethod start-document ((handler event-collecting-handler))
+ (push (list :start-document) (event-list handler)))
+
+(defmethod start-element ((handler event-collecting-handler) ns-uri local-name qname attrs)
+ (push (list :start-element ns-uri local-name qname attrs)
+ (event-list handler)))
+
+(defmethod start-prefix-mapping ((handler event-collecting-handler) prefix uri)
+ (push (list :start-prefix-mapping prefix uri)
+ (event-list handler)))
+
+(defmethod characters ((handler event-collecting-handler) data)
+ (push (list :characters data)
+ (event-list handler)))
+
+(defmethod processing-instruction ((handler event-collecting-handler) target data)
+ (push (list :processing-instruction target data)
+ (event-list handler)))
+
+(defmethod end-prefix-mapping ((handler event-collecting-handler) prefix)
+ (push (list :end-prefix-mapping prefix)
+ (event-list handler)))
+
+(defmethod end-element ((handler event-collecting-handler) namespace-uri local-name qname)
+ (push (list :end-element namespace-uri local-name qname)
+ (event-list handler)))
+
+(defmethod end-document ((handler event-collecting-handler))
+ (push (list :end-document)
+ (event-list handler))
+
+ (nreverse (event-list handler)))
\ No newline at end of file
Added: vendor/cxml/sax-tests/package.lisp
===================================================================
--- vendor/cxml/sax-tests/package.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/sax-tests/package.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,4 @@
+(defpackage :sax-tests
+ (:use :cl :xml :sax :glisp :rt)
+ (:export #:event-collecting-handler))
+
Added: vendor/cxml/sax-tests/tests.lisp
===================================================================
--- vendor/cxml/sax-tests/tests.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/sax-tests/tests.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,330 @@
+(in-package :sax-tests)
+
+(defun first-start-element-event (string)
+ (let ((events (cxml:parse-rod string (make-instance 'event-collecting-handler))))
+ (find :start-element events :key #'car)))
+
+
+;;; Attribute handling
+
+(deftest no-default-namespace-for-attributes
+ (let* ((evt (first-start-element-event "<x xmlns='http://example.com' a='b'/>"))
+ (attr (car (fifth evt))))
+ (values
+ (attribute-namespace-uri attr)
+ (attribute-local-name attr)))
+ nil nil)
+
+(deftest attribute-uniqueness-1
+ (handler-case
+ (cxml:parse-rod "<x xmlns:a='http://example.com' xmlns:b='http://example.com' a:a='1' b:a='1'/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest attribute-uniqueness-2
+ (handler-case
+ (cxml:parse-rod "<x xmlns:a='http://example.com' xmlns='http://example.com' a:a='1' a='1'/>")
+ (error () nil)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ t))
+ t)
+
+(deftest attribute-uniqueness-3
+ (let ((sax:*namespace-processing* nil))
+ (handler-case
+ (cxml:parse-rod "<x xmlns:a='http://example.com' xmlns:b='http://example.com' a:a='1' b:a='1'/>")
+ (error () nil)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ t)))
+ t)
+
+;;; Namespace undeclaring
+
+(deftest undeclare-default-namespace-1
+ (let* ((evts (cxml:parse-rod "<x xmlns='http://example.com'><y xmlns='' a='1'/></x>"
+ (make-instance 'event-collecting-handler)))
+ (start-elt-events (remove :start-element evts :test (complement #'eql) :key #'car))
+ (evt1 (first start-elt-events))
+ (evt2 (second start-elt-events )))
+ (values
+ (rod= #"http://example.com" (second evt1))
+ (second evt2)
+ (third evt2)))
+ t nil nil)
+
+(deftest undeclare-other-namespace
+ (handler-case
+ (cxml:parse-rod "<x:x xmlns:x='http://example.com'><x:y xmlns:x='' a='1'/></x:x>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+
+;;; Require names otherwise totally out of scope of the xmlns rec to be NcNames for no reason
+
+(deftest pi-names-are-ncnames-when-namespace-processing-1
+ (handler-case
+ (cxml:parse-rod "<?a:b c?><x/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest pi-names-are-ncnames-when-namespace-processing-2
+ (let ((sax:*namespace-processing* nil))
+ (handler-case
+ (cxml:parse-rod "<?a:b c?><x/>")
+ (error () nil)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ t)))
+ t)
+
+(deftest entity-names-are-ncnames-when-namespace-processing-1
+ (handler-case
+ (cxml:parse-rod "<!DOCTYPE x [ <!ENTITY y:z 'foo'> ]><x>&y:z;</x>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest entity-names-are-ncnames-when-namespace-processing-2
+ (handler-case
+ (cxml:parse-rod "<!DOCTYPE x [ <!ENTITY y:z 'foo'> ]><x/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest entity-names-are-ncnames-when-namespace-processing-3
+ (let ((sax:*namespace-processing* nil))
+ (handler-case
+ (cxml:parse-rod "<!DOCTYPE x [ <!ENTITY y:z 'foo'> ]><x>&y:z;</x>")
+ (error () nil)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ t)))
+ t)
+
+(deftest entity-names-are-ncnames-when-namespace-processing-4
+ (let ((sax:*namespace-processing* nil))
+ (handler-case
+ (cxml:parse-rod "<!DOCTYPE x [ <!ENTITY y:z 'foo'> ]><x/>")
+ (error () nil)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ t)))
+ t)
+
+;;; Inclusion of xmlns attributes
+
+(deftest xmlns-attr-include-1
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (evt (first-start-element-event "<x xmlns='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (length attrs))
+ 1)
+
+(deftest xmlns-attr-discard-1
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* nil)
+ (evt (first-start-element-event "<x xmlns='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (length attrs))
+ 0)
+
+;;; Namespace of xmlns attributes
+
+(deftest xmlns-attr-ns-uri-1
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* nil)
+ (evt (first-start-element-event "<x xmlns='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (attribute-namespace-uri (car attrs)))
+ nil)
+
+(deftest xmlns-attr-ns-uri-2
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* nil)
+ (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (attribute-namespace-uri (car attrs)))
+ nil)
+
+(deftest xmlns-attr-ns-uri-3
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* t)
+ (evt (first-start-element-event "<x xmlns='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (attribute-namespace-uri (car attrs)))
+ nil)
+
+(deftest xmlns-attr-ns-uri-4
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* t)
+ (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (rod= #"http://www.w3.org/2000/xmlns/" (attribute-namespace-uri (car attrs))))
+ t)
+
+(deftest xmlns-attr-ns-local-name-1
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* nil)
+ (evt (first-start-element-event "<x xmlns='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (attribute-local-name (car attrs)))
+ nil)
+
+(deftest xmlns-attr-ns-local-name-2
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* nil)
+ (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (attribute-local-name (car attrs)))
+ nil)
+
+(deftest xmlns-attr-ns-local-name-3
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* t)
+ (evt (first-start-element-event "<x xmlns='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (attribute-local-name (car attrs)))
+ nil)
+
+(deftest xmlns-attr-ns-local-name-4
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* t)
+ (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (rod= #"foo" (attribute-local-name (car attrs))))
+ t)
+
+(deftest xmlns-attr-qname-1
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* nil)
+ (evt (first-start-element-event "<x xmlns='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (rod= #"xmlns" (attribute-qname (car attrs))))
+ t)
+
+(deftest xmlns-attr-qname-2
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* nil)
+ (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (rod= #"xmlns:foo" (attribute-qname (car attrs))))
+ t)
+
+(deftest xmlns-attr-qname-4
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* t)
+ (evt (first-start-element-event "<x xmlns='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (rod= #"xmlns" (attribute-qname (car attrs))))
+ t)
+
+(deftest xmlns-attr-qname-4
+ (let* ((sax:*namespace-processing* t)
+ (sax:*include-xmlns-attributes* t)
+ (sax:*use-xmlns-namespace* t)
+ (evt (first-start-element-event "<x xmlns:foo='http://example.com'/>"))
+ (attrs (fifth evt)))
+ (rod= #"xmlns:foo" (attribute-qname (car attrs))))
+ t)
+
+
+;;; Predefined Namespaces
+
+(deftest redefine-xml-namespace-1
+ (handler-case
+ (cxml:parse-rod "<x xmlns:xml='http://www.w3.org/XML/1998/namespace'/>")
+ (error () nil)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ t))
+ t)
+
+(deftest redefine-xml-namespace-2
+ (handler-case
+ (cxml:parse-rod "<x xmlns:xml='http://example.com/wrong-uri'/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest redefine-xml-namespace-3
+ (handler-case
+ (cxml:parse-rod "<x xmlns:wrong='http://www.w3.org/XML/1998/namespace'/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest redefine-xml-namespace-4
+ (handler-case
+ (cxml:parse-rod "<x xmlns:wrong='http://www.w3.org/XML/1998/namespace'/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest redefine-xmlns-namespace-1
+ (handler-case
+ (cxml:parse-rod "<x xmlns:xmlns='http://www.w3.org/2000/xmlns/'/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest redefine-xmlns-namespace-2
+ (handler-case
+ (cxml:parse-rod "<x xmlns:xmlns='http://example.com/wrong-ns'/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest redefine-xmlns-namespace-3
+ (handler-case
+ (cxml:parse-rod "<x xmlns:wrong='http://www.w3.org/2000/xmlns/'/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
+
+(deftest redefine-xmlns-namespace-4
+ (handler-case
+ (cxml:parse-rod "<x xmlns='http://www.w3.org/2000/xmlns/'/>")
+ (error () t)
+ (:no-error (&rest junk)
+ (declare (ignore junk))
+ nil))
+ t)
Added: vendor/cxml/space-normalizer.lisp
===================================================================
--- vendor/cxml/space-normalizer.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/space-normalizer.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,62 @@
+;;;; space-normalizer.lisp -- whitespace removal
+;;;;
+;;;; This file is part of the CXML parser, released under Lisp-LGPL.
+;;;; See file COPYING for details.
+;;;;
+;;;; Copyright (c) 2005 David Lichteblau
+
+(in-package :cxml)
+
+(defclass whitespace-normalizer (sax-proxy)
+ ((attributes :initform '(t) :accessor xml-space-attributes)
+ (models :initform nil :accessor xml-space-models)
+ (dtd :initarg :dtd :accessor xml-space-dtd)))
+
+(defun make-whitespace-normalizer (chained-handler &optional dtd)
+ (make-instance 'whitespace-normalizer
+ :dtd dtd
+ :chained-handler chained-handler))
+
+(defmethod sax::dtd ((handler whitespace-normalizer) dtd)
+ (unless (xml-space-dtd handler)
+ (setf (xml-space-dtd handler) dtd)))
+
+(defmethod sax:start-element
+ ((handler whitespace-normalizer) uri lname qname attrs)
+ (declare (ignore uri lname))
+ (let ((dtd (xml-space-dtd handler)))
+ (when dtd
+ (let ((xml-space
+ (sax:find-attribute (if (stringp qname) "xml:space" #"xml:space")
+ attrs)))
+ (push (if xml-space
+ (rod= (rod (sax:attribute-value xml-space)) #"default")
+ (car (xml-space-attributes handler)))
+ (xml-space-attributes handler)))
+ (let* ((e (cxml::find-element (rod qname) dtd))
+ (cspec (when e (cxml::elmdef-content e))))
+ (push (and (consp cspec)
+ (not (and (eq (car cspec) '*)
+ (let ((subspec (second cspec)))
+ (and (eq (car subspec) 'or)
+ (eq (cadr subspec) :PCDATA))))))
+ (xml-space-models handler)))))
+ (call-next-method))
+
+(defmethod sax:characters ((handler whitespace-normalizer) data)
+ (cond
+ ((and (xml-space-dtd handler)
+ (car (xml-space-attributes handler))
+ (car (xml-space-models handler)))
+ (unless (every #'white-space-rune-p (rod data))
+ (warn "non-whitespace character data in element content")
+ (call-next-method)))
+ (t
+ (call-next-method))))
+
+(defmethod sax:end-element ((handler whitespace-normalizer) uri lname qname)
+ (declare (ignore uri lname qname))
+ (when (xml-space-dtd handler)
+ (pop (xml-space-attributes handler))
+ (pop (xml-space-models handler)))
+ (call-next-method))
Added: vendor/cxml/split-sequence.lisp
===================================================================
--- vendor/cxml/split-sequence.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/split-sequence.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,44 @@
+;;; This code was based on Arthur Lemmens' in
+;;; <URL:http://groups.google.com/groups?as_umsgid=39F36F1A.B8F19D20%40simplex.nl>;
+
+(in-package :cxml)
+
+(defun split-sequence-if (predicate seq &key (count nil) (remove-empty-subseqs nil) (from-end nil) (start 0) (end nil) (key nil key-supplied))
+ (let ((len (length seq))
+ (other-keys (when key-supplied
+ (list :key key))))
+ (unless end (setq end len))
+ (if from-end
+ (loop for right = end then left
+ for left = (max (or (apply #'position-if predicate seq
+ :end right
+ :from-end t
+ other-keys)
+ -1)
+ (1- start))
+ unless (and (= right (1+ left))
+ remove-empty-subseqs) ; empty subseq we don't want
+ if (and count (>= nr-elts count))
+ ;; We can't take any more. Return now.
+ return (values (nreverse subseqs) right)
+ else
+ collect (subseq seq (1+ left) right) into subseqs
+ and sum 1 into nr-elts
+ until (< left start)
+ finally (return (values (nreverse subseqs) (1+ left))))
+ (loop for left = start then (+ right 1)
+ for right = (min (or (apply #'position-if predicate seq
+ :start left
+ other-keys)
+ len)
+ end)
+ unless (and (= right left)
+ remove-empty-subseqs) ; empty subseq we don't want
+ if (and count (>= nr-elts count))
+ ;; We can't take any more. Return now.
+ return (values subseqs left)
+ else
+ collect (subseq seq left right) into subseqs
+ and sum 1 into nr-elts
+ until (>= right end)
+ finally (return (values subseqs right))))))
Added: vendor/cxml/unparse.lisp
===================================================================
--- vendor/cxml/unparse.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/unparse.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,569 @@
+;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CXML; readtable: runes; Encoding: utf-8; -*-
+;;; ---------------------------------------------------------------------------
+;;; Title: Unparse XML
+;;; Title: (including support for canonic XML according to J.Clark)
+;;; Created: 1999-09-09
+;;; Author: Gilbert Baumann <unk6 at rz.uni-karlsruhe.de>
+;;; Author: David Lichteblau <david at lichteblau.com>
+;;; License: Lisp-LGPL (See file COPYING for details).
+;;; ---------------------------------------------------------------------------
+;;; © copyright 1999 by Gilbert Baumann
+;;; © copyright 2004 by knowledgeTools Int. GmbH
+;;; © copyright 2004 by David Lichteblau (for headcraft.de)
+
+;;; This library is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Library General Public
+;;; License as published by the Free Software Foundation; either
+;;; version 2 of the License, or (at your option) any later version.
+;;;
+;;; This library is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; Library General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Library General Public
+;;; License along with this library; if not, write to the
+;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;;; Boston, MA 02111-1307 USA.
+
+(in-package :cxml)
+
+;;
+;; | Canonical XML
+;; | =============
+;; |
+;; | This document defines a subset of XML called canonical XML. The
+;; | intended use of canonical XML is in testing XML processors, as a
+;; | representation of the result of parsing an XML document.
+;; |
+;; | Every well-formed XML document has a unique structurally equivalent
+;; | canonical XML document. Two structurally equivalent XML documents have
+;; | a byte-for-byte identical canonical XML document. Canonicalizing an
+;; | XML document requires only information that an XML processor is
+;; | required to make available to an application.
+;; |
+;; | A canonical XML document conforms to the following grammar:
+;; |
+;; | CanonXML ::= Pi* element Pi*
+;; | element ::= Stag (Datachar | Pi | element)* Etag
+;; | Stag ::= '<' Name Atts '>'
+;; | Etag ::= '</' Name '>'
+;; | Pi ::= '<?' Name ' ' (((Char - S) Char*)? - (Char* '?>' Char*)) '?>'
+;; | Atts ::= (' ' Name '=' '"' Datachar* '"')*
+;; | Datachar ::= '&' | '<' | '>' | '"'
+;; | | ' '| '
'| '
'
+;; | | (Char - ('&' | '<' | '>' | '"' | #x9 | #xA | #xD))
+;; | Name ::= (see XML spec)
+;; | Char ::= (see XML spec)
+;; | S ::= (see XML spec)
+;; |
+;; | Attributes are in lexicographical order (in Unicode bit order).
+;; |
+;; | A canonical XML document is encoded in UTF-8.
+;; |
+;; | Ignorable white space is considered significant and is treated
+;; | equivalently to data.
+;;
+;; -- James Clark (jjc at jclark.com)
+
+
+;;;; SINK: an xml output sink
+
+(defclass sink ()
+ ((ystream :initarg :ystream :accessor sink-ystream)
+ (width :initform 79 :initarg :width :accessor width)
+ (canonical :initform t :initarg :canonical :accessor canonical)
+ (indentation :initform nil :initarg :indentation :accessor indentation)
+ (current-indentation :initform 0 :accessor current-indentation)
+ (notations :initform (make-buffer :element-type t) :accessor notations)
+ (name-for-dtd :accessor name-for-dtd)
+ (previous-notation :initform nil :accessor previous-notation)
+ (have-doctype :initform nil :accessor have-doctype)
+ (stack :initform nil :accessor stack)))
+
+(defmethod initialize-instance :after ((instance sink) &key)
+ (when (eq (canonical instance) t)
+ (setf (canonical instance) 1))
+ (unless (member (canonical instance) '(nil 1 2))
+ (error "Invalid canonical form: ~A" (canonical instance)))
+ (when (and (canonical instance) (indentation instance))
+ (error "Cannot indent XML in canonical mode")))
+
+(defun make-buffer (&key (element-type '(unsigned-byte 8)))
+ (make-array 1
+ :element-type element-type
+ :adjustable t
+ :fill-pointer 0))
+
+;; total haesslich, aber die ystreams will ich im moment eigentlich nicht
+;; dokumentieren
+(macrolet ((define-maker (make-sink make-ystream &rest args)
+ `(defun ,make-sink (, at args &rest initargs)
+ (apply #'make-instance
+ 'sink
+ :ystream (,make-ystream , at args)
+ initargs))))
+ (define-maker make-octet-vector-sink make-octet-vector-ystream)
+ (define-maker make-octet-stream-sink make-octet-stream-ystream stream)
+ (define-maker make-rod-sink make-rod-ystream)
+
+ #+rune-is-character
+ (define-maker make-character-stream-sink make-character-stream-ystream stream)
+
+ #-rune-is-character
+ (define-maker make-string-sink/utf8 make-string-ystream/utf8)
+
+ #-rune-is-character
+ (define-maker make-character-stream-sink/utf8
+ make-character-stream-ystream/utf8
+ stream))
+
+#+rune-is-character
+(defun make-string-sink (&rest args) (apply #'make-rod-sink args))
+
+
+(defmethod sax:end-document ((sink sink))
+ (close-ystream (sink-ystream sink)))
+
+
+;;;; doctype and notations
+
+(defmethod sax:start-document ((sink sink))
+ (unless (canonical sink)
+ (%write-rod #"<?xml version=\"1.0\" encoding=\"UTF-8\"?>" sink)
+ (%write-rune #/U+000A sink)))
+
+(defmethod sax:start-dtd ((sink sink) name public-id system-id)
+ (setf (name-for-dtd sink) name)
+ (unless (canonical sink)
+ (ensure-doctype sink public-id system-id)))
+
+(defun ensure-doctype (sink &optional public-id system-id)
+ (unless (have-doctype sink)
+ (setf (have-doctype sink) t)
+ (%write-rod #"<!DOCTYPE " sink)
+ (%write-rod (name-for-dtd sink) sink)
+ (cond
+ (public-id
+ (%write-rod #" PUBLIC \"" sink)
+ (unparse-string public-id sink)
+ (%write-rod #"\" \"" sink)
+ (unparse-string system-id sink)
+ (%write-rod #"\"" sink))
+ (system-id
+ (%write-rod #" SYSTEM \"" sink)
+ (unparse-string public-id sink)
+ (%write-rod #"\"" sink)))))
+
+(defmethod sax:start-internal-subset ((sink sink))
+ (ensure-doctype sink)
+ (%write-rod #" [" sink)
+ (%write-rune #/U+000A sink))
+
+(defmethod sax:end-internal-subset ((sink sink))
+ (ensure-doctype sink)
+ (%write-rod #"]" sink))
+
+(defmethod sax:notation-declaration ((sink sink) name public-id system-id)
+ (let ((prev (previous-notation sink)))
+ (when (and (and (canonical sink) (>= (canonical sink) 2))
+ prev
+ (not (rod< prev name)))
+ (error "misordered notations; cannot unparse canonically"))
+ (setf (previous-notation sink) name))
+ (%write-rod #"<!NOTATION " sink)
+ (%write-rod name sink)
+ (cond
+ ((zerop (length public-id))
+ (%write-rod #" SYSTEM '" sink)
+ (%write-rod system-id sink)
+ (%write-rune #/' sink))
+ ((zerop (length system-id))
+ (%write-rod #" PUBLIC '" sink)
+ (%write-rod public-id sink)
+ (%write-rune #/' sink))
+ (t
+ (%write-rod #" PUBLIC '" sink)
+ (%write-rod public-id sink)
+ (%write-rod #"' '" sink)
+ (%write-rod system-id sink)
+ (%write-rune #/' sink)))
+ (%write-rune #/> sink)
+ (%write-rune #/U+000A sink))
+
+(defmethod sax:unparsed-entity-declaration
+ ((sink sink) name public-id system-id notation-name)
+ (unless (and (canonical sink) (< (canonical sink) 3))
+ (%write-rod #"<!ENTITY " sink)
+ (%write-rod name sink)
+ (cond
+ ((zerop (length public-id))
+ (%write-rod #" SYSTEM '" sink)
+ (%write-rod system-id sink)
+ (%write-rune #/' sink))
+ ((zerop (length system-id))
+ (%write-rod #" PUBLIC '" sink)
+ (%write-rod public-id sink)
+ (%write-rune #/' sink))
+ (t
+ (%write-rod #" PUBLIC '" sink)
+ (%write-rod public-id sink)
+ (%write-rod #"' '" sink)
+ (%write-rod system-id sink)
+ (%write-rune #/' sink)))
+ (%write-rod #" NDATA " sink)
+ (%write-rod notation-name sink)
+ (%write-rune #/> sink)
+ (%write-rune #/U+000A sink)))
+
+(defmethod sax:external-entity-declaration
+ ((sink sink) kind name public-id system-id)
+ (when (canonical sink)
+ (error "cannot serialize parsed entities in canonical mode"))
+ (%write-rod #"<!ENTITY " sink)
+ (when (eq kind :parameter)
+ (%write-rod #" % " sink))
+ (%write-rod name sink)
+ (cond
+ ((zerop (length public-id))
+ (%write-rod #" SYSTEM '" sink)
+ (%write-rod system-id sink)
+ (%write-rune #/' sink))
+ ((zerop (length system-id))
+ (%write-rod #" PUBLIC '" sink)
+ (%write-rod public-id sink)
+ (%write-rune #/' sink))
+ (t
+ (%write-rod #" PUBLIC '" sink)
+ (%write-rod public-id sink)
+ (%write-rod #"' '" sink)
+ (%write-rod system-id sink)
+ (%write-rune #/' sink)))
+ (%write-rune #/> sink)
+ (%write-rune #/U+000A sink))
+
+(defmethod sax:internal-entity-declaration ((sink sink) kind name value)
+ (when (canonical sink)
+ (error "cannot serialize parsed entities in canonical mode"))
+ (%write-rod #"<!ENTITY " sink)
+ (when (eq kind :parameter)
+ (%write-rod #" % " sink))
+ (%write-rod name sink)
+ (%write-rune #/U+0020 sink)
+ (%write-rune #/\" sink)
+ (unparse-string value sink)
+ (%write-rune #/\" sink)
+ (%write-rune #/> sink)
+ (%write-rune #/U+000A sink))
+
+(defmethod sax:element-declaration ((sink sink) name model)
+ (when (canonical sink)
+ (error "cannot serialize element type declarations in canonical mode"))
+ (%write-rod #"<!ELEMENT " sink)
+ (%write-rod name sink)
+ (%write-rune #/U+0020 sink)
+ (labels ((walk (m)
+ (cond
+ ((eq m :EMPTY)
+ (%write-rod "EMPTY" sink))
+ ((eq m :PCDATA)
+ (%write-rod "#PCDATA" sink))
+ ((atom m)
+ (unparse-string m sink))
+ (t
+ (ecase (car m)
+ (and
+ (%write-rune #/\( sink)
+ (loop for (n . rest) on (cdr m) do
+ (walk n)
+ (when rest
+ (%write-rune #\, sink)))
+ (%write-rune #/\) sink))
+ (or
+ (%write-rune #/\( sink)
+ (loop for (n . rest) on (cdr m) do
+ (walk n)
+ (when rest
+ (%write-rune #\| sink)))
+ (%write-rune #/\) sink))
+ (*
+ (walk (second m))
+ (%write-rod #/* sink))
+ (+
+ (walk (second m))
+ (%write-rod #/+ sink))
+ (?
+ (walk (second m))
+ (%write-rod #/? sink)))))))
+ (walk model))
+ (%write-rune #/> sink)
+ (%write-rune #/U+000A sink))
+
+(defmethod sax:attribute-declaration ((sink sink) ename aname type default)
+ (when (canonical sink)
+ (error "cannot serialize attribute type declarations in canonical mode"))
+ (%write-rod #"<!ATTLIST " sink)
+ (%write-rod ename sink)
+ (%write-rune #/U+0020 sink)
+ (%write-rod aname sink)
+ (%write-rune #/U+0020 sink)
+ (cond
+ ((atom type)
+ (%write-rod (rod (string-upcase (symbol-name type))) sink))
+ (t
+ (when (eq :NOTATION (car type))
+ (%write-rod #"NOTATION " sink))
+ (%write-rune #/\( sink)
+ (loop for (n . rest) on (cdr type) do
+ (%write-rod n sink)
+ (when rest
+ (%write-rune #\| sink)))
+ (%write-rune #/\) sink)))
+ (cond
+ ((atom default)
+ (%write-rune #/# sink)
+ (%write-rod (rod (string-upcase (symbol-name default))) sink))
+ (t
+ (when (eq :FIXED (car default))
+ (%write-rod #"#FIXED " sink))
+ (%write-rune #/\" sink)
+ (unparse-string (second default) sink)
+ (%write-rune #/\" sink)))
+ (%write-rune #/> sink)
+ (%write-rune #/U+000A sink))
+
+(defmethod sax:end-dtd ((sink sink))
+ (when (have-doctype sink)
+ (%write-rod #">" sink)
+ (%write-rune #/U+000A sink)))
+
+
+;;;; elements
+
+(defstruct (tag (:constructor make-tag (name)))
+ name
+ (n-children 0)
+ (have-gt nil))
+
+(defun sink-fresh-line (sink)
+ (unless (zerop (ystream-column (sink-ystream sink)))
+ (%write-rune #/U+000A sink) ;newline
+ (indent sink)))
+
+(defun maybe-close-tag (sink)
+ (let ((tag (car (stack sink))))
+ (when (and (tag-p tag) (not (tag-have-gt tag)))
+ (setf (tag-have-gt tag) t)
+ (%write-rune #/> sink))))
+
+(defmethod sax:start-element
+ ((sink sink) namespace-uri local-name qname attributes)
+ (declare (ignore namespace-uri local-name))
+ (maybe-close-tag sink)
+ (when (stack sink)
+ (incf (tag-n-children (first (stack sink)))))
+ (push (make-tag qname) (stack sink))
+ (when (indentation sink)
+ (sink-fresh-line sink)
+ (start-indentation-block sink))
+ (%write-rune #/< sink)
+ (%write-rod qname sink)
+ (let ((atts (sort (copy-list attributes) #'rod< :key #'sax:attribute-qname)))
+ (dolist (a atts)
+ (%write-rune #/space sink)
+ (%write-rod (sax:attribute-qname a) sink)
+ (%write-rune #/= sink)
+ (%write-rune #/\" sink)
+ (unparse-string (sax:attribute-value a) sink)
+ (%write-rune #/\" sink)))
+ (when (canonical sink)
+ (maybe-close-tag sink)))
+
+(defmethod sax:end-element
+ ((sink sink) namespace-uri local-name qname)
+ (declare (ignore namespace-uri local-name))
+ (let ((tag (pop (stack sink))))
+ (unless (tag-p tag)
+ (error "output does not nest: not in an element"))
+ (unless (rod= (tag-name tag) qname)
+ (error "output does not nest: expected ~A but got ~A"
+ (rod qname) (rod (tag-name tag))))
+ (when (indentation sink)
+ (end-indentation-block sink)
+ (unless (zerop (tag-n-children tag))
+ (sink-fresh-line sink)))
+ (cond
+ ((tag-have-gt tag)
+ (%write-rod '#.(string-rod "</") sink)
+ (%write-rod qname sink)
+ (%write-rod '#.(string-rod ">") sink))
+ (t
+ (%write-rod #"/>" sink)))))
+
+(defmethod sax:processing-instruction ((sink sink) target data)
+ (maybe-close-tag sink)
+ (unless (rod-equal target '#.(string-rod "xml"))
+ (%write-rod '#.(string-rod "<?") sink)
+ (%write-rod target sink)
+ (when data
+ (%write-rune #/space sink)
+ (%write-rod data sink))
+ (%write-rod '#.(string-rod "?>") sink)))
+
+(defmethod sax:start-cdata ((sink sink))
+ (maybe-close-tag sink)
+ (push :cdata (stack sink)))
+
+(defmethod sax:characters ((sink sink) data)
+ (maybe-close-tag sink)
+ (cond
+ ((and (eq (car (stack sink)) :cdata)
+ (not (canonical sink))
+ (not (search #"]]" data)))
+ (when (indentation sink)
+ (sink-fresh-line sink))
+ (%write-rod #"<![CDATA[" sink)
+ ;; XXX signal error if body is unprintable?
+ (map nil (lambda (c) (%write-rune c sink)) data)
+ (%write-rod #"]]>" sink))
+ (t
+ (if (indentation sink)
+ (unparse-indented-text data sink)
+ (let ((y (sink-ystream sink)))
+ (if (canonical sink)
+ (loop for c across data do (unparse-datachar c y))
+ (loop for c across data do (unparse-datachar-readable c y))))))))
+
+(defmethod sax:end-cdata ((sink sink))
+ (unless (eq (pop (stack sink)) :cdata)
+ (error "output does not nest: not in a cdata section")))
+
+(defun indent (sink)
+ (dotimes (x (current-indentation sink))
+ (%write-rune #/U+0020 sink))) ; space
+
+(defun start-indentation-block (sink)
+ (incf (current-indentation sink) (indentation sink)))
+
+(defun end-indentation-block (sink)
+ (decf (current-indentation sink) (indentation sink)))
+
+(defun unparse-indented-text (data sink)
+ (flet ((whitespacep (x)
+ (or (rune= x #/U+000A) (rune= x #/U+0020))))
+ (let* ((n (length data))
+ (pos (position-if-not #'whitespacep data))
+ (need-whitespace-p nil))
+ (cond
+ ((zerop n))
+ (pos
+ (sink-fresh-line sink)
+ (while (< pos n)
+ (let* ((w (or (position-if #'whitespacep data :start (1+ pos)) n))
+ (next (or (position-if-not #'whitespacep data :start w) n)))
+ (when need-whitespace-p
+ (if (< (+ (ystream-column (sink-ystream sink)) w (- pos))
+ (width sink))
+ (%write-rune #/U+0020 sink)
+ (sink-fresh-line sink)))
+ (loop
+ with y = (sink-ystream sink)
+ for i from pos below w do
+ (unparse-datachar-readable (elt data i) y))
+ (setf need-whitespace-p (< w n))
+ (setf pos next))))
+ (t
+ (%write-rune #/U+0020 sink))))))
+
+(defun unparse-string (str sink)
+ (let ((y (sink-ystream sink)))
+ (loop for rune across str do (unparse-datachar rune y))))
+
+(defun unparse-datachar (c ystream)
+ (cond ((rune= c #/&) (write-rod '#.(string-rod "&") ystream))
+ ((rune= c #/<) (write-rod '#.(string-rod "<") ystream))
+ ((rune= c #/>) (write-rod '#.(string-rod ">") ystream))
+ ((rune= c #/\") (write-rod '#.(string-rod """) ystream))
+ ((rune= c #/U+0009) (write-rod '#.(string-rod " ") ystream))
+ ((rune= c #/U+000A) (write-rod '#.(string-rod "
") ystream))
+ ((rune= c #/U+000D) (write-rod '#.(string-rod "
") ystream))
+ (t
+ (write-rune c ystream))))
+
+(defun unparse-datachar-readable (c ystream)
+ (cond ((rune= c #/&) (write-rod '#.(string-rod "&") ystream))
+ ((rune= c #/<) (write-rod '#.(string-rod "<") ystream))
+ ((rune= c #/>) (write-rod '#.(string-rod ">") ystream))
+ ((rune= c #/\") (write-rod '#.(string-rod """) ystream))
+ (t
+ (write-rune c ystream))))
+
+(defun %write-rune (c sink)
+ (write-rune c (sink-ystream sink)))
+
+(defun %write-rod (r sink)
+ (write-rod r (sink-ystream sink)))
+
+
+;;;; convenience functions for DOMless XML serialization
+
+(defvar *current-element*)
+(defvar *sink*)
+
+(defmacro with-xml-output (sink &body body)
+ `(invoke-with-xml-output (lambda () , at body) ,sink))
+
+(defun invoke-with-xml-output (fn sink)
+ (let ((*sink* sink)
+ (*current-element* nil))
+ (sax:start-document *sink*)
+ (funcall fn)
+ (sax:end-document *sink*)))
+
+(defmacro with-element (qname &body body)
+ `(invoke-with-element (lambda () , at body) ,qname))
+
+(defun maybe-emit-start-tag ()
+ (when *current-element*
+ ;; starting child node, need to emit opening tag of parent first:
+ (destructuring-bind (qname &rest attributes) *current-element*
+ (sax:start-element *sink* nil nil qname (reverse attributes)))
+ (setf *current-element* nil)))
+
+(defun invoke-with-element (fn qname)
+ (setf qname (rod qname))
+ (maybe-emit-start-tag)
+ (let ((*current-element* (list qname)))
+ (multiple-value-prog1
+ (funcall fn)
+ (maybe-emit-start-tag)
+ (sax:end-element *sink* nil nil qname))))
+
+(defun attribute (name value)
+ (push (sax:make-attribute :qname (rod name) :value (rod value))
+ (cdr *current-element*))
+ value)
+
+(defun cdata (data)
+ (sax:start-cdata *sink*)
+ (sax:characters *sink* (rod data))
+ (sax:end-cdata *sink*)
+ data)
+
+(defun text (data)
+ (maybe-emit-start-tag)
+ (sax:characters *sink* (rod data))
+ data)
+
+(defun rod-to-utf8-string (rod)
+ (let ((out (make-buffer :element-type 'character)))
+ (runes-to-utf8/adjustable-string out rod (length rod))
+ out))
+
+(defun utf8-string-to-rod (str)
+ (let* ((bytes (map '(vector (unsigned-byte 8)) #'char-code str))
+ (buffer (make-array (length bytes) :element-type '(unsigned-byte 16)))
+ (n (decode-sequence :utf-8 bytes 0 (length bytes) buffer 0 0 nil))
+ (result (make-array n :element-type 'rune)))
+ (map-into result #'code-rune buffer)
+ result))
Added: vendor/cxml/util.lisp
===================================================================
--- vendor/cxml/util.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/util.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,73 @@
+;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: RUNES; -*-
+;;; ---------------------------------------------------------------------------
+;;; Title: Some common utilities for the Closure browser
+;;; Created: 1997-12-27
+;;; Author: Gilbert Baumann <unk6 at rz.uni-karlsruhe.de>
+;;; License: Lisp-LGPL (See file COPYING for details).
+;;; ---------------------------------------------------------------------------
+;;; (c) copyright 1997-1999 by Gilbert Baumann
+
+;;; This code is free software; you can redistribute it and/or modify it
+;;; under the terms of the version 2.1 of the GNU Lesser General Public
+;;; License as published by the Free Software Foundation, as clarified
+;;; by the "Preamble to the Gnu Lesser General Public License" found in
+;;; the file COPYING.
+;;;
+;;; This code is distributed in the hope that it will be useful,
+;;; but without any warranty; without even the implied warranty of
+;;; merchantability or fitness for a particular purpose. See the GNU
+;;; Lesser General Public License for more details.
+;;;
+;;; Version 2.1 of the GNU Lesser General Public License is in the file
+;;; COPYING that was distributed with this file. If it is not present,
+;;; you can access it from http://www.gnu.org/copyleft/lesser.txt (until
+;;; superseded by a newer version) or write to the Free Software
+;;; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+;; Changes
+;;
+;; When Who What
+;; ----------------------------------------------------------------------------
+;; 1999-08-24 GB = fixed MULTIPLE-VALUE-OR it now takes any number of
+;; subforms
+;;
+
+(in-package :cxml)
+
+;;; --------------------------------------------------------------------------------
+;;; Meta functions
+
+(defun curry (fun &rest args)
+ #'(lambda (&rest more)
+ (apply fun (append args more))))
+
+(defun rcurry (fun &rest args)
+ #'(lambda (&rest more)
+ (apply fun (append more args))))
+
+(defun compose (f g)
+ #'(lambda (&rest args)
+ (funcall f (apply g args))))
+
+;;; --------------------------------------------------------------------------------
+;;; while and until
+
+(defmacro while (test &body body)
+ `(until (not ,test) , at body))
+
+(defmacro until (test &body body)
+ `(do () (,test) , at body))
+
+;; prime numbers
+
+(defun primep (n)
+ "Returns true, iff `n' is prime."
+ (and (> n 2)
+ (do ((i 2 (+ i 1)))
+ ((> (* i i) n) t)
+ (cond ((zerop (mod n i)) (return nil))))))
+
+(defun nearest-greater-prime (n)
+ "Returns the smallest prime number no less than `n'."
+ (cond ((primep n) n)
+ ((nearest-greater-prime (+ n 1)))))
Added: vendor/cxml/xml-name-rune-p.lisp
===================================================================
--- vendor/cxml/xml-name-rune-p.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/xml-name-rune-p.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,227 @@
+;;;; xml-name-rune-p -- character class definitions
+;;;;
+;;;; This file is part of the CXML parser, released under Lisp-LGPL.
+;;;; See file COPYING for details.
+;;;;
+;;;; Author: Gilbert Baumann <unk6 at rz.uni-karlsruhe.de>
+
+(in-package :cxml)
+
+#.(funcall
+ (compile
+ nil
+ '(lambda ()
+ (let ((+max+ #xD800))
+ (labels
+ ((name-start-rune-p (rune)
+ (or (letter-rune-p rune)
+ (= #.(char-code #\_) rune)
+ (= #.(char-code #\:) rune)))
+
+ (name-rune-p (rune)
+ (or (letter-rune-p rune)
+ (digit-rune-p* rune)
+ (= rune #.(char-code #\.))
+ (= rune #.(char-code #\-))
+ (= rune #.(char-code #\_))
+ (= rune #.(char-code #\:))
+ (combining-rune-p rune)
+ (extender-rune-p rune)))
+
+ (letter-rune-p (rune)
+ (or (base-rune-p rune)
+ (ideographic-rune-p rune)))
+
+ (digit-rune-p* (rune)
+ (or (<= 48 rune 57)
+ (<= 1632 rune 1641)
+ (<= 1776 rune 1785)
+ (<= 2406 rune 2415)
+ (<= 2534 rune 2543)
+ (<= 2662 rune 2671)
+ (<= 2790 rune 2799)
+ (<= 2918 rune 2927)
+ (<= 3047 rune 3055)
+ (<= 3174 rune 3183)
+ (<= 3302 rune 3311)
+ (<= 3430 rune 3439)
+ (<= 3664 rune 3673)
+ (<= 3792 rune 3801)
+ (<= 3872 rune 3881)))
+
+
+ (combining-rune-p (rune)
+ (or (<= 768 rune 837)
+ (<= 864 rune 865)
+ (<= 1155 rune 1158)
+ (<= 1425 rune 1441)
+ (<= 1443 rune 1465)
+ (<= 1467 rune 1469)
+ (= 1471 rune)
+ (<= 1473 rune 1474)
+ (= 1476 rune)
+ (<= 1611 rune 1618)
+ (= 1648 rune)
+ (<= 1750 rune 1756)
+ (<= 1757 rune 1759)
+ (<= 1760 rune 1764)
+ (<= 1767 rune 1768)
+ (<= 1770 rune 1773)
+ (<= 2305 rune 2307)
+ (= 2364 rune)
+ (<= 2366 rune 2380)
+ (= 2381 rune)
+ (<= 2385 rune 2388)
+ (<= 2402 rune 2403)
+ (<= 2433 rune 2435)
+ (= 2492 rune)
+ (= 2494 rune)
+ (= 2495 rune)
+ (<= 2496 rune 2500)
+ (<= 2503 rune 2504)
+ (<= 2507 rune 2509)
+ (= 2519 rune)
+ (<= 2530 rune 2531)
+ (= 2562 rune)
+ (= 2620 rune)
+ (= 2622 rune)
+ (= 2623 rune)
+ (<= 2624 rune 2626)
+ (<= 2631 rune 2632)
+ (<= 2635 rune 2637)
+ (<= 2672 rune 2673)
+ (<= 2689 rune 2691)
+ (= 2748 rune)
+ (<= 2750 rune 2757)
+ (<= 2759 rune 2761)
+ (<= 2763 rune 2765)
+ (<= 2817 rune 2819)
+ (= 2876 rune)
+ (<= 2878 rune 2883)
+ (<= 2887 rune 2888)
+ (<= 2891 rune 2893)
+ (<= 2902 rune 2903)
+ (<= 2946 rune 2947)
+ (<= 3006 rune 3010)
+ (<= 3014 rune 3016)
+ (<= 3018 rune 3021)
+ (= 3031 rune)
+ (<= 3073 rune 3075)
+ (<= 3134 rune 3140)
+ (<= 3142 rune 3144)
+ (<= 3146 rune 3149)
+ (<= 3157 rune 3158)
+ (<= 3202 rune 3203)
+ (<= 3262 rune 3268)
+ (<= 3270 rune 3272)
+ (<= 3274 rune 3277)
+ (<= 3285 rune 3286)
+ (<= 3330 rune 3331)
+ (<= 3390 rune 3395)
+ (<= 3398 rune 3400)
+ (<= 3402 rune 3405)
+ (= 3415 rune)
+ (= 3633 rune)
+ (<= 3636 rune 3642)
+ (<= 3655 rune 3662)
+ (= 3761 rune)
+ (<= 3764 rune 3769)
+ (<= 3771 rune 3772)
+ (<= 3784 rune 3789)
+ (<= 3864 rune 3865)
+ (= 3893 rune)
+ (= 3895 rune)
+ (= 3897 rune)
+ (= 3902 rune)
+ (= 3903 rune)
+ (<= 3953 rune 3972)
+ (<= 3974 rune 3979)
+ (<= 3984 rune 3989)
+ (= 3991 rune)
+ (<= 3993 rune 4013)
+ (<= 4017 rune 4023)
+ (= 4025 rune)
+ (<= 8400 rune 8412)
+ (= 8417 rune)
+ (<= 12330 rune 12335)
+ (= 12441 rune)
+ (= 12442 rune)))
+
+ (extender-rune-p (rune)
+ (or
+ (= 183 rune)
+ (= 720 rune)
+ (= 721 rune)
+ (= 903 rune)
+ (= 1600 rune)
+ (= 3654 rune)
+ (= 3782 rune)
+ (= 12293 rune)
+ (<= 12337 rune 12341)
+ (<= 12445 rune 12446)
+ (<= 12540 rune 12542)))
+
+ (base-rune-p (rune)
+ ;; split into two ORs for LispWorks...
+ (or
+ (or (<= 65 rune 90) (<= 97 rune 122) (<= 192 rune 214) (<= 216 rune 246) (<= 248 rune 255) (<= 256 rune 305)
+ (<= 308 rune 318) (<= 321 rune 328) (<= 330 rune 382) (<= 384 rune 451) (<= 461 rune 496) (<= 500 rune 501)
+ (<= 506 rune 535) (<= 592 rune 680) (<= 699 rune 705) (= 902 rune) (<= 904 rune 906) (= 908 rune)
+ (<= 910 rune 929) (<= 931 rune 974) (<= 976 rune 982) (= 986 rune) (= 988 rune) (= 990 rune) (= 992 rune)
+ (<= 994 rune 1011) (<= 1025 rune 1036) (<= 1038 rune 1103) (<= 1105 rune 1116) (<= 1118 rune 1153)
+ (<= 1168 rune 1220) (<= 1223 rune 1224) (<= 1227 rune 1228) (<= 1232 rune 1259) (<= 1262 rune 1269)
+ (<= 1272 rune 1273) (<= 1329 rune 1366) (= 1369 rune) (<= 1377 rune 1414) (<= 1488 rune 1514)
+ (<= 1520 rune 1522) (<= 1569 rune 1594) (<= 1601 rune 1610) (<= 1649 rune 1719) (<= 1722 rune 1726)
+ (<= 1728 rune 1742) (<= 1744 rune 1747) (= 1749 rune) (<= 1765 rune 1766) (<= 2309 rune 2361) (= 2365 rune)
+ (<= 2392 rune 2401) (<= 2437 rune 2444) (<= 2447 rune 2448) (<= 2451 rune 2472) (<= 2474 rune 2480)
+ (= 2482 rune) (<= 2486 rune 2489) (<= 2524 rune 2525) (<= 2527 rune 2529) (<= 2544 rune 2545)
+ (<= 2565 rune 2570) (<= 2575 rune 2576) (<= 2579 rune 2600) (<= 2602 rune 2608) (<= 2610 rune 2611)
+ (<= 2613 rune 2614) (<= 2616 rune 2617) (<= 2649 rune 2652) (= 2654 rune) (<= 2674 rune 2676)
+ (<= 2693 rune 2699) (= 2701 rune) (<= 2703 rune 2705) (<= 2707 rune 2728) (<= 2730 rune 2736)
+ (<= 2738 rune 2739) (<= 2741 rune 2745) (= 2749 rune) (= 2784 rune) (<= 2821 rune 2828) (<= 2831 rune 2832)
+ (<= 2835 rune 2856) (<= 2858 rune 2864) (<= 2866 rune 2867) (<= 2870 rune 2873) (= 2877 rune)
+ (<= 2908 rune 2909) (<= 2911 rune 2913) (<= 2949 rune 2954) (<= 2958 rune 2960) (<= 2962 rune 2965)
+ (<= 2969 rune 2970) (= 2972 rune))
+ (or (<= 2974 rune 2975) (<= 2979 rune 2980) (<= 2984 rune 2986)
+ (<= 2990 rune 2997) (<= 2999 rune 3001) (<= 3077 rune 3084) (<= 3086 rune 3088) (<= 3090 rune 3112)
+ (<= 3114 rune 3123) (<= 3125 rune 3129) (<= 3168 rune 3169) (<= 3205 rune 3212) (<= 3214 rune 3216)
+ (<= 3218 rune 3240) (<= 3242 rune 3251) (<= 3253 rune 3257) (= 3294 rune) (<= 3296 rune 3297)
+ (<= 3333 rune 3340) (<= 3342 rune 3344) (<= 3346 rune 3368) (<= 3370 rune 3385) (<= 3424 rune 3425)
+ (<= 3585 rune 3630) (= 3632 rune) (<= 3634 rune 3635) (<= 3648 rune 3653) (<= 3713 rune 3714) (= 3716 rune)
+ (<= 3719 rune 3720) (= 3722 rune) (= 3725 rune) (<= 3732 rune 3735) (<= 3737 rune 3743) (<= 3745 rune 3747)
+ (= 3749 rune) (= 3751 rune) (<= 3754 rune 3755) (<= 3757 rune 3758) (= 3760 rune) (<= 3762 rune 3763) (= 3773 rune)
+ (<= 3776 rune 3780) (<= 3904 rune 3911) (<= 3913 rune 3945) (<= 4256 rune 4293) (<= 4304 rune 4342)
+ (= 4352 rune) (<= 4354 rune 4355) (<= 4357 rune 4359) (= 4361 rune) (<= 4363 rune 4364) (<= 4366 rune 4370)
+ (= 4412 rune) (= 4414 rune) (= 4416 rune) (= 4428 rune) (= 4430 rune) (= 4432 rune) (<= 4436 rune 4437) (= 4441 rune)
+ (<= 4447 rune 4449) (= 4451 rune) (= 4453 rune) (= 4455 rune) (= 4457 rune) (<= 4461 rune 4462) (<= 4466 rune 4467)
+ (= 4469 rune) (= 4510 rune) (= 4520 rune) (= 4523 rune) (<= 4526 rune 4527) (<= 4535 rune 4536) (= 4538 rune)
+ (<= 4540 rune 4546) (= 4587 rune) (= 4592 rune) (= 4601 rune) (<= 7680 rune 7835) (<= 7840 rune 7929)
+ (<= 7936 rune 7957) (<= 7960 rune 7965) (<= 7968 rune 8005) (<= 8008 rune 8013) (<= 8016 rune 8023)
+ (= 8025 rune) (= 8027 rune) (= 8029 rune) (<= 8031 rune 8061) (<= 8064 rune 8116) (<= 8118 rune 8124) (= 8126 rune)
+ (<= 8130 rune 8132) (<= 8134 rune 8140) (<= 8144 rune 8147) (<= 8150 rune 8155) (<= 8160 rune 8172)
+ (<= 8178 rune 8180) (<= 8182 rune 8188) (= 8486 rune) (<= 8490 rune 8491) (= 8494 rune) (<= 8576 rune 8578)
+ (<= 12353 rune 12436) (<= 12449 rune 12538) (<= 12549 rune 12588) (<= 44032 rune 55203))))
+
+ (ideographic-rune-p (rune)
+ (or (<= 19968 rune 40869) (= 12295 rune) (<= 12321 rune 12329)))
+
+
+ (predicate-to-bv (p)
+ (let ((r (make-array +max+ :element-type 'bit :initial-element 0)))
+ (dotimes (i #x10000 r)
+ (when (funcall p i)
+ (setf (aref r i) 1))))) )
+
+ `(progn
+ (DEFINLINE NAME-RUNE-P (RUNE)
+ (SETF RUNE (RUNE-CODE RUNE))
+ (AND (<= 0 RUNE ,+max+)
+ (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3)))
+ (= 1 (SBIT ',(predicate-to-bv #'name-rune-p)
+ (THE FIXNUM RUNE))))))
+ (DEFINLINE NAME-START-RUNE-P (RUNE)
+ (SETF RUNE (RUNE-CODE RUNE))
+ (AND (<= 0 RUNE ,+MAX+)
+ (LOCALLY (DECLARE (OPTIMIZE (SAFETY 0) (SPEED 3)))
+ (= 1 (SBIT ',(predicate-to-bv #'name-start-rune-p)
+ (THE FIXNUM RUNE)))))))) ))))
Added: vendor/cxml/xml-parse.lisp
===================================================================
--- vendor/cxml/xml-parse.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/xml-parse.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,3544 @@
+;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: CXML; readtable: runes; -*-
+;;; ---------------------------------------------------------------------------
+;;; Title: XML parser
+;;; Created: 1999-07-17
+;;; Author: Gilbert Baumann <unk6 at rz.uni-karlsruhe.de>
+;;; Author: Henrik Motakef <hmot at henrik-motakef.de>
+;;; Author: David Lichteblau <david at lichteblau.com>
+;;; License: Lisp-LGPL (See file COPYING for details).
+;;; ---------------------------------------------------------------------------
+;;; © copyright 1999 by Gilbert Baumann
+;;; © copyright 2003 by Henrik Motakef
+;;; © copyright 2004 knowledgeTools Int. GmbH
+;;; © copyright 2004 David Lichteblau
+;;; © copyright 2005 David Lichteblau
+
+;;; This library is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU Library General Public
+;;; License as published by the Free Software Foundation; either
+;;; version 2 of the License, or (at your option) any later version.
+;;;
+;;; This library is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;;; Library General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU Library General Public
+;;; License along with this library; if not, write to the
+;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+;;; Boston, MA 02111-1307 USA.
+
+;;; Streams
+
+;;; xstreams
+
+;; For reading runes, I defined my own streams, called xstreams,
+;; because we want to be fast. A function call or even a method call
+;; per character is not acceptable, instead of that we define a
+;; buffered stream with and advertised buffer layout, so that we
+;; could use the trick stdio uses: READ-RUNE and PEEK-RUNE are macros,
+;; directly accessing the buffer and only calling some underflow
+;; handler in case of stream underflows. This will yield to quite a
+;; performance boost vs calling READ-BYTE per character.
+
+;; Also we need to do encoding t conversion on ; this better done at large chunks of data rather than on a character
+;; by character basis. This way we need a dispatch on the active
+;; encoding only once in a while, instead of for each character. This
+;; allows us to use a CLOS interface to do the underflow handling.
+
+;;; zstreams
+
+;; Now, for reading tokens, we define another kind of streams, called
+;; zstreams. These zstreams also maintain an input stack to implement
+;; inclusion of external entities. This input stack contains xstreams
+;; or the special marker :STOP. Such a :STOP marker indicates, that
+;; input should not continue there, but well stop; that is simulate an
+;; EOF. The user is then responsible to pop this marker off the input
+;; stack.
+;;
+;; This input stack is also used to detect circular entity inclusion.
+
+;; The zstream tokenizer recognizes the following types of tokens and
+;; is controlled by the *DATA-BEHAVIOUR* flag. (Which should become a
+;; slot of zstreams instead).
+
+;; Common
+;; :xml-decl (<target> . <content>) ;processing-instruction starting with "<?xml"
+;; :pi (<target> . <content>) ;processing-instruction
+;; :stag (<name> . <atts>) ;start tag
+;; :etag (<name> . <atts>) ;end tag
+;; :ztag (<name> . <atts>) ;empty tag
+;; :<!element
+;; :<!entity
+;; :<!attlist
+;; :<!notation
+;; :<!doctype
+;; :<![
+;; :comment <content>
+
+;; *data-behaviour* = :DTD
+;;
+;; :nmtoken <interned-rod>
+;; :#required
+;; :#implied
+;; :#fixed
+;; :#pcdata
+;; :s
+;; :\[ :\] :\( :\) :|\ :\> :\" :\' :\, :\? :\* :\+
+
+;; *data-behaviour* = :DOC
+;;
+;; :entity-ref <interned-rod>
+;; :cdata <rod>
+
+
+;;; TODO
+;;
+;; o provide for a faster DOM
+;;
+;; o morph zstream into a context object and thus also get rid of
+;; special variables. Put the current DTD there too.
+;; [partly done]
+
+;; o the *scratch-pad* hack should become something much more
+;; reentrant, we could either define a system-wide resource
+;; or allocate some scratch-pads per context.
+;; [for thread-safety reasons the array are allocated per context now,
+;; reentrancy is still open]
+
+;; o CR handling in utf-16 deocders
+;;
+;; o UCS-4 reader
+;;
+;; o max depth together with circle detection
+;; (or proof, that our circle detection is enough).
+;; [gemeint ist zstream-push--david]
+;;
+;; o better extensibility wrt character representation, one may want to
+;; have
+;; - UCS-4 in vectoren
+;;
+;; o xstreams auslagern, documententieren und dann auch in SGML und
+;; CSS parser verwenden. (halt alles was zeichen liest).
+;; [ausgelagert sind sie; dokumentiert "so la la"; die Reintegration
+;; in Closure ist ein ganz anderes Thema]
+;;
+;; o recording of source locations for nodes.
+;;
+;; o based on the DTD and xml:space attribute implement HTML white
+;; space rules.
+;;
+;; o on a parser option, do not expand external entities.
+
+;;;; Validity constraints:
+;;;; (00) Root Element Type like (03), c.f. MAKE-ROOT-MODEL
+;;;; (01) Proper Declaration/PE Nesting P/MARKUP-DECL
+;;;; (02) Standalone Document Declaration all over the place [*]
+;;;; (03) Element Valid VALIDATE-*-ELEMENT, -CHARACTERS
+;;;; (04) Attribute Value Type VALIDATE-ATTRIBUTE
+;;;; (05) Unique Element Type Declaration DEFINE-ELEMENT
+;;;; (06) Proper Group/PE Nesting P/CSPEC
+;;;; (07) No Duplicate Types LEGAL-CONTENT-MODEL-P
+;;;; (08) ID VALIDATE-ATTRIBUTE
+;;;; (09) One ID per Element Type DEFINE-ATTRIBUTE
+;;;; (10) ID Attribute Default DEFINE-ATTRIBUTE
+;;;; (11) IDREF VALIDATE-ATTRIBUTE, P/DOCUMENT
+;;;; (12) Entity Name VALIDATE-ATTRIBUTE
+;;;; (13) Name Token VALIDATE-ATTRIBUTE
+;;;; (14) Notation Attributes VALIDATE-ATTRIBUTE, P/ATT-TYPE
+;;;; (15) One Notation Per Element Type DEFINE-ATTRIBUTE
+;;;; (16) No Notation on Empty Element DEFINE-ELEMENT, -ATTRIBUTE
+;;;; (17) Enumeration VALIDATE-ATTRIBUTE
+;;;; (18) Required Attribute PROCESS-ATTRIBUTES
+;;;; (19) Attribute Default Legal DEFINE-ATTRIBUTE
+;;;; (20) Fixed Attribute Default VALIDATE-ATTRIBUTE
+;;;; (21) Proper Conditional Section/PE Nesting P/CONDITIONAL-SECT, ...
+;;;; (22) Entity Declared [**]
+;;;; (23) Notation Declared P/ENTITY-DEF, P/DOCUMENT
+;;;; (24) Unique Notation Name DEFINE-NOTATION
+;;;;
+;;;; [*] Perhaps we could revert the explicit checks of (02), if we did
+;;;; _not_ read external subsets of standalone documents when parsing in
+;;;; validating mode. Violations of VC (02) constraints would then appear as
+;;;; wellformedness violations, right?
+;;;;
+;;;; [**] Although I haven't investigated this properly yet, I believe that
+;;;; we check this VC together with the WFC even in non-validating mode.
+
+(in-package :cxml)
+
+#+allegro
+(setf (excl:named-readtable :runes) *readtable*)
+
+(eval-when (:compile-toplevel :load-toplevel :execute)
+ (defparameter *fast* '(optimize (speed 3) (safety 0)))
+ ;;(defparameter *fast* '(optimize (speed 2) (safety 3)))
+ )
+
+;;; parser context
+
+(defvar *ctx* nil)
+
+(defstruct (context (:conc-name nil))
+ handler
+ (dtd nil)
+ model-stack
+ (referenced-notations '())
+ (id-table (%make-rod-hash-table))
+ ;; FIXME: Wofuer ist name-hashtable da? Will man das wissen?
+ (name-hashtable (make-rod-hashtable :size 2000))
+ (standalone-p nil)
+ (entity-resolver nil)
+ (disallow-internal-subset nil)
+ main-zstream)
+
+(defvar *expand-pe-p* nil)
+
+(defparameter *namespace-bindings*
+ '((#"" . nil)
+ (#"xmlns" . #"http://www.w3.org/2000/xmlns/")
+ (#"xml" . #"http://www.w3.org/XML/1998/namespace")))
+
+;;;; ---------------------------------------------------------------------------
+;;;; xstreams
+;;;;
+
+
+(defstruct (stream-name
+ (:print-function print-stream-name))
+ entity-name
+ entity-kind
+ uri)
+
+(defun print-stream-name (object stream depth)
+ (declare (ignore depth))
+ (format stream "[~A ~S ~A]"
+ (rod-string (stream-name-entity-name object))
+ (stream-name-entity-kind object)
+ (stream-name-uri object)))
+
+(deftype read-element () 'rune)
+
+(defun call-with-open-xstream (fn stream)
+ (unwind-protect
+ (funcall fn stream)
+ (close-xstream stream)))
+
+(defmacro with-open-xstream ((var value) &body body)
+ `(call-with-open-xstream (lambda (,var) , at body) ,value))
+
+(defun call-with-open-xfile (continuation &rest open-args)
+ (let ((input (apply #'open (car open-args) :element-type '(unsigned-byte 8) (cdr open-args))))
+ (unwind-protect
+ (progn
+ (funcall continuation (make-xstream input)))
+ (close input))))
+
+(defmacro with-open-xfile ((stream &rest open-args) &body body)
+ `(call-with-open-xfile (lambda (,stream) .,body) .,open-args))
+
+;;;; -------------------------------------------------------------------
+;;;; Rechnen mit Runen
+;;;;
+
+;; Let us first define fast fixnum arithmetric get rid of type
+;; checks. (After all we know what we do here).
+
+(defmacro fx-op (op &rest xs)
+ `(the fixnum (,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs))))
+(defmacro fx-pred (op &rest xs)
+ `(,op ,@(mapcar (lambda (x) `(the fixnum ,x)) xs)))
+
+(defmacro %+ (&rest xs) `(fx-op + , at xs))
+(defmacro %- (&rest xs) `(fx-op - , at xs))
+(defmacro %* (&rest xs) `(fx-op * , at xs))
+(defmacro %/ (&rest xs) `(fx-op floor , at xs))
+(defmacro %and (&rest xs) `(fx-op logand , at xs))
+(defmacro %ior (&rest xs) `(fx-op logior , at xs))
+(defmacro %xor (&rest xs) `(fx-op logxor , at xs))
+(defmacro %ash (&rest xs) `(fx-op ash , at xs))
+(defmacro %mod (&rest xs) `(fx-op mod , at xs))
+
+(defmacro %= (&rest xs) `(fx-pred = , at xs))
+(defmacro %<= (&rest xs) `(fx-pred <= , at xs))
+(defmacro %>= (&rest xs) `(fx-pred >= , at xs))
+(defmacro %< (&rest xs) `(fx-pred < , at xs))
+(defmacro %> (&rest xs) `(fx-pred > , at xs))
+
+;;; XXX Geschwindigkeit dieser Definitionen untersuchen!
+
+(defmacro rune-op (op &rest xs)
+ `(code-rune (,op ,@(mapcar (lambda (x) `(rune-code ,x)) xs))))
+(defmacro rune-pred (op &rest xs)
+ `(,op ,@(mapcar (lambda (x) `(rune-code ,x)) xs)))
+
+(defmacro %rune+ (&rest xs) `(rune-op + , at xs))
+(defmacro %rune- (&rest xs) `(rune-op - , at xs))
+(defmacro %rune* (&rest xs) `(rune-op * , at xs))
+(defmacro %rune/ (&rest xs) `(rune-op floor , at xs))
+(defmacro %rune-and (&rest xs) `(rune-op logand , at xs))
+(defmacro %rune-ior (&rest xs) `(rune-op logior , at xs))
+(defmacro %rune-xor (&rest xs) `(rune-op logxor , at xs))
+(defmacro %rune-ash (a b) `(code-rune (ash (rune-code ,a) ,b)))
+(defmacro %rune-mod (&rest xs) `(rune-op mod , at xs))
+
+(defmacro %rune= (&rest xs) `(rune-pred = , at xs))
+(defmacro %rune<= (&rest xs) `(rune-pred <= , at xs))
+(defmacro %rune>= (&rest xs) `(rune-pred >= , at xs))
+(defmacro %rune< (&rest xs) `(rune-pred < , at xs))
+(defmacro %rune> (&rest xs) `(rune-pred > , at xs))
+
+;;;; ---------------------------------------------------------------------------
+;;;; rod hashtable
+;;;;
+
+;;; make-rod-hashtable
+;;; rod-hash-get hashtable rod &optional start end -> value ; successp
+;;; (setf (rod-hash-get hashtable rod &optional start end) new-value
+;;;
+
+(defstruct (rod-hashtable (:constructor make-rod-hashtable/low))
+ size ;size of table
+ table ;
+ )
+
+(defun make-rod-hashtable (&key (size 200))
+ (setf size (nearest-greater-prime size))
+ (make-rod-hashtable/low
+ :size size
+ :table (make-array size :initial-element nil)))
+
+(eval-when (:compile-toplevel :load-toplevel :execute)
+ (defconstant +fixnum-bits+
+ (1- (integer-length most-positive-fixnum))
+ "Pessimistic approximation of the number of bits of fixnums.")
+
+ (defconstant +fixnum-mask+
+ (1- (expt 2 +fixnum-bits+))
+ "Pessimistic approximation of the largest bit-mask, still being a fixnum."))
+
+(definline stir (a b)
+ (%and +fixnum-mask+
+ (%xor (%ior (%ash (%and a #.(ash +fixnum-mask+ -5)) 5)
+ (%ash a #.(- 5 +fixnum-bits+)))
+ b)))
+
+(definline rod-hash (rod start end)
+ "Compute a hash code out of a rod."
+ (let ((res (%- end start)))
+ (do ((i start (%+ i 1)))
+ ((%= i end))
+ (declare (type fixnum i))
+ (setf res (stir res (rune-code (%rune rod i)))))
+ res))
+
+(definline rod=* (x y &key (start1 0) (end1 (length x))
+ (start2 0) (end2 (length y)))
+ (and (%= (%- end1 start1) (%- end2 start2))
+ (do ((i start1 (%+ i 1))
+ (j start2 (%+ j 1)))
+ ((%= i end1)
+ t)
+ (unless (rune= (%rune x i) (%rune y j))
+ (return nil)))))
+
+(definline rod=** (x y start1 end1 start2 end2)
+ (and (%= (%- end1 start1) (%- end2 start2))
+ (do ((i start1 (%+ i 1))
+ (j start2 (%+ j 1)))
+ ((%= i end1)
+ t)
+ (unless (rune= (%rune x i) (%rune y j))
+ (return nil)))))
+
+(defun rod-hash-get (hashtable rod &optional (start 0) (end (length rod)))
+ (declare (type (simple-array rune (*)) rod))
+ (let ((j (%mod (rod-hash rod start end)
+ (rod-hashtable-size hashtable))))
+ (dolist (q (svref (rod-hashtable-table hashtable) j)
+ (values nil nil nil))
+ (declare (type cons q))
+ (when (rod=** (car q) rod 0 (length (the (simple-array rune (*)) (car q))) start end)
+ (return (values (cdr q) t (car q)))))))
+
+(defun rod-hash-set (new-value hashtable rod &optional (start 0) (end (length rod)))
+ (let ((j (%mod (rod-hash rod start end)
+ (rod-hashtable-size hashtable)))
+ (key nil))
+ (dolist (q (svref (rod-hashtable-table hashtable) j)
+ (progn
+ (setf key (rod-subseq* rod start end))
+ (push (cons key new-value)
+ (aref (rod-hashtable-table hashtable) j))))
+ (when (rod=* (car q) rod :start2 start :end2 end)
+ (setf key (car q))
+ (setf (cdr q) new-value)
+ (return)))
+ (values new-value key)))
+
+#-rune-is-character
+(defun rod-subseq* (source start &optional (end (length source)))
+ (unless (and (typep start 'fixnum) (>= start 0))
+ (error "~S is not a non-negative fixnum." start))
+ (unless (and (typep end 'fixnum) (>= end start))
+ (error "END argument, ~S, is not a fixnum no less than START, ~S." end start))
+ (when (> start (length source))
+ (error "START argument, ~S, should be no greater than length of rod." start))
+ (when (> end (length source))
+ (error "END argument, ~S, should be no greater than length of rod." end))
+ (locally
+ (declare (type fixnum start end))
+ (let ((res (make-rod (- end start))))
+ (declare (type rod res))
+ (do ((i (- (- end start) 1) (the fixnum (- i 1))))
+ ((< i 0) res)
+ (declare (type fixnum i))
+ (setf (%rune res i) (aref source (the fixnum (+ i start))))))))
+
+#+rune-is-character
+(defun rod-subseq* (source start &optional (end (length source)))
+ (subseq source start end))
+
+(deftype ufixnum () `(unsigned-byte ,(integer-length most-positive-fixnum)))
+
+#-rune-is-character
+(defun rod-subseq** (source start &optional (end (length source)))
+ (declare (type (simple-array rune (*)) source)
+ (type ufixnum start)
+ (type ufixnum end)
+ (optimize (speed 3) (safety 0)))
+ (let ((res (make-array (%- end start) :element-type 'rune)))
+ (declare (type (simple-array rune (*)) res))
+ (let ((i (%- end start)))
+ (declare (type ufixnum i))
+ (loop
+ (setf i (- i 1))
+ (when (= i 0)
+ (return))
+ (setf (%rune res i) (%rune source (the ufixnum (+ i start))))))
+ res))
+
+#+rune-is-character
+(defun rod-subseq** (source start &optional (end (length source)))
+ (subseq source start end))
+
+(defun (setf rod-hash-get) (new-value hashtable rod &optional (start 0) (end (length rod)))
+ (rod-hash-set new-value hashtable rod start end))
+
+(defun intern-name (rod &optional (start 0) (end (length rod)))
+ (multiple-value-bind (value successp key) (rod-hash-get (name-hashtable *ctx*) rod start end)
+ (declare (ignore value))
+ (if successp
+ key
+ (nth-value 1 (rod-hash-set t (name-hashtable *ctx*) rod start end)))))
+
+;;;; ---------------------------------------------------------------------------
+;;;;
+;;;; rod collector
+;;;;
+
+(defvar *scratch-pad*)
+(defvar *scratch-pad-2*)
+(defvar *scratch-pad-3*)
+(defvar *scratch-pad-4*)
+
+(declaim (type (simple-array rune (*))
+ *scratch-pad* *scratch-pad-2* *scratch-pad-3* *scratch-pad-4*))
+
+(defmacro with-scratch-pads ((&optional) &body body)
+ `(let ((*scratch-pad* (make-array 1024 :element-type 'rune))
+ (*scratch-pad-2* (make-array 1024 :element-type 'rune))
+ (*scratch-pad-3* (make-array 1024 :element-type 'rune))
+ (*scratch-pad-4* (make-array 1024 :element-type 'rune)))
+ , at body))
+
+(defmacro %put-unicode-char (code-var put)
+ `(progn
+ (cond ((%> ,code-var #xFFFF)
+ (,put (the rune (code-rune (%+ #xD7C0 (%ash ,code-var -10)))))
+ (,put (the rune (code-rune (%ior #xDC00 (%and ,code-var #x03FF))))))
+ (t
+ (,put (code-rune ,code-var))))))
+
+(defun adjust-array-by-copying (old-array new-size)
+ "Adjust an array by copying and thus ensures, that result is a SIMPLE-ARRAY."
+ (let ((res (make-array new-size :element-type (array-element-type old-array))))
+ (replace res old-array
+ :start1 0 :end1 (length old-array)
+ :start2 0 :end2 (length old-array))
+ res))
+
+(defmacro with-rune-collector-aux (scratch collect body mode)
+ (let ((rod (gensym))
+ (n (gensym))
+ (i (gensym))
+ (b (gensym)))
+ `(let ((,n (length ,scratch))
+ (,i 0)
+ (,b ,scratch))
+ (declare (type fixnum ,n ,i))
+ (macrolet
+ ((,collect (x)
+ `((lambda (x)
+ (locally
+ (declare #.*fast*)
+ (when (%>= ,',i ,',n)
+ (setf ,',n (* 2 ,',n))
+ (setf ,',b
+ (setf ,',scratch
+ (adjust-array-by-copying ,',scratch ,',n))))
+ (setf (aref (the (simple-array rune (*)) ,',b) ,',i) x)
+ (incf ,',i)))
+ ,x)))
+ , at body
+ ,(ecase mode
+ (:intern
+ `(intern-name ,b 0 ,i))
+ (:copy
+ `(let ((,rod (make-rod ,i)))
+ (while (not (%= ,i 0))
+ (setf ,i (%- ,i 1))
+ (setf (%rune ,rod ,i)
+ (aref (the (simple-array rune (*)) ,b) ,i)))
+ ,rod))
+ (:raw
+ `(values ,b 0 ,i))
+ )))))
+
+'(defmacro with-rune-collector-aux (scratch collect body mode)
+ (let ((rod (gensym))
+ (n (gensym))
+ (i (gensym))
+ (b (gensym)))
+ `(let ((,n (length ,scratch))
+ (,i 0))
+ (declare (type fixnum ,n ,i))
+ (macrolet
+ ((,collect (x)
+ `((lambda (x)
+ (locally
+ (declare #.*fast*)
+ (when (%>= ,',i ,',n)
+ (setf ,',n (* 2 ,',n))
+ (setf ,',scratch
+ (setf ,',scratch
+ (adjust-array-by-copying ,',scratch ,',n))))
+ (setf (aref (the (simple-array rune (*)) ,',scratch) ,',i) x)
+ (incf ,',i)))
+ ,x)))
+ , at body
+ ,(ecase mode
+ (:intern
+ `(intern-name ,scratch 0 ,i))
+ (:copy
+ `(let ((,rod (make-rod ,i)))
+ (while (%> ,i 0)
+ (setf ,i (%- ,i 1))
+ (setf (%rune ,rod ,i)
+ (aref (the (simple-array rune (*)) ,scratch) ,i)))
+ ,rod))
+ (:raw
+ `(values ,scratch 0 ,i))
+ )))))
+
+(defmacro with-rune-collector ((collect) &body body)
+ `(with-rune-collector-aux *scratch-pad* ,collect ,body :copy))
+
+(defmacro with-rune-collector-2 ((collect) &body body)
+ `(with-rune-collector-aux *scratch-pad-2* ,collect ,body :copy))
+
+(defmacro with-rune-collector-3 ((collect) &body body)
+ `(with-rune-collector-aux *scratch-pad-3* ,collect ,body :copy))
+
+(defmacro with-rune-collector-4 ((collect) &body body)
+ `(with-rune-collector-aux *scratch-pad-4* ,collect ,body :copy))
+
+(defmacro with-rune-collector/intern ((collect) &body body)
+ `(with-rune-collector-aux *scratch-pad* ,collect ,body :intern))
+
+(defmacro with-rune-collector/raw ((collect) &body body)
+ `(with-rune-collector-aux *scratch-pad* ,collect ,body :raw))
+
+#|
+(defmacro while-reading-runes ((reader stream-in) &rest body)
+ ;; Thou shalt not leave body via a non local exit
+ (let ((stream (make-symbol "STREAM"))
+ (rptr (make-symbol "RPTR"))
+ (fptr (make-symbol "FPTR"))
+ (buf (make-symbol "BUF")) )
+ `(let* ((,stream ,stream-in)
+ (,rptr (xstream-read-ptr ,stream))
+ (,fptr (xstream-fill-ptr ,stream))
+ (,buf (xstream-buffer ,stream)))
+ (declare (type fixnum ,rptr ,fptr)
+ (type xstream ,stream))
+ (macrolet ((,reader (res-var)
+ `(cond ((%= ,',rptr ,',fptr)
+ (setf (xstream-read-ptr ,',stream) ,',rptr)
+ (setf ,res-var (xstream-underflow ,',stream))
+ (setf ,',rptr (xstream-read-ptr ,',stream))
+ (setf ,',fptr (xstream-fill-ptr ,',stream))
+ (setf ,',buf (xstream-buffer ,',stream)))
+ (t
+ (setf ,res-var
+ (aref (the (simple-array read-element (*)) ,',buf)
+ (the fixnum ,',rptr)))
+ (setf ,',rptr (%+ ,',rptr 1))))))
+ (prog1
+ (let () .,body)
+ (setf (xstream-read-ptr ,stream) ,rptr) )))))
+|#
+
+;;;; ---------------------------------------------------------------------------
+;;;; DTD
+;;;;
+
+(define-condition xml-parse-error (simple-error) ())
+(define-condition well-formedness-violation (xml-parse-error) ())
+(define-condition validity-error (xml-parse-error) ())
+
+;; We make some effort to signal end of file as a special condition, but we
+;; don't actually try very hard. Not sure whether we should. Right now I
+;; would prefer not to document this class.
+(define-condition end-of-xstream (well-formedness-violation) ())
+
+(defun describe-xstream (x s)
+ (format s " Line ~D, column ~D in ~A~%"
+ (xstream-line-number x)
+ (xstream-column-number x)
+ (let ((name (xstream-name x)))
+ (cond
+ ((null name)
+ "<anonymous stream>")
+ ((eq :main (stream-name-entity-kind name))
+ (stream-name-uri name))
+ (t
+ name)))))
+
+(defun %error (class stream message)
+ (let* ((zmain (if *ctx* (main-zstream *ctx*) nil))
+ (zstream (if (zstream-p stream) stream zmain))
+ (xstream (if (xstream-p stream) stream nil))
+ (s (make-string-output-stream)))
+ (write-line message s)
+ (when xstream
+ (write-line "Location:" s)
+ (describe-xstream xstream s))
+ (when zstream
+ (let ((stack
+ (remove xstream (remove :stop (zstream-input-stack zstream)))))
+ (when stack
+ (write-line "Context:" s)
+ (dolist (x stack)
+ (describe-xstream x s)))))
+ (when (and zmain (not (eq zstream zmain)))
+ (let ((stack
+ (remove xstream (remove :stop (zstream-input-stack zmain)))))
+ (when stack
+ (write-line "Context in main document:" s)
+ (dolist (x stack)
+ (describe-xstream x s)))))
+ (error class
+ :format-control "~A"
+ :format-arguments (list (get-output-stream-string s)))))
+
+(defun validity-error (fmt &rest args)
+ (%error 'validity-error
+ nil
+ (format nil "Document not valid: ~?" fmt args)))
+
+(defun wf-error (stream fmt &rest args)
+ (%error 'well-formedness-violation
+ stream
+ (format nil "Document not well-formed: ~?" fmt args)))
+
+(defun eox (stream &optional x &rest args)
+ (%error 'end-of-xstream
+ stream
+ (format nil "End of file~@[: ~?~]" x args)))
+
+(defvar *validate* t)
+(defvar *external-subset-p* nil)
+
+(defun validate-start-element (ctx name)
+ (when *validate*
+ (let* ((pair (car (model-stack ctx)))
+ (newval (funcall (car pair) name)))
+ (unless newval
+ (validity-error "(03) Element Valid: ~A" (rod-string name)))
+ (setf (car pair) newval)
+ (let ((e (find-element name (dtd ctx))))
+ (unless e
+ (validity-error "(03) Element Valid: no definition for ~A"
+ (rod-string name)))
+ (maybe-compile-cspec e)
+ (push (copy-cons (elmdef-compiled-cspec e)) (model-stack ctx))))))
+
+(defun copy-cons (x)
+ (cons (car x) (cdr x)))
+
+(defun validate-end-element (ctx name)
+ (when *validate*
+ (let ((pair (car (model-stack ctx))))
+ (unless (eq (funcall (car pair) nil) t)
+ (validity-error "(03) Element Valid: ~A" (rod-string name)))
+ (pop (model-stack ctx)))))
+
+(defun validate-characters (ctx rod)
+ (when *validate*
+ (let ((pair (car (model-stack ctx))))
+ (unless (funcall (cdr pair) rod)
+ (validity-error "(03) Element Valid: unexpected PCDATA")))))
+
+(defun standalone-check-necessary-p (def)
+ (and *validate*
+ (standalone-p *ctx*)
+ (etypecase def
+ (elmdef (elmdef-external-p def))
+ (attdef (attdef-external-p def)))))
+
+;; attribute validation, defaulting, and normalization -- except for for
+;; uniqueness checks, which are done after namespaces have been declared
+(defun process-attributes (ctx name attlist)
+ (let ((e (find-element name (dtd ctx))))
+ (cond
+ (e
+ (dolist (ad (elmdef-attributes e)) ;handle default values
+ (unless (get-attribute (attdef-name ad) attlist)
+ (case (attdef-default ad)
+ (:IMPLIED)
+ (:REQUIRED
+ (when *validate*
+ (validity-error "(18) Required Attribute: ~S not specified"
+ (rod-string (attdef-name ad)))))
+ (t
+ (when (standalone-check-necessary-p ad)
+ (validity-error "(02) Standalone Document Declaration: missing attribute value"))
+ (push (sax:make-attribute :qname (attdef-name ad)
+ :value (cadr (attdef-default ad))
+ :specified-p nil)
+ attlist)))))
+ (dolist (a attlist) ;normalize non-CDATA values
+ (let* ((qname (sax:attribute-qname a))
+ (adef (find-attribute e qname)))
+ (when adef
+ (when (and *validate*
+ sax:*namespace-processing*
+ (eq (attdef-type adef) :ID)
+ (find #/: (sax:attribute-value a)))
+ (validity-error "colon in ID attribute"))
+ (unless (eq (attdef-type adef) :CDATA)
+ (let ((canon (canon-not-cdata-attval (sax:attribute-value a))))
+ (when (and (standalone-check-necessary-p adef)
+ (not (rod= (sax:attribute-value a) canon)))
+ (validity-error "(02) Standalone Document Declaration: attribute value not normalized"))
+ (setf (sax:attribute-value a) canon))))))
+ (when *validate* ;maybe validate attribute values
+ (dolist (a attlist)
+ (validate-attribute ctx e a))))
+ ((and *validate* attlist)
+ (validity-error "(04) Attribute Value Type: no definition for element ~A"
+ (rod-string name)))))
+ attlist)
+
+(defun get-attribute (name attributes)
+ (member name attributes :key #'sax:attribute-qname :test #'rod=))
+
+(defun validate-attribute (ctx e a)
+ (when (sax:attribute-specified-p a) ;defaults checked by DEFINE-ATTRIBUTE
+ (let* ((qname (sax:attribute-qname a))
+ (adef
+ (or (find-attribute e qname)
+ (validity-error "(04) Attribute Value Type: not declared: ~A"
+ (rod-string qname)))))
+ (validate-attribute* ctx adef (sax:attribute-value a)))))
+
+(defun validate-attribute* (ctx adef value)
+ (let ((type (attdef-type adef))
+ (default (attdef-default adef)))
+ (when (and (listp default)
+ (eq (car default) :FIXED)
+ (not (rod= value (cadr default))))
+ (validity-error "(20) Fixed Attribute Default: expected ~S but got ~S"
+ (rod-string (cadr default))
+ (rod-string value)))
+ (ecase (if (listp type) (car type) type)
+ (:ID
+ (unless (valid-name-p value)
+ (validity-error "(08) ID: not a name: ~S" (rod-string value)))
+ (when (eq (gethash value (id-table ctx)) t)
+ (validity-error "(08) ID: ~S not unique" (rod-string value)))
+ (setf (gethash value (id-table ctx)) t))
+ (:IDREF
+ (validate-idref ctx value))
+ (:IDREFS
+ (let ((names (split-names value)))
+ (unless names
+ (validity-error "(11) IDREF: malformed names"))
+ (mapc (curry #'validate-idref ctx) names)))
+ (:NMTOKEN
+ (validate-nmtoken value))
+ (:NMTOKENS
+ (let ((tokens (split-names value)))
+ (unless tokens
+ (validity-error "(13) Name Token: malformed NMTOKENS"))
+ (mapc #'validate-nmtoken tokens)))
+ (:ENUMERATION
+ (unless (member value (cdr type) :test #'rod=)
+ (validity-error "(17) Enumeration: value not declared: ~S"
+ (rod-string value))))
+ (:NOTATION
+ (unless (member value (cdr type) :test #'rod=)
+ (validity-error "(14) Notation Attributes: ~S" (rod-string value))))
+ (:ENTITY
+ (validate-entity value))
+ (:ENTITIES
+ (let ((names (split-names value)))
+ (unless names
+ (validity-error "(13) Name Token: malformed NMTOKENS"))
+ (mapc #'validate-entity names)))
+ (:CDATA))))
+
+(defun validate-idref (ctx value)
+ (unless (valid-name-p value)
+ (validity-error "(11) IDREF: not a name: ~S" (rod-string value)))
+ (unless (gethash value (id-table ctx))
+ (setf (gethash value (id-table ctx)) nil)))
+
+(defun validate-nmtoken (value)
+ (unless (valid-nmtoken-p value)
+ (validity-error "(13) Name Token: not a NMTOKEN: ~S"
+ (rod-string value))))
+
+(defstruct (entdef (:constructor)))
+
+(defstruct (internal-entdef
+ (:include entdef)
+ (:constructor make-internal-entdef (value))
+ (:conc-name #:entdef-))
+ (value (error "missing argument") :type rod)
+ (expansion nil)
+ (external-subset-p *external-subset-p*))
+
+(defstruct (external-entdef
+ (:include entdef)
+ (:constructor make-external-entdef (extid ndata))
+ (:conc-name #:entdef-))
+ (extid (error "missing argument") :type extid)
+ (ndata nil :type (or rod null)))
+
+(defun validate-entity (value)
+ (unless (valid-name-p value)
+ (validity-error "(12) Entity Name: not a name: ~S" (rod-string value)))
+ (let ((def (let ((*validate*
+ ;; Similarly the entity refs are internal and
+ ;; don't need normalization ... the unparsed
+ ;; entities (and entities) aren't "references"
+ ;; -- sun/valid/sa03.xml
+ nil))
+ (get-entity-definition value :general (dtd *ctx*)))))
+ (unless (and (typep def 'external-entdef) (entdef-ndata def))
+ ;; unparsed entity
+ (validity-error "(12) Entity Name: ~S" (rod-string value)))))
+
+(defun split-names (rod)
+ (flet ((whitespacep (x)
+ (or (rune= x #/U+0009)
+ (rune= x #/U+000A)
+ (rune= x #/U+000D)
+ (rune= x #/U+0020))))
+ (if (let ((n (length rod)))
+ (and (not (zerop n))
+ (or (whitespacep (rune rod 0))
+ (whitespacep (rune rod (1- n))))))
+ nil
+ (split-sequence-if #'whitespacep rod :remove-empty-subseqs t))))
+
+(defun zstream-base-sysid (zstream)
+ (let ((base-sysid
+ (dolist (k (zstream-input-stack zstream))
+ (let ((base-sysid (stream-name-uri (xstream-name k))))
+ (when base-sysid (return base-sysid))))))
+ base-sysid))
+
+(defun absolute-uri (sysid source-stream)
+ (let ((base-sysid (zstream-base-sysid source-stream)))
+ ;; XXX is the IF correct?
+ (if base-sysid
+ (puri:merge-uris sysid base-sysid)
+ sysid)))
+
+(defstruct (extid (:constructor make-extid (public system)))
+ (public nil :type (or rod null))
+ (system (error "missing argument") :type (or puri:uri null)))
+
+(defun absolute-extid (source-stream extid)
+ (let ((sysid (extid-system extid))
+ (result (copy-extid extid)))
+ (setf (extid-system result) (absolute-uri sysid source-stream))
+ result))
+
+(defun define-entity (source-stream name kind def)
+ (setf name (intern-name name))
+ (when (and sax:*namespace-processing* (find #/: name))
+ (wf-error source-stream "colon in entity name"))
+ (let ((table
+ (ecase kind
+ (:general (dtd-gentities (dtd *ctx*)))
+ (:parameter (dtd-pentities (dtd *ctx*))))))
+ (unless (gethash name table)
+ (when (and source-stream (handler *ctx*))
+ (report-entity (handler *ctx*) kind name def))
+ (when (typep def 'external-entdef)
+ (setf (entdef-extid def)
+ (absolute-extid source-stream (entdef-extid def))))
+ (setf (gethash name table)
+ (cons *external-subset-p* def)))))
+
+(defun get-entity-definition (entity-name kind dtd)
+ (unless dtd
+ (wf-error nil "entity not defined: ~A" (rod-string entity-name)))
+ (destructuring-bind (extp &rest def)
+ (gethash entity-name
+ (ecase kind
+ (:general (dtd-gentities dtd))
+ (:parameter (dtd-pentities dtd)))
+ '(nil))
+ (when (and *validate* (standalone-p *ctx*) extp)
+ (validity-error "(02) Standalone Document Declaration: entity reference: ~S"
+ (rod-string entity-name)))
+ def))
+
+(defun entity->xstream (zstream entity-name kind &optional internalp)
+ ;; `zstream' is for error messages
+ (let ((def (get-entity-definition entity-name kind (dtd *ctx*))))
+ (unless def
+ (wf-error zstream "Entity '~A' is not defined." (rod-string entity-name)))
+ (let (r)
+ (etypecase def
+ (internal-entdef
+ (when (and (standalone-p *ctx*)
+ (entdef-external-subset-p def))
+ (wf-error
+ zstream
+ "entity declared in external subset, but document is standalone"))
+ (setf r (make-rod-xstream (entdef-value def)))
+ (setf (xstream-name r)
+ (make-stream-name :entity-name entity-name
+ :entity-kind kind
+ :uri nil)))
+ (external-entdef
+ (when internalp
+ (wf-error zstream
+ "entity not internal: ~A" (rod-string entity-name)))
+ (when (entdef-ndata def)
+ (wf-error zstream
+ "reference to unparsed entity: ~A"
+ (rod-string entity-name)))
+ (setf r (xstream-open-extid (extid-using-catalog (entdef-extid def))))
+ (setf (stream-name-entity-name (xstream-name r)) entity-name
+ (stream-name-entity-kind (xstream-name r)) kind)))
+ r)))
+
+(defun checked-get-entdef (name type)
+ (let ((def (get-entity-definition name type (dtd *ctx*))))
+ (unless def
+ (wf-error nil "Entity '~A' is not defined." (rod-string name)))
+ def))
+
+(defun xstream-open-extid (extid)
+ (let* ((sysid (extid-system extid))
+ (stream
+ (or (funcall (or (entity-resolver *ctx*) (constantly nil))
+ (extid-public extid)
+ (extid-system extid))
+ (open (uri-to-pathname sysid)
+ :element-type '(unsigned-byte 8)
+ :direction :input))))
+ (make-xstream stream
+ :name (make-stream-name :uri sysid)
+ :initial-speed 1)))
+
+(defun call-with-entity-expansion-as-stream (zstream cont name kind internalp)
+ ;; `zstream' is for error messages
+ (let ((in (entity->xstream zstream name kind internalp)))
+ (unwind-protect
+ (funcall cont in)
+ (close-xstream in))))
+
+(defun ensure-dtd ()
+ (unless (dtd *ctx*)
+ (setf (dtd *ctx*) (make-dtd))
+ (define-default-entities)))
+
+(defun define-default-entities ()
+ (define-entity nil #"lt" :general (make-internal-entdef #"<"))
+ (define-entity nil #"gt" :general (make-internal-entdef #">"))
+ (define-entity nil #"amp" :general (make-internal-entdef #"&"))
+ (define-entity nil #"apos" :general (make-internal-entdef #"'"))
+ (define-entity nil #"quot" :general (make-internal-entdef #"\"")))
+
+(defstruct attdef
+ ;; an attribute definition
+ element ;name of element this attribute belongs to
+ name ;name of attribute
+ type ;type of attribute; either one of :CDATA, :ID, :IDREF, :IDREFS,
+ ; :ENTITY, :ENTITIES, :NMTOKEN, :NMTOKENS, or
+ ; (:NOTATION <name>*)
+ ; (:ENUMERATION <name>*)
+ default ;default value of attribute:
+ ; :REQUIRED, :IMPLIED, (:FIXED content) or (:DEFAULT content)
+ (external-p *external-subset-p*)
+ )
+
+(defstruct elmdef
+ ;; an element definition
+ name ;name of the element
+ content ;content model [*]
+ attributes ;list of defined attributes
+ compiled-cspec ;cons of validation function for contentspec
+ (external-p *external-subset-p*)
+ )
+
+;; [*] in XML it is possible to define attributes before the element
+;; itself is defined and since we hang attribute definitions into the
+;; relevant element definitions, the `content' slot indicates whether an
+;; element was actually defined. It is NIL until set to a content model
+;; when the element type declaration is processed.
+
+(defun %make-rod-hash-table ()
+ ;; XXX with portable hash tables, this is the only way to case-sensitively
+ ;; use rods. However, EQUALP often has horrible performance! Most Lisps
+ ;; provide extensions for user-defined equality, we should use them! There
+ ;; is also a home-made hash table for rods defined below, written by
+ ;; Gilbert (I think). We could also use that one, but I would prefer the
+ ;; first method, even if it's unportable.
+ (make-hash-table :test
+ #+rune-is-character 'equal
+ #-rune-is-character 'equalp))
+
+(defstruct dtd
+ (elements (%make-rod-hash-table)) ;elmdefs
+ (gentities (%make-rod-hash-table)) ;general entities
+ (pentities (%make-rod-hash-table)) ;parameter entities
+ (notations (%make-rod-hash-table)))
+
+(defun make-dtd-cache ()
+ (puri:make-uri-space))
+
+(defvar *cache-all-dtds* nil)
+(defvar *dtd-cache* (make-dtd-cache))
+
+(defun remdtd (uri dtd-cache)
+ (setf uri (puri:intern-uri uri dtd-cache))
+ (prog1
+ (and (getf (puri:uri-plist uri) 'dtd) t)
+ (puri:unintern-uri uri dtd-cache)))
+
+(defun clear-dtd-cache (dtd-cache)
+ (puri:unintern-uri t dtd-cache))
+
+(defun getdtd (uri dtd-cache)
+ (getf (puri:uri-plist (puri:intern-uri uri dtd-cache)) 'dtd))
+
+(defun (setf getdtd) (newval uri dtd-cache)
+ (setf (getf (puri:uri-plist (puri:intern-uri uri dtd-cache)) 'dtd) newval)
+ newval)
+
+
+;;;;
+
+(defun find-element (name dtd)
+ (gethash name (dtd-elements dtd)))
+
+(defun define-element (dtd element-name &optional content-model)
+ (let ((e (find-element element-name dtd)))
+ (cond
+ ((null e)
+ (setf (gethash element-name (dtd-elements dtd))
+ (make-elmdef :name element-name :content content-model)))
+ ((null content-model)
+ e)
+ (t
+ (when *validate*
+ (when (elmdef-content e)
+ (validity-error "(05) Unique Element Type Declaration"))
+ (when (eq content-model :EMPTY)
+ (dolist (ad (elmdef-attributes e))
+ (let ((type (attdef-type ad)))
+ (when (and (listp type) (eq (car type) :NOTATION))
+ (validity-error "(16) No Notation on Empty Element: ~S"
+ (rod-string element-name)))))))
+ (sax:element-declaration (handler *ctx*) element-name content-model)
+ (setf (elmdef-content e) content-model)
+ (setf (elmdef-external-p e) *external-subset-p*)
+ e))))
+
+(defvar *redefinition-warning* nil)
+
+(defun define-attribute (dtd element name type default)
+ (let ((adef (make-attdef :element element
+ :name name
+ :type type
+ :default default))
+ (e (or (find-element element dtd)
+ (define-element dtd element))))
+ (when (and *validate* (listp default))
+ (unless (eq (attdef-type adef) :CDATA)
+ (setf (second default) (canon-not-cdata-attval (second default))))
+ (validate-attribute* *ctx* adef (second default)))
+ (cond ((find-attribute e name)
+ (when *redefinition-warning*
+ (warn "Attribute \"~A\" of \"~A\" not redefined."
+ (rod-string name)
+ (rod-string element))))
+ (t
+ (when *validate*
+ (when (eq type :ID)
+ (when (find :ID (elmdef-attributes e) :key #'attdef-type)
+ (validity-error "(09) One ID per Element Type: element ~A"
+ (rod-string element)))
+ (unless (member default '(:REQUIRED :IMPLIED))
+ (validity-error "(10) ID Attribute Default: ~A"
+ (rod-string element))))
+ (flet ((notationp (type)
+ (and (listp type) (eq (car type) :NOTATION))))
+ (when (notationp type)
+ (when (find-if #'notationp (elmdef-attributes e)
+ :key #'attdef-type)
+ (validity-error "(15) One Notation Per Element Type: ~S"
+ (rod-string element)))
+ (when (eq (elmdef-content e) :EMPTY)
+ (validity-error "(16) No Notation on Empty Element: ~S"
+ (rod-string element))))))
+ (sax:attribute-declaration (handler *ctx*) element name type default)
+ (push adef (elmdef-attributes e))))))
+
+(defun find-attribute (elmdef name)
+ (find name (elmdef-attributes elmdef) :key #'attdef-name :test #'rod=))
+
+(defun define-notation (dtd name id)
+ (let ((ns (dtd-notations dtd)))
+ (when (gethash name ns)
+ (validity-error "(24) Unique Notation Name: ~S" (rod-string name)))
+ (setf (gethash name ns) id)))
+
+(defun find-notation (name dtd)
+ (gethash name (dtd-notations dtd)))
+
+;;;; ---------------------------------------------------------------------------
+;;;; z streams and lexer
+;;;;
+
+(defstruct zstream
+ token-category
+ token-semantic
+ input-stack)
+
+(defun read-token (input)
+ (cond ((zstream-token-category input)
+ (multiple-value-prog1
+ (values (zstream-token-category input)
+ (zstream-token-semantic input))
+ (setf (zstream-token-category input) nil
+ (zstream-token-semantic input) nil)))
+ (t
+ (read-token-2 input))))
+
+(defun peek-token (input)
+ (cond ((zstream-token-category input)
+ (values
+ (zstream-token-category input)
+ (zstream-token-semantic input)))
+ (t
+ (multiple-value-bind (c s) (read-token input)
+ (setf (zstream-token-category input) c
+ (zstream-token-semantic input) s))
+ (values (zstream-token-category input)
+ (zstream-token-semantic input)))))
+
+(defun read-token-2 (input)
+ (cond ((null (zstream-input-stack input))
+ (values :eof nil))
+ (t
+ (let ((c (peek-rune (car (zstream-input-stack input)))))
+ (cond ((eq c :eof)
+ (cond ((eq (cadr (zstream-input-stack input)) :stop)
+ (values :eof nil))
+ (t
+ (close-xstream (pop (zstream-input-stack input)))
+ (if (null (zstream-input-stack input))
+ (values :eof nil)
+ (values :S nil) ;fake #x20 after PE expansion
+ ))))
+ (t
+ (read-token-3 input)))))))
+
+(defvar *data-behaviour*
+ ) ;either :DTD or :DOC
+
+(defun read-token-3 (zinput)
+ (let ((input (car (zstream-input-stack zinput))))
+ ;; PI Comment
+ (let ((c (read-rune input)))
+ (cond
+ ;; first the common tokens
+ ((rune= #/< c)
+ (read-token-after-|<| zinput input))
+ ;; now dispatch
+ (t
+ (ecase *data-behaviour*
+ (:DTD
+ (cond ((rune= #/\[ c) :\[)
+ ((rune= #/\] c) :\])
+ ((rune= #/\( c) :\()
+ ((rune= #/\) c) :\))
+ ((rune= #/\| c) :\|)
+ ((rune= #/\> c) :\>)
+ ((rune= #/\" c) :\")
+ ((rune= #/\' c) :\')
+ ((rune= #/\, c) :\,)
+ ((rune= #/\? c) :\?)
+ ((rune= #/\* c) :\*)
+ ((rune= #/\+ c) :\+)
+ ((name-rune-p c)
+ (unread-rune c input)
+ (values :nmtoken (read-name-token input)))
+ ((rune= #/# c)
+ (let ((q (read-name-token input)))
+ (cond ((rod= q '#.(string-rod "REQUIRED")) :|#REQUIRED|)
+ ((rod= q '#.(string-rod "IMPLIED")) :|#IMPLIED|)
+ ((rod= q '#.(string-rod "FIXED")) :|#FIXED|)
+ ((rod= q '#.(string-rod "PCDATA")) :|#PCDATA|)
+ (t
+ (wf-error zinput "Unknown token: ~S." q)))))
+ ((or (rune= c #/U+0020)
+ (rune= c #/U+0009)
+ (rune= c #/U+000D)
+ (rune= c #/U+000A))
+ (values :S nil))
+ ((rune= #/% c)
+ (cond ((name-start-rune-p (peek-rune input))
+ ;; an entity reference
+ (read-pe-reference zinput))
+ (t
+ (values :%))))
+ (t
+ (wf-error zinput "Unexpected character ~S." c))))
+ (:DOC
+ (cond
+ ((rune= c #/&)
+ (multiple-value-bind (kind data) (read-entity-like input)
+ (cond ((eq kind :ENTITY-REFERENCE)
+ (values :ENTITY-REF data))
+ ((eq kind :CHARACTER-REFERENCE)
+ (values :CDATA
+ (with-rune-collector (collect)
+ (%put-unicode-char data collect)))))))
+ (t
+ (unread-rune c input)
+ (values :CDATA (read-cdata input)))))))))))
+
+(definline check-rune (input actual expected)
+ (unless (eql actual expected)
+ (wf-error input "expected #/~A but found #/~A"
+ (rune-char expected)
+ (rune-char actual))))
+
+(defun read-pe-reference (zinput)
+ (let* ((input (car (zstream-input-stack zinput)))
+ (nam (read-name-token input)))
+ (check-rune input #/\; (read-rune input))
+ (cond (*expand-pe-p*
+ ;; no external entities here!
+ (let ((i2 (entity->xstream zinput nam :parameter)))
+ (zstream-push i2 zinput))
+ (values :S nil) ;space before inserted PE expansion.
+ )
+ (t
+ (values :PE-REFERENCE nam)) )))
+
+(defun read-token-after-|<| (zinput input)
+ (let ((d (read-rune input)))
+ (cond ((eq d :eof)
+ (eox input "EOF after '<'"))
+ ((rune= #/! d)
+ (read-token-after-|<!| input))
+ ((rune= #/? d)
+ (multiple-value-bind (target content) (read-pi input)
+ (cond ((rod= target '#.(string-rod "xml"))
+ (values :xml-decl (cons target content)))
+ ((rod-equal target '#.(string-rod "XML"))
+ (wf-error zinput
+ "You lost -- no XML processing instructions."))
+ ((and sax:*namespace-processing* (position #/: target))
+ (wf-error zinput
+ "Processing instruction target ~S is not a ~
+ valid NcName."
+ (mu target)))
+ (t
+ (values :PI (cons target content))))))
+ ((rune= #// d)
+ (let ((c (peek-rune input)))
+ (cond ((name-start-rune-p c)
+ (read-tag-2 zinput input :etag))
+ (t
+ (wf-error zinput
+ "Expecting name start rune after \"</\".")))))
+ ((name-start-rune-p d)
+ (unread-rune d input)
+ (read-tag-2 zinput input :stag))
+ (t
+ (wf-error zinput "Expected '!' or '?' after '<' in DTD.")))))
+
+(defun read-token-after-|<!| (input)
+ (let ((d (read-rune input)))
+ (cond ((eq d :eof)
+ (eox input "EOF after \"<!\"."))
+ ((name-start-rune-p d)
+ (unread-rune d input)
+ (let ((name (read-name-token input)))
+ (cond ((rod= name '#.(string-rod "ELEMENT")) :|<!ELEMENT|)
+ ((rod= name '#.(string-rod "ENTITY")) :|<!ENTITY|)
+ ((rod= name '#.(string-rod "ATTLIST")) :|<!ATTLIST|)
+ ((rod= name '#.(string-rod "NOTATION")) :|<!NOTATION|)
+ ((rod= name '#.(string-rod "DOCTYPE")) :|<!DOCTYPE|)
+ (t
+ (wf-error input"`<!~A' unknown." (rod-string name))))))
+ ((rune= #/\[ d)
+ (values :|<![| nil))
+ ((rune= #/- d)
+ (setf d (read-rune input))
+ (cond ((rune= #/- d)
+ (values
+ :COMMENT
+ (read-comment-content input)))
+ (t
+ (wf-error input"Bad character ~S after \"<!-\"" d))))
+ (t
+ (wf-error input "Bad character ~S after \"<!\"" d)))))
+
+(definline read-S? (input)
+ (while (member (peek-rune input) '(#/U+0020 #/U+0009 #/U+000A #/U+000D)
+ :test #'eql)
+ (consume-rune input)))
+
+(defun read-attribute-list (zinput input imagine-space-p)
+ (cond ((or imagine-space-p
+ (let ((c (peek-rune input)))
+ (and (not (eq c :eof))
+ (space-rune-p c))))
+ (read-S? input)
+ (cond ((eq (peek-rune input) :eof)
+ nil)
+ ((name-start-rune-p (peek-rune input))
+ (cons (read-attribute zinput input)
+ (read-attribute-list zinput input nil)))
+ (t
+ nil)))
+ (t
+ nil)))
+
+(defun read-entity-like (input)
+ "Read an entity reference off the xstream `input'. Returns two values:
+ either :ENTITY-REFERENCE <interned-rod> in case of a named entity
+ or :CHARACTER-REFERENCE <integer> in case of character references.
+ The initial #\\& is considered to be consumed already."
+ (let ((c (peek-rune input)))
+ (cond ((eq c :eof)
+ (eox input "EOF after '&'"))
+ ((rune= c #/#)
+ (values :CHARACTER-REFERENCE (read-character-reference input)))
+ (t
+ (unless (name-start-rune-p (peek-rune input))
+ (wf-error input "Expecting name after &."))
+ (let ((name (read-name-token input)))
+ (setf c (read-rune input))
+ (unless (rune= c #/\;)
+ (wf-error input "Expected \";\"."))
+ (values :ENTITY-REFERENCE name))))))
+
+(defun read-tag-2 (zinput input kind)
+ (let ((name (read-name-token input))
+ (atts nil))
+ (setf atts (read-attribute-list zinput input nil))
+
+ ;; check for double attributes
+ (do ((q atts (cdr q)))
+ ((null q))
+ (cond ((find (caar q) (cdr q) :key #'car)
+ (wf-error zinput "Attribute ~S has two definitions in element ~S."
+ (rod-string (caar q))
+ (rod-string name)))))
+
+ (cond ((eq (peek-rune input) #/>)
+ (consume-rune input)
+ (values kind (cons name atts)))
+ ((eq (peek-rune input) #//)
+ (consume-rune input)
+ (check-rune input #/> (read-rune input))
+ (values :ztag (cons name atts)))
+ (t
+ (wf-error zinput "syntax error in read-tag-2.")) )))
+
+(defun read-attribute (zinput input)
+ (unless (name-start-rune-p (peek-rune input))
+ (wf-error zinput "Expected name."))
+ ;; arg thanks to the post mortem nature of name space declarations,
+ ;; we could only process the attribute values post mortem.
+ (let ((name (read-name-token input)))
+ (while (let ((c (peek-rune input)))
+ (and (not (eq c :eof))
+ (or (rune= c #/U+0020)
+ (rune= c #/U+0009)
+ (rune= c #/U+000A)
+ (rune= c #/U+000D))))
+ (consume-rune input))
+ (unless (eq (read-rune input) #/=)
+ (wf-error zinput "Expected \"=\"."))
+ (while (let ((c (peek-rune input)))
+ (and (not (eq c :eof))
+ (or (rune= c #/U+0020)
+ (rune= c #/U+0009)
+ (rune= c #/U+000A)
+ (rune= c #/U+000D))))
+ (consume-rune input))
+ (cons name (read-att-value-2 input))))
+
+(defun canon-not-cdata-attval (value)
+ ;; | If the declared value is not CDATA, then the XML processor must
+ ;; | further process the normalized attribute value by discarding any
+ ;; | leading and trailing space (#x20) characters, and by replacing
+ ;; | sequences of space (#x20) characters by a single space (#x20)
+ ;; | character.
+ (with-rune-collector (collect)
+ (let ((gimme-20 nil)
+ (anything-seen-p nil))
+ (map nil (lambda (c)
+ (cond ((rune= c #/u+0020)
+ (setf gimme-20 t))
+ (t
+ (when (and anything-seen-p gimme-20)
+ (collect #/u+0020))
+ (setf gimme-20 nil)
+ (setf anything-seen-p t)
+ (collect c))))
+ value))))
+
+(definline data-rune-p (rune)
+ ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
+ ;;
+ ;; FIXME: das halte ich fuer verkehrt. Surrogates als Unicode-Zeichen
+ ;; sind verboten. Das liegt hier aber nicht vor, denn wir arbeiten
+ ;; ja tatsaechlich mit UTF-16. Verboten ist es nur, wenn wir ein
+ ;; solches Zeichen beim Dekodieren finden, das wird aber eben
+ ;; in encodings.lisp bereits geprueft. --david
+ (let ((c (rune-code rune)))
+ (or (= c #x9) (= c #xA) (= c #xD)
+ (<= #x20 c #xD7FF)
+ (<= #xE000 c #xFFFD)
+ (<= #xD800 c #xDBFF)
+ (<= #xDC00 c #xDFFF))))
+
+(defun read-att-value (zinput input mode &optional canon-space-p (delim nil))
+ (with-rune-collector-2 (collect)
+ (labels ((muffle (input delim)
+ (let (c)
+ (loop
+ (setf c (read-rune input))
+ (cond ((eql delim c)
+ (return))
+ ((eq c :eof)
+ (eox input "EOF"))
+ ((rune= c #/&)
+ (setf c (peek-rune input))
+ (cond ((eql c :eof)
+ (eox input))
+ ((rune= c #/#)
+ (let ((c (read-character-reference input)))
+ (%put-unicode-char c collect)))
+ (t
+ (unless (name-start-rune-p (peek-rune input))
+ (wf-error zinput "Expecting name after &."))
+ (let ((name (read-name-token input)))
+ (setf c (read-rune input))
+ (check-rune input c #/\;)
+ (ecase mode
+ (:ATT
+ (recurse-on-entity
+ zinput name :general
+ (lambda (zinput)
+ (muffle (car (zstream-input-stack zinput))
+ :eof))
+ t))
+ (:ENT
+ ;; bypass, but never the less we
+ ;; need to check for legal
+ ;; syntax.
+ ;; Must it be defined?
+ ;; allerdings: unparsed sind verboten
+ (collect #/&)
+ (map nil (lambda (x) (collect x)) name)
+ (collect #/\; )))))))
+ ((and (eq mode :ENT) (rune= c #/%))
+ (let ((d (peek-rune input)))
+ (when (eq d :eof)
+ (eox input))
+ (unless (name-start-rune-p d)
+ (wf-error zinput "Expecting name after %.")))
+ (let ((name (read-name-token input)))
+ (setf c (read-rune input))
+ (check-rune input c #/\;)
+ (cond (*expand-pe-p*
+ (recurse-on-entity
+ zinput name :parameter
+ (lambda (zinput)
+ (muffle (car (zstream-input-stack zinput))
+ :eof))))
+ (t
+ (wf-error zinput "No PE here.")))))
+ ((and (eq mode :ATT) (rune= c #/<))
+ (wf-error zinput "unexpected #\/<"))
+ ((and canon-space-p (space-rune-p c))
+ (collect #/space))
+ ((not (data-rune-p c))
+ (wf-error zinput "illegal char: ~S." c))
+ (t
+ (collect c)))))))
+ (declare (dynamic-extent #'muffle))
+ (muffle input (or delim
+ (let ((delim (read-rune input)))
+ (unless (member delim '(#/\" #/\') :test #'eql)
+ (wf-error zinput "invalid attribute delimiter"))
+ delim))))))
+
+(defun read-character-reference (input)
+ ;; The #/& is already read
+ (let ((res
+ (let ((c (read-rune input)))
+ (check-rune input c #/#)
+ (setq c (read-rune input))
+ (cond ((eql c :eof)
+ (eox input))
+ ((eql c #/x)
+ ;; hexadecimal
+ (setq c (read-rune input))
+ (when (eql c :eof)
+ (eox input))
+ (unless (digit-rune-p c 16)
+ (wf-error input "garbage in character reference"))
+ (prog1
+ (parse-integer
+ (with-output-to-string (sink)
+ (write-char (rune-char c) sink)
+ (while (progn
+ (setq c (read-rune input))
+ (when (eql c :eof)
+ (eox input))
+ (digit-rune-p c 16))
+ (write-char (rune-char c) sink)))
+ :radix 16)
+ (check-rune input c #/\;)))
+ ((rune<= #/0 c #/9)
+ ;; decimal
+ (prog1
+ (parse-integer
+ (with-output-to-string (sink)
+ (write-char (rune-char c) sink)
+ (while (progn
+ (setq c (read-rune input))
+ (when (eql c :eof)
+ (eox input))
+ (rune<= #/0 c #/9))
+ (write-char (rune-char c) sink)))
+ :radix 10)
+ (check-rune input c #/\;)))
+ (t
+ (wf-error input "Bad char in numeric character entity."))))))
+ (unless (code-data-char-p res)
+ (wf-error
+ input
+ "expansion of numeric character reference (#x~X) is no data char."
+ res))
+ res))
+
+(defun read-pi (input)
+ ;; "<?" is already read
+ (let (name)
+ (let ((c (peek-rune input)))
+ (unless (name-start-rune-p c)
+ (wf-error input "Expecting name after '<?'"))
+ (setf name (read-name-token input)))
+ (cond
+ ((member (peek-rune input) '(#/U+0020 #/U+0009 #/U+000A #/U+000D)
+ :test #'eql)
+ (values name (read-pi-content input)))
+ (t
+ (unless (and (eql (read-rune input) #/?)
+ (eql (read-rune input) #/>))
+ (wf-error input "malformed processing instruction"))
+ (values name "")))))
+
+(defun read-pi-content (input)
+ (read-S? input)
+ (let (d)
+ (with-rune-collector (collect)
+ (block nil
+ (tagbody
+ state-1
+ (setf d (read-rune input))
+ (when (eq d :eof)
+ (eox input))
+ (unless (data-rune-p d)
+ (wf-error input "Illegal char: ~S." d))
+ (when (rune= d #/?) (go state-2))
+ (collect d)
+ (go state-1)
+ state-2 ;; #/? seen
+ (setf d (read-rune input))
+ (when (eq d :eof)
+ (eox input))
+ (unless (data-rune-p d)
+ (wf-error input "Illegal char: ~S." d))
+ (when (rune= d #/>) (return))
+ (when (rune= d #/?)
+ (collect #/?)
+ (go state-2))
+ (collect #/?)
+ (collect d)
+ (go state-1))))))
+
+(defun read-comment-content (input &aux d)
+ (with-rune-collector (collect)
+ (block nil
+ (tagbody
+ state-1
+ (setf d (read-rune input))
+ (when (eq d :eof)
+ (eox input))
+ (unless (data-rune-p d)
+ (wf-error input "Illegal char: ~S." d))
+ (when (rune= d #/-) (go state-2))
+ (collect d)
+ (go state-1)
+ state-2 ;; #/- seen
+ (setf d (read-rune input))
+ (when (eq d :eof)
+ (eox input))
+ (unless (data-rune-p d)
+ (wf-error input "Illegal char: ~S." d))
+ (when (rune= d #/-) (go state-3))
+ (collect #/-)
+ (collect d)
+ (go state-1)
+ state-3 ;; #/- #/- seen
+ (setf d (read-rune input))
+ (when (eq d :eof)
+ (eox input))
+ (unless (data-rune-p d)
+ (wf-error input "Illegal char: ~S." d))
+ (when (rune= d #/>) (return))
+ (wf-error input "'--' not allowed in a comment")
+ (when (rune= d #/-)
+ (collect #/-)
+ (go state-3))
+ (collect #/-)
+ (collect #/-)
+ (collect d)
+ (go state-1)))))
+
+(defun read-cdata-sect (input &aux d)
+ ;; <![CDATA[ is already read
+ ;; read anything up to ]]>
+ (with-rune-collector (collect)
+ (block nil
+ (tagbody
+ state-1
+ (setf d (read-rune input))
+ (when (eq d :eof)
+ (eox input))
+ (unless (data-rune-p d)
+ (wf-error input "Illegal char: ~S." d))
+ (when (rune= d #/\]) (go state-2))
+ (collect d)
+ (go state-1)
+ state-2 ;; #/] seen
+ (setf d (read-rune input))
+ (when (eq d :eof)
+ (eox input))
+ (unless (data-rune-p d)
+ (wf-error input "Illegal char: ~S." d))
+ (when (rune= d #/\]) (go state-3))
+ (collect #/\])
+ (collect d)
+ (go state-1)
+ state-3 ;; #/\] #/\] seen
+ (setf d (read-rune input))
+ (when (eq d :eof)
+ (eox input))
+ (unless (data-rune-p d)
+ (wf-error input "Illegal char: ~S." d))
+ (when (rune= d #/>)
+ (return))
+ (when (rune= d #/\])
+ (collect #/\])
+ (go state-3))
+ (collect #/\])
+ (collect #/\])
+ (collect d)
+ (go state-1)))))
+
+;; some character categories
+
+(defun space-rune-p (rune)
+ (declare (type rune rune))
+ (or (rune= rune #/U+0020)
+ (rune= rune #/U+0009)
+ (rune= rune #/U+000A)
+ (rune= rune #/U+000D)))
+
+(defun code-data-char-p (c)
+ ;; any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
+ (or (= c #x9) (= c #xA) (= c #xD)
+ (<= #x20 c #xD7FF)
+ (<= #xE000 c #xFFFD)
+ (<= #x10000 c #x10FFFF)))
+
+(defun pubid-char-p (c)
+ (or (rune= c #/u+0020) (rune= c #/u+000D) (rune= c #/u+000A)
+ (rune<= #/a c #/z)
+ (rune<= #/A c #/Z)
+ (rune<= #/0 c #/9)
+ (member c '(#/- #/' #/\( #/\) #/+ #/, #/. #//
+ #/: #/= #/? #/\; #/! #/* #/#
+ #/@ #/$ #/_ #/%))))
+
+
+(defun expect (input category)
+ (multiple-value-bind (cat sem) (read-token input)
+ (unless (eq cat category)
+ (wf-error input "Expected ~S saw ~S [~S]" category cat sem))
+ (values cat sem)))
+
+(defun consume-token (input)
+ (read-token input))
+
+;;;; ---------------------------------------------------------------------------
+;;;; Parser
+;;;;
+
+(defun p/S (input)
+ ;; S ::= (#x20 | #x9 | #xD | #xA)+
+ (expect input :S)
+ (while (eq (peek-token input) :S)
+ (consume-token input)))
+
+(defun p/S? (input)
+ ;; S ::= (#x20 | #x9 | #xD | #xA)+
+ (while (eq (peek-token input) :S)
+ (consume-token input)))
+
+(defun p/nmtoken (input)
+ (nth-value 1 (expect input :nmtoken)))
+
+(defun p/name (input)
+ (let ((result (p/nmtoken input)))
+ (unless (name-start-rune-p (elt result 0))
+ (wf-error input "Expected name."))
+ result))
+
+(defun p/attlist-decl (input)
+ ;; [52] AttlistDecl ::= '<!ATTLIST' S Name (S AttDef)* S? '>'
+ (let (elm-name)
+ (expect input :|<!ATTLIST|)
+ (p/S input)
+ (setf elm-name (p/nmtoken input))
+ (loop
+ (let ((tok (read-token input)))
+ (case tok
+ (:S
+ (p/S? input)
+ (cond ((eq (peek-token input) :>)
+ (consume-token input)
+ (return))
+ (t
+ (multiple-value-bind (name type default) (p/attdef input)
+ (define-attribute (dtd *ctx*) elm-name name type default)) )))
+ (:>
+ (return))
+ (otherwise
+ (wf-error input
+ "Expected either another AttDef or end of \"<!ATTLIST\". -- saw ~S."
+ tok)))))))
+
+(defun p/attdef (input)
+ ;; [53] AttDef ::= Name S AttType S DefaultDecl
+ (let (name type default)
+ (setf name (p/nmtoken input))
+ (p/S input)
+ (setf type (p/att-type input))
+ (p/S input)
+ (setf default (p/default-decl input))
+ (values name type default)))
+
+(defun p/list (input item-parser delimiter)
+ ;; Parse something like S? <item> (S? <delimiter> <item>)* S?
+ ;;
+ (declare (type function item-parser))
+ (let (res)
+ (p/S? input)
+ (setf res (list (funcall item-parser input)))
+ (loop
+ (p/S? input)
+ (cond ((eq (peek-token input) delimiter)
+ (consume-token input)
+ (p/S? input)
+ (push (funcall item-parser input) res))
+ (t
+ (return))))
+ (p/S? input)
+ (reverse res)))
+
+(defun p/att-type (input)
+ ;; [54] AttType ::= StringType | TokenizedType | EnumeratedType
+ ;; [55] StringType ::= 'CDATA'
+ ;; [56] TokenizedType ::= 'ID' /*VC: ID */
+ ;; /*VC: One ID per Element Type */
+ ;; /*VC: ID Attribute Default */
+ ;; | 'IDREF' /*VC: IDREF */
+ ;; | 'IDREFS' /*VC: IDREF */
+ ;; | 'ENTITY' /*VC: Entity Name */
+ ;; | 'ENTITIES' /*VC: Entity Name */
+ ;; | 'NMTOKEN' /*VC: Name Token */
+ ;; | 'NMTOKENS' /*VC: Name Token */
+ ;; [57] EnumeratedType ::= NotationType | Enumeration
+ ;; [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
+ ;; /* VC: Notation Attributes */
+ ;; [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' /* VC: Enumeration */
+ (multiple-value-bind (cat sem) (read-token input)
+ (cond ((eq cat :nmtoken)
+ (cond ((rod= sem '#.(string-rod "CDATA")) :CDATA)
+ ((rod= sem '#.(string-rod "ID")) :ID)
+ ((rod= sem '#.(string-rod "IDREF")) :IDREFS)
+ ((rod= sem '#.(string-rod "IDREFS")) :IDREFS)
+ ((rod= sem '#.(string-rod "ENTITY")) :ENTITY)
+ ((rod= sem '#.(string-rod "ENTITIES")) :ENTITIES)
+ ((rod= sem '#.(string-rod "NMTOKEN")) :NMTOKEN)
+ ((rod= sem '#.(string-rod "NMTOKENS")) :NMTOKENS)
+ ((rod= sem '#.(string-rod "NOTATION"))
+ (let (names)
+ (p/S input)
+ (expect input :\()
+ (setf names (p/list input #'p/nmtoken :\| ))
+ (expect input :\))
+ (when *validate*
+ (setf (referenced-notations *ctx*)
+ (append names (referenced-notations *ctx*))))
+ (cons :NOTATION names)))
+ (t
+ (wf-error input "In p/att-type: ~S ~S." cat sem))))
+ ((eq cat :\()
+ ;; XXX Die Nmtoken-Syntax pruefen wir derzeit nur beim Validieren.
+ (let (names)
+ ;;(expect input :\()
+ (setf names (p/list input #'p/nmtoken :\| ))
+ (expect input :\))
+ (cons :ENUMERATION names)))
+ (t
+ (wf-error input "In p/att-type: ~S ~S." cat sem)) )))
+
+(defun p/default-decl (input)
+ ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED'
+ ;; | (('#FIXED' S)? AttValue) /* VC: Required Attribute */
+ ;;
+ ;; /* VC: Attribute Default Legal */
+ ;; /* WFC: No < in Attribute Values */
+ ;; /* VC: Fixed Attribute Default */
+ (multiple-value-bind (cat sem) (peek-token input)
+ (cond ((eq cat :|#REQUIRED|)
+ (consume-token input) :REQUIRED)
+ ((eq cat :|#IMPLIED|)
+ (consume-token input) :IMPLIED)
+ ((eq cat :|#FIXED|)
+ (consume-token input)
+ (p/S input)
+ (list :FIXED (p/att-value input)))
+ ((or (eq cat :\') (eq cat :\"))
+ (list :DEFAULT (p/att-value input)))
+ (t
+ (wf-error input "p/default-decl: ~S ~S." cat sem)) )))
+;;;;
+
+;; [70] EntityDecl ::= GEDecl | PEDecl
+;; [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
+;; [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
+;; [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
+;; [74] PEDef ::= EntityValue | ExternalID
+;; [75] ExternalID ::= 'SYSTEM' S SystemLiteral
+;; | 'PUBLIC' S PubidLiteral S SystemLiteral
+;; [76] NDataDecl ::= S 'NDATA' S Name /* VC: Notation Declared */
+
+(defun p/entity-decl (input)
+ (let (name def kind)
+ (expect input :|<!ENTITY|)
+ (p/S input)
+ (cond ((eq (peek-token input) :%)
+ (setf kind :parameter)
+ (consume-token input)
+ (p/S input))
+ (t
+ (setf kind :general)))
+ (setf name (p/name input))
+ (p/S input)
+ (setf def (p/entity-def input kind))
+ (define-entity input name kind def)
+ (p/S? input)
+ (expect input :\>)))
+
+(defun report-entity (h kind name def)
+ (etypecase def
+ (external-entdef
+ (let ((extid (entdef-extid def))
+ (ndata (entdef-ndata def)))
+ (if ndata
+ (sax:unparsed-entity-declaration h
+ name
+ (extid-public extid)
+ (uri-rod (extid-system extid))
+ ndata)
+ (sax:external-entity-declaration h
+ kind
+ name
+ (extid-public extid)
+ (uri-rod (extid-system extid))))))
+ (internal-entdef
+ (sax:internal-entity-declaration h kind name (entdef-value def)))))
+
+(defun p/entity-def (input kind)
+ (multiple-value-bind (cat sem) (peek-token input)
+ (cond ((member cat '(:\" :\'))
+ (make-internal-entdef (p/entity-value input)))
+ ((and (eq cat :nmtoken)
+ (or (rod= sem '#.(string-rod "SYSTEM"))
+ (rod= sem '#.(string-rod "PUBLIC"))))
+ (let (extid ndata)
+ (setf extid (p/external-id input nil))
+ (when (eq kind :general) ;NDATA allowed at all?
+ (cond ((eq (peek-token input) :S)
+ (p/S? input)
+ (when (and (eq (peek-token input) :nmtoken)
+ (rod= (nth-value 1 (peek-token input))
+ '#.(string-rod "NDATA")))
+ (consume-token input)
+ (p/S input)
+ (setf ndata (p/nmtoken input))
+ (when *validate*
+ (push ndata (referenced-notations *ctx*)))))))
+ (make-external-entdef extid ndata)))
+ (t
+ (wf-error input "p/entity-def: ~S / ~S." cat sem)) )))
+
+(defun p/entity-value (input)
+ (let ((delim (if (eq (read-token input) :\") #/\" #/\')))
+ (read-att-value input
+ (car (zstream-input-stack input))
+ :ENT
+ nil
+ delim)))
+
+(defun p/att-value (input)
+ (let ((delim (if (eq (read-token input) :\") #/\" #/\')))
+ (read-att-value input
+ (car (zstream-input-stack input))
+ :ATT
+ t
+ delim)))
+
+(defun p/external-id (input &optional (public-only-ok-p nil))
+ ;; xxx public-only-ok-p
+ (multiple-value-bind (cat sem) (read-token input)
+ (cond ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "SYSTEM")))
+ (p/S input)
+ (make-extid nil (p/system-literal input)))
+ ((and (eq cat :nmtoken) (rod= sem '#.(string-rod "PUBLIC")))
+ (let (pub sys)
+ (p/S input)
+ (setf pub (p/pubid-literal input))
+ (when (eq (peek-token input) :S)
+ (p/S input)
+ (when (member (peek-token input) '(:\" :\'))
+ (setf sys (p/system-literal input))))
+ (when (and (not public-only-ok-p)
+ (null sys))
+ (wf-error input "System identifier needed for this PUBLIC external identifier."))
+ (make-extid pub sys)))
+ (t
+ (wf-error input "Expected external-id: ~S / ~S." cat sem)))))
+
+
+;; [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
+;; [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
+;; [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9]
+;; | [-'()+,./:=?;!*#@$_%]
+
+(defun p/id (input)
+ (multiple-value-bind (cat) (read-token input)
+ (cond ((member cat '(:\" :\'))
+ (let ((delim (if (eq cat :\") #/\" #/\')))
+ (with-rune-collector (collect)
+ (loop
+ (let ((c (read-rune (car (zstream-input-stack input)))))
+ (cond ((eq c :eof)
+ (eox input "EOF in system literal."))
+ ((rune= c delim)
+ (return))
+ (t
+ (collect c))))))))
+ (t
+ (wf-error input "Expect either \" or \'.")))))
+
+;; it is important to cache the orginal URI rod, since the re-serialized
+;; uri-string can be different from the one parsed originally.
+(defun uri-rod (uri)
+ (if uri
+ (or (getf (puri:uri-plist uri) 'original-rod)
+ (rod (puri:render-uri uri nil)))
+ nil))
+
+(defun safe-parse-uri (str)
+ ;; puri doesn't like strings starting with file:///, although that is a very
+ ;; common is practise. Cut it away, we don't distinguish between scheme
+ ;; :FILE and NIL anway.
+ (when (eql (search "file://" str) 0)
+ (setf str (subseq str (length "file://"))))
+ (puri:parse-uri (coerce str 'simple-string)))
+
+(defun p/system-literal (input)
+ (let* ((rod (p/id input))
+ (result (safe-parse-uri (rod-string rod))))
+ (setf (getf (puri:uri-plist result) 'original-rod) rod)
+ result))
+
+(defun p/pubid-literal (input)
+ (let ((result (p/id input)))
+ (unless (every #'pubid-char-p result)
+ (wf-error input "Illegal pubid: ~S." (rod-string result)))
+ result))
+
+
+;;;;
+
+(defun p/element-decl (input)
+ (let (name content)
+ (expect input :|<!ELEMENT|)
+ (p/S input)
+ (setf name (p/nmtoken input))
+ (p/S input)
+ (setf content (normalize-mixed-cspec (p/cspec input)))
+ (unless (legal-content-model-p content *validate*)
+ (wf-error input "Malformed or invalid content model: ~S." (mu content)))
+ (p/S? input)
+ (expect input :\>)
+ (when *validate*
+ (define-element (dtd *ctx*) name content))
+ (list :element name content)))
+
+(defun maybe-compile-cspec (e)
+ (or (elmdef-compiled-cspec e)
+ (setf (elmdef-compiled-cspec e)
+ (let ((cspec (elmdef-content e)))
+ (unless cspec
+ (validity-error "(03) Element Valid: no definition for ~A"
+ (rod-string (elmdef-name e))))
+ (multiple-value-call #'cons
+ (compile-cspec cspec (standalone-check-necessary-p e)))))))
+
+(defun make-root-model (name)
+ (cons (lambda (actual-name)
+ (if (rod= actual-name name)
+ (constantly :dummy)
+ nil))
+ (constantly t)))
+
+;;; content spec validation:
+;;;
+;;; Given a `contentspec', COMPILE-CSPEC returns as multiple values two
+;;; functions A and B of one argument to be called for every
+;;; A. child element
+;;; B. text child node
+;;;
+;;; Function A will be called with
+;;; - the element name rod as its argument. If that element may appear
+;;; at the current position, a new function to be called for the next
+;;; child is returned. Otherwise NIL is returned.
+;;; - argument NIL at the end of the element, it must then return T or NIL
+;;; to indicate whether the end tag is valid.
+;;;
+;;; Function B will be called with the character data rod as its argument, it
+;;; returns a boolean indicating whether this text node is allowed.
+;;;
+;;; That is, if one of the functions ever returns NIL, the node is
+;;; rejected as invalid.
+
+(defun cmodel-done (actual-value)
+ (null actual-value))
+
+(defun compile-cspec (cspec &optional standalone-check)
+ (cond
+ ((atom cspec)
+ (ecase cspec
+ (:EMPTY (values #'cmodel-done (constantly nil)))
+ (:PCDATA (values #'cmodel-done (constantly t)))
+ (:ANY
+ (values (labels ((doit (name) (if name #'doit t))) #'doit)
+ (constantly t)))))
+ ((and (eq (car cspec) '*)
+ (let ((subspec (second cspec)))
+ (and (eq (car subspec) 'or) (eq (cadr subspec) :PCDATA))))
+ (values (compile-mixed (second cspec))
+ (constantly t)))
+ (t
+ (values (compile-content-model cspec)
+ (lambda (rod)
+ (when standalone-check
+ (validity-error "(02) Standalone Document Declaration: whitespace"))
+ (every #'white-space-rune-p rod))))))
+
+(defun compile-mixed (cspec)
+ ;; das koennten wir theoretisch auch COMPILE-CONTENT-MODEL erledigen lassen
+ (let ((allowed-names (cddr cspec)))
+ (labels ((doit (actual-name)
+ (cond
+ ((null actual-name) t)
+ ((member actual-name allowed-names :test #'rod=) #'doit)
+ (t nil))))
+ #'doit)))
+
+(defun compile-content-model (cspec &optional (continuation #'cmodel-done))
+ (if (vectorp cspec)
+ (lambda (actual-name)
+ (if (and actual-name (rod= cspec actual-name))
+ continuation
+ nil))
+ (ecase (car cspec)
+ (and
+ (labels ((traverse (seq)
+ (compile-content-model (car seq)
+ (if (cdr seq)
+ (traverse (cdr seq))
+ continuation))))
+ (traverse (cdr cspec))))
+ (or
+ (let ((options (mapcar (rcurry #'compile-content-model continuation)
+ (cdr cspec))))
+ (lambda (actual-name)
+ (some (rcurry #'funcall actual-name) options))))
+ (?
+ (let ((maybe (compile-content-model (second cspec) continuation)))
+ (lambda (actual-name)
+ (or (funcall maybe actual-name)
+ (funcall continuation actual-name)))))
+ (*
+ (let (maybe-continuation)
+ (labels ((recurse (actual-name)
+ (if (null actual-name)
+ (funcall continuation actual-name)
+ (or (funcall maybe-continuation actual-name)
+ (funcall continuation actual-name)))))
+ (setf maybe-continuation
+ (compile-content-model (second cspec) #'recurse))
+ #'recurse)))
+ (+
+ (let ((it (cadr cspec)))
+ (compile-content-model `(and ,it (* ,it)) continuation))))))
+
+(defun setp (list &key (test 'eql))
+ (equal list (remove-duplicates list :test test)))
+
+(defun legal-content-model-p (cspec &optional validate)
+ (or (eq cspec :PCDATA)
+ (eq cspec :ANY)
+ (eq cspec :EMPTY)
+ (and (consp cspec)
+ (eq (car cspec) '*)
+ (consp (cadr cspec))
+ (eq (car (cadr cspec)) 'or)
+ (eq (cadr (cadr cspec)) :PCDATA)
+ (every #'vectorp (cddr (cadr cspec)))
+ (if (and validate (not (setp (cddr (cadr cspec)) :test #'rod=)))
+ (validity-error "VC: No Duplicate Types (07)")
+ t))
+ (labels ((walk (x)
+ (cond ((member x '(:PCDATA :ANY :EMPTY))
+ nil)
+ ((atom x) t)
+ ((and (walk (car x))
+ (walk (cdr x)))))))
+ (walk cspec))))
+
+;; wir fahren besser, wenn wir machen:
+
+;; cspec ::= 'EMPTY' | 'ANY' | '#PCDATA'
+;; | Name
+;; | cs
+;; cs ::= '(' S? cspec ( S? '|' S? cspec)* S? ')' ('?' | '*' | '+')?
+;; und eine post factum analyse
+
+(defun p/cspec (input &optional recursivep)
+ (let ((term
+ (let ((names nil) op-cat op res stream)
+ (multiple-value-bind (cat sem) (peek-token input)
+ (cond ((eq cat :nmtoken)
+ (consume-token input)
+ (cond ((rod= sem '#.(string-rod "EMPTY"))
+ :EMPTY)
+ ((rod= sem '#.(string-rod "ANY"))
+ :ANY)
+ ((not recursivep)
+ (wf-error input "invalid content spec"))
+ (t
+ sem)))
+ ((eq cat :\#PCDATA)
+ (consume-token input)
+ :PCDATA)
+ ((eq cat :\()
+ (setf stream (car (zstream-input-stack input)))
+ (consume-token input)
+ (p/S? input)
+ (setq names (list (p/cspec input t)))
+ (p/S? input)
+ (cond ((member (peek-token input) '(:\| :\,))
+ (setf op-cat (peek-token input))
+ (setf op (if (eq op-cat :\,) 'and 'or))
+ (while (eq (peek-token input) op-cat)
+ (consume-token input)
+ (p/S? input)
+ (push (p/cspec input t) names)
+ (p/S? input))
+ (setf res (cons op (reverse names))))
+ (t
+ (setf res (cons 'and names))))
+ (p/S? input)
+ (expect input :\))
+ (when *validate*
+ (unless (eq stream (car (zstream-input-stack input)))
+ (validity-error "(06) Proper Group/PE Nesting")))
+ res)
+ (t
+ (wf-error input "p/cspec - ~s / ~s" cat sem)))))))
+ (cond ((eq (peek-token input) :?) (consume-token input) (list '? term))
+ ((eq (peek-token input) :+) (consume-token input) (list '+ term))
+ ((eq (peek-token input) :*) (consume-token input) (list '* term))
+ (t
+ term))))
+
+(defun normalize-mixed-cspec (cspec)
+ ;; der Parser oben funktioniert huebsch fuer die children-Regel, aber
+ ;; fuer Mixed ist das Ergebnis nicht praktisch, denn dort wollen wir
+ ;; eigentlich auf eine Liste von Namen in einheitlichem Format hinaus.
+ ;; Dazu normalisieren wir einfach in eine der beiden folgenden Formen:
+ ;; (* (or :PCDATA ...rods...)) -- und zwar exakt so!
+ ;; :PCDATA -- sonst ganz trivial
+ (flet ((trivialp (c)
+ (and (consp c)
+ (and (eq (car c) 'and)
+ (eq (cadr c) :PCDATA)
+ (null (cddr c))))))
+ (if (or (trivialp cspec) ;(and PCDATA)
+ (and (consp cspec) ;(* (and PCDATA))
+ (and (eq (car cspec) '*)
+ (null (cddr cspec))
+ (trivialp (cadr cspec)))))
+ :PCDATA
+ cspec)))
+
+;; [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
+
+
+;; [52] AttlistDecl ::= '<!ATTLIST' S Name AttDefs S? '>'
+;; [52] AttlistDecl ::= '<!ATTLIST' S Name S? '>'
+;; [53] AttDefs ::= S Name S AttType S DefaultDecl AttDefs
+;; [53] AttDefs ::=
+
+(defun p/notation-decl (input)
+ (let (name id)
+ (expect input :|<!NOTATION|)
+ (p/S input)
+ (setf name (p/name input))
+ (p/S input)
+ (setf id (p/external-id input t))
+ (p/S? input)
+ (expect input :\>)
+ (sax:notation-declaration (handler *ctx*)
+ name
+ (if (extid-public id)
+ (normalize-public-id (extid-public id))
+ nil)
+ (uri-rod (extid-system id)))
+ (when (and sax:*namespace-processing* (find #/: name))
+ (wf-error input "colon in notation name"))
+ (when *validate*
+ (define-notation (dtd *ctx*) name id))
+ (list :notation-decl name id)))
+
+(defun normalize-public-id (rod)
+ (with-rune-collector (collect)
+ (let ((gimme-20 nil)
+ (anything-seen-p nil))
+ (map nil (lambda (c)
+ (cond
+ ((or (rune= c #/u+0009)
+ (rune= c #/u+000A)
+ (rune= c #/u+000D)
+ (rune= c #/u+0020))
+ (setf gimme-20 t))
+ (t
+ (when (and anything-seen-p gimme-20)
+ (collect #/u+0020))
+ (setf gimme-20 nil)
+ (setf anything-seen-p t)
+ (collect c))))
+ rod))))
+
+;;;
+
+(defun p/conditional-sect (input)
+ (expect input :<!\[ )
+ (let ((stream (car (zstream-input-stack input))))
+ (p/S? input)
+ (multiple-value-bind (cat sem) (read-token input)
+ (cond ((and (eq cat :nmtoken)
+ (rod= sem '#.(string-rod "INCLUDE")))
+ (p/include-sect input stream))
+ ((and (eq cat :nmtoken)
+ (rod= sem '#.(string-rod "IGNORE")))
+ (p/ignore-sect input stream))
+ (t
+ (wf-error input "Expected INCLUDE or IGNORE after \"<![\"."))))))
+
+(defun p/cond-expect (input cat initial-stream)
+ (expect input cat)
+ (when *validate*
+ (unless (eq (car (zstream-input-stack input)) initial-stream)
+ (validity-error "(21) Proper Conditional Section/PE Nesting"))))
+
+(defun p/include-sect (input initial-stream)
+ ;; <![INCLUDE is already read.
+ (p/S? input)
+ (p/cond-expect input :\[ initial-stream)
+ (p/ext-subset-decl input)
+ (p/cond-expect input :\] initial-stream)
+ (p/cond-expect input :\] initial-stream)
+ (p/cond-expect input :\> initial-stream))
+
+(defun p/ignore-sect (input initial-stream)
+ ;; <![IGNORE is already read.
+ ;; XXX Is VC 21 being checked for nested sections?
+ (p/S? input)
+ (p/cond-expect input :\[ initial-stream)
+ (let ((input (car (zstream-input-stack input))))
+ (let ((level 0))
+ (do ((c1 (read-rune input) (read-rune input))
+ (c2 #/U+0000 c1)
+ (c3 #/U+0000 c2))
+ ((= level -1))
+ (declare (type fixnum level))
+ (cond ((eq c1 :eof)
+ (eox input "EOF in <![IGNORE ... >")))
+ (cond ((and (rune= c3 #/<) (rune= c2 #/!) (rune= c1 #/\[))
+ (incf level)))
+ (cond ((and (rune= c3 #/\]) (rune= c2 #/\]) (rune= c1 #/>))
+ (decf level))) )))
+ (unless (eq (car (zstream-input-stack input)) initial-stream)
+ (validity-error "(21) Proper Conditional Section/PE Nesting")))
+
+(defun p/ext-subset-decl (input)
+ ;; ( markupdecl | conditionalSect | S )*
+ (loop
+ (case (let ((*expand-pe-p* nil)) (peek-token input))
+ (:|<![| (let ((*expand-pe-p* t)) (p/conditional-sect input)))
+ (:S (consume-token input))
+ (:eof (return))
+ ((:|<!ELEMENT| :|<!ATTLIST| :|<!ENTITY| :|<!NOTATION| :PI :COMMENT)
+ (let ((*expand-pe-p* t)
+ (*external-subset-p* t))
+ (p/markup-decl input)))
+ ((:PE-REFERENCE)
+ (let ((name (nth-value 1 (read-token input))))
+ (recurse-on-entity input name :parameter
+ (lambda (input)
+ (etypecase (checked-get-entdef name :parameter)
+ (external-entdef
+ (p/ext-subset input))
+ (internal-entdef
+ (p/ext-subset-decl input)))
+ (unless (eq :eof (peek-token input))
+ (wf-error input "Trailing garbage."))))))
+ (otherwise (return)))) )
+
+(defun p/markup-decl (input)
+ (peek-token input)
+ (let ((stream (car (zstream-input-stack input))))
+ (multiple-value-prog1
+ (p/markup-decl-unsafe input)
+ (when *validate*
+ (unless (eq stream (car (zstream-input-stack input)))
+ (validity-error "(01) Proper Declaration/PE Nesting"))))))
+
+(defun p/markup-decl-unsafe (input)
+ ;; markupdecl ::= elementdecl | AttlistDecl /* VC: Proper Declaration/PE Nesting */
+ ;; | EntityDecl | NotationDecl
+ ;; | PI | Comment /* WFC: PEs in Internal Subset */
+ (let ((token (peek-token input))
+ (*expand-pe-p* (and *expand-pe-p* *external-subset-p*)))
+ (case token
+ (:|<!ELEMENT| (p/element-decl input))
+ (:|<!ATTLIST| (p/attlist-decl input))
+ (:|<!ENTITY| (p/entity-decl input))
+ (:|<!NOTATION| (p/notation-decl input))
+ (:PI
+ (let ((sem (nth-value 1 (read-token input))))
+ (sax:processing-instruction (handler *ctx*) (car sem) (cdr sem))))
+ (:COMMENT (consume-token input))
+ (otherwise
+ (wf-error input "p/markup-decl ~S" (peek-token input))))))
+
+(defun setup-encoding (input xml-header)
+ (when (xml-header-encoding xml-header)
+ (let ((enc (find-encoding (xml-header-encoding xml-header))))
+ (cond (enc
+ (setf (xstream-encoding (car (zstream-input-stack input)))
+ enc))
+ (t
+ (warn "There is no such encoding: ~S." (xml-header-encoding xml-header)))))))
+
+(defun set-full-speed (input)
+ (let ((xstream (car (zstream-input-stack input))))
+ (when xstream
+ (set-to-full-speed xstream))))
+
+(defun p/ext-subset (input)
+ (cond ((eq (peek-token input) :xml-decl)
+ (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input))))))
+ (setup-encoding input hd))
+ (consume-token input)))
+ (set-full-speed input)
+ (p/ext-subset-decl input)
+ (unless (eq (peek-token input) :eof)
+ (wf-error input "Trailing garbage - ~S." (peek-token input))))
+
+(defvar *catalog* nil)
+
+(defun extid-using-catalog (extid)
+ (if *catalog*
+ (let ((sysid
+ (resolve-extid (extid-public extid)
+ (extid-system extid)
+ *catalog*)))
+ (if sysid
+ (make-extid nil sysid)
+ extid))
+ extid))
+
+(defun p/doctype-decl (input &optional dtd-extid)
+ (let ()
+ (let ((*expand-pe-p* nil)
+ name extid)
+ (expect input :|<!DOCTYPE|)
+ (p/S input)
+ (setq name (p/nmtoken input))
+ (when *validate*
+ (setf (model-stack *ctx*) (list (make-root-model name))))
+ (when (eq (peek-token input) :S)
+ (p/S input)
+ (unless (or (eq (peek-token input) :\[ )
+ (eq (peek-token input) :\> ))
+ (setf extid (p/external-id input t))))
+ (when dtd-extid
+ (setf extid dtd-extid))
+ (p/S? input)
+ (sax:start-dtd (handler *ctx*)
+ name
+ (and extid (extid-public extid))
+ (and extid (uri-rod (extid-system extid))))
+ (when (eq (peek-token input) :\[ )
+ (when (disallow-internal-subset *ctx*)
+ (wf-error input "document includes an internal subset"))
+ (ensure-dtd)
+ (consume-token input)
+ (sax:start-internal-subset (handler *ctx*))
+ (while (progn (p/S? input)
+ (not (eq (peek-token input) :\] )))
+ (if (eq (peek-token input) :PE-REFERENCE)
+ (let ((name (nth-value 1 (read-token input))))
+ (recurse-on-entity input name :parameter
+ (lambda (input)
+ (etypecase (checked-get-entdef name :parameter)
+ (external-entdef
+ (p/ext-subset input))
+ (internal-entdef
+ (p/ext-subset-decl input)))
+ (unless (eq :eof (peek-token input))
+ (wf-error input "Trailing garbage.")))))
+ (let ((*expand-pe-p* t))
+ (p/markup-decl input))))
+ (consume-token input)
+ (sax:end-internal-subset (handler *ctx*))
+ (p/S? input))
+ (expect input :>)
+ (when extid
+ (let* ((effective-extid
+ (extid-using-catalog (absolute-extid input extid)))
+ (sysid (extid-system effective-extid))
+ (fresh-dtd-p (null (dtd *ctx*)))
+ (cached-dtd
+ (and fresh-dtd-p
+ (not (standalone-p *ctx*))
+ (getdtd sysid *dtd-cache*))))
+ (cond
+ (cached-dtd
+ (setf (dtd *ctx*) cached-dtd)
+ (report-cached-dtd cached-dtd))
+ (t
+ (let* ((xi2 (xstream-open-extid effective-extid))
+ (zi2 (make-zstream :input-stack (list xi2))))
+ (ensure-dtd)
+ (p/ext-subset zi2)
+ (when (and fresh-dtd-p
+ *cache-all-dtds*
+ *validate*
+ (not (standalone-p *ctx*)))
+ (setf (getdtd sysid *dtd-cache*) (dtd *ctx*))))))))
+ (sax:end-dtd (handler *ctx*))
+ (let ((dtd (dtd *ctx*)))
+ (sax:entity-resolver
+ (handler *ctx*)
+ (lambda (name handler) (resolve-entity name handler dtd)))
+ (sax::dtd (handler *ctx*) dtd))
+ (list :DOCTYPE name extid))))
+
+(defun report-cached-dtd (dtd)
+ (maphash (lambda (k v)
+ (report-entity (handler *ctx*) :general k (cdr v)))
+ (dtd-gentities dtd))
+ (maphash (lambda (k v)
+ (report-entity (handler *ctx*) :parameter k (cdr v)))
+ (dtd-pentities dtd))
+ (maphash (lambda (k v)
+ (sax:notation-declaration
+ (handler *ctx*)
+ k
+ (if (extid-public v)
+ (normalize-public-id (extid-public v))
+ nil)
+ (uri-rod (extid-system v))))
+ (dtd-notations dtd)))
+
+(defun p/misc*-2 (input)
+ ;; Misc*
+ (while (member (peek-token input) '(:COMMENT :PI :S))
+ (case (peek-token input)
+ (:COMMENT
+ (sax:comment (handler *ctx*) (nth-value 1 (peek-token input))))
+ (:PI
+ (sax:processing-instruction
+ (handler *ctx*)
+ (car (nth-value 1 (peek-token input)))
+ (cdr (nth-value 1 (peek-token input))))))
+ (consume-token input)))
+
+(defun p/document
+ (input handler
+ &key validate dtd root entity-resolver disallow-internal-subset
+ (recode t))
+ ;; check types of user-supplied arguments for better error messages:
+ (check-type validate boolean)
+ (check-type recode boolean)
+ (check-type dtd (or null extid))
+ (check-type root (or null rod))
+ (check-type entity-resolver (or null function symbol))
+ (check-type disallow-internal-subset boolean)
+ #+rune-is-integer
+ (when recode
+ (setf handler (make-recoder handler #'rod-to-utf8-string)))
+ (let ((*ctx*
+ (make-context :handler handler
+ :main-zstream input
+ :entity-resolver entity-resolver
+ :disallow-internal-subset disallow-internal-subset))
+ (*validate* validate))
+ (sax:start-document handler)
+ ;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc*
+ ;; Misc ::= Comment | PI | S
+ ;; xmldecl::='<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
+ ;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
+ ;;
+ ;; we will use the attribute-value parser for the xml decl.
+ (let ((*data-behaviour* :DTD))
+ ;; optional XMLDecl?
+ (cond ((eq (peek-token input) :xml-decl)
+ (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input))))))
+ (setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes))
+ (setup-encoding input hd))
+ (read-token input)))
+ (set-full-speed input)
+ ;; Misc*
+ (p/misc*-2 input)
+ ;; (doctypedecl Misc*)?
+ (cond
+ ((eq (peek-token input) :<!DOCTYPE)
+ (p/doctype-decl input dtd)
+ (p/misc*-2 input))
+ (dtd
+ (let ((dummy (string->xstream "<!DOCTYPE dummy>")))
+ (setf (xstream-name dummy)
+ (make-stream-name
+ :entity-name "dummy doctype"
+ :entity-kind :main
+ :uri (zstream-base-sysid input)))
+ (p/doctype-decl (make-zstream :input-stack (list dummy)) dtd)))
+ ((and validate (not dtd))
+ (validity-error "invalid document: no doctype")))
+ (ensure-dtd)
+ ;; Override expected root element if asked to
+ (when root
+ (setf (model-stack *ctx*) (list (make-root-model root))))
+ ;; element
+ (let ((*data-behaviour* :DOC))
+ (p/element input))
+ ;; optional Misc*
+ (p/misc*-2 input)
+ (unless (eq (peek-token input) :eof)
+ (wf-error input "Garbage at end of document."))
+ (when *validate*
+ (maphash (lambda (k v)
+ (unless v
+ (validity-error "(11) IDREF: ~S not defined" (rod-string k))))
+ (id-table *ctx*))
+
+ (dolist (name (referenced-notations *ctx*))
+ (unless (find-notation name (dtd *ctx*))
+ (validity-error "(23) Notation Declared: ~S" (rod-string name)))))
+ (sax:end-document handler))))
+
+(defun p/element (input)
+ (multiple-value-bind (cat sem) (read-token input)
+ (case cat
+ ((:stag :ztag))
+ (:eof (eox input))
+ (t (wf-error input "element expected")))
+ (destructuring-bind (&optional name &rest raw-attrs) sem
+ (validate-start-element *ctx* name)
+ (let* ((attrs
+ (process-attributes *ctx* name (build-attribute-list raw-attrs)))
+ (*namespace-bindings* *namespace-bindings*)
+ new-namespaces)
+ (when sax:*namespace-processing*
+ (setf new-namespaces (declare-namespaces attrs))
+ (mapc #'set-attribute-namespace attrs))
+ (multiple-value-bind (uri prefix local-name)
+ (if sax:*namespace-processing*
+ (decode-qname name)
+ (values nil nil nil))
+ (declare (ignore prefix))
+ (check-attribute-uniqueness attrs)
+ (unless (or sax:*include-xmlns-attributes*
+ (null sax:*namespace-processing*))
+ (setf attrs
+ (remove-if (compose #'xmlns-attr-p #'sax:attribute-qname)
+ attrs)))
+ (cond
+ ((eq cat :ztag)
+ (sax:start-element (handler *ctx*) uri local-name name attrs)
+ (sax:end-element (handler *ctx*) uri local-name name))
+
+ ((eq cat :stag)
+ (sax:start-element (handler *ctx*) uri local-name name attrs)
+ (p/content input)
+ (multiple-value-bind (cat2 sem2) (read-token input)
+ (unless (and (eq cat2 :etag)
+ (eq (car sem2) name))
+ (wf-error input "Bad nesting. ~S / ~S"
+ (mu name)
+ (mu (cons cat2 sem2))))
+ (when (cdr sem2)
+ (wf-error input "no attributes allowed in end tag")))
+ (sax:end-element (handler *ctx*) uri local-name name))
+
+ (t
+ (wf-error input "Expecting element, got ~S." cat))))
+ (undeclare-namespaces new-namespaces))
+ (validate-end-element *ctx* name))))
+
+(defun p/content (input)
+ ;; [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
+ (multiple-value-bind (cat sem) (peek-token input)
+ (case cat
+ ((:stag :ztag)
+ (p/element input)
+ (p/content input))
+ ((:CDATA)
+ (consume-token input)
+ (when (search #"]]>" sem)
+ (wf-error input "']]>' not allowed in CharData"))
+ (validate-characters *ctx* sem)
+ (sax:characters (handler *ctx*) sem)
+ (p/content input))
+ ((:ENTITY-REF)
+ (let ((name sem))
+ (consume-token input)
+ (append
+ (recurse-on-entity input name :general
+ (lambda (input)
+ (prog1
+ (etypecase (checked-get-entdef name :general)
+ (internal-entdef (p/content input))
+ (external-entdef (p/ext-parsed-ent input)))
+ (unless (eq (peek-token input) :eof)
+ (wf-error input "Trailing garbage. - ~S"
+ (peek-token input))))))
+ (p/content input))))
+ ((:<!\[)
+ (consume-token input)
+ (cons
+ (let ((input (car (zstream-input-stack input))))
+ (unless (and (rune= #/C (read-rune input))
+ (rune= #/D (read-rune input))
+ (rune= #/A (read-rune input))
+ (rune= #/T (read-rune input))
+ (rune= #/A (read-rune input))
+ (rune= #/\[ (read-rune input)))
+ (wf-error input "After '<![', 'CDATA[' is expected."))
+ (validate-characters *ctx* #"hack") ;anything other than whitespace
+ (sax:start-cdata (handler *ctx*))
+ (sax:characters (handler *ctx*) (read-cdata-sect input))
+ (sax:end-cdata (handler *ctx*)))
+ (p/content input)))
+ ((:PI)
+ (consume-token input)
+ (sax:processing-instruction (handler *ctx*) (car sem) (cdr sem))
+ (p/content input))
+ ((:COMMENT)
+ (consume-token input)
+ (sax:comment (handler *ctx*) sem)
+ (p/content input))
+ (otherwise
+ nil))))
+
+;; [78] extParsedEnt ::= TextDecl? contentw
+;; [79] extPE ::= TextDecl? extSubsetDecl
+
+(defstruct xml-header
+ version
+ encoding
+ (standalone-p nil))
+
+(defun p/ext-parsed-ent (input)
+ ;; [78] extParsedEnt ::= '<?xml' VersionInfo? EncodingDecl S? '?>' content
+ (when (eq (peek-token input) :xml-decl)
+ (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input))))))
+ (setup-encoding input hd))
+ (consume-token input))
+ (set-full-speed input)
+ (p/content input))
+
+(defun parse-xml-decl (content)
+ (let* ((res (make-xml-header))
+ (i (make-rod-xstream content))
+ (z (make-zstream :input-stack (list i)))
+ (atts (read-attribute-list z i t)))
+ (unless (eq (peek-rune i) :eof)
+ (wf-error i "Garbage at end of XMLDecl."))
+ ;; versioninfo muss da sein
+ ;; dann ? encodingdecl
+ ;; dann ? sddecl
+ ;; dann ende
+ (unless (eq (caar atts) (intern-name '#.(string-rod "version")))
+ (wf-error i "XMLDecl needs version."))
+ (unless (and (>= (length (cdar atts)) 1)
+ (every (lambda (x)
+ (or (rune<= #/a x #/z)
+ (rune<= #/A x #/Z)
+ (rune<= #/0 x #/9)
+ (rune= x #/_)
+ (rune= x #/.)
+ (rune= x #/:)
+ (rune= x #/-)))
+ (cdar atts)))
+ (wf-error i"Bad XML version number: ~S." (rod-string (cdar atts))))
+ (setf (xml-header-version res) (rod-string (cdar atts)))
+ (pop atts)
+ (when (eq (caar atts) (intern-name '#.(string-rod "encoding")))
+ (unless (and (>= (length (cdar atts)) 1)
+ (every (lambda (x)
+ (or (rune<= #/a x #/z)
+ (rune<= #/A x #/Z)
+ (rune<= #/0 x #/9)
+ (rune= x #/_)
+ (rune= x #/.)
+ (rune= x #/-)))
+ (cdar atts))
+ ((lambda (x)
+ (or (rune<= #/a x #/z)
+ (rune<= #/A x #/Z)))
+ (aref (cdar atts) 0)))
+ (wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts))))
+ (setf (xml-header-encoding res) (rod-string (cdar atts)))
+ (pop atts))
+ (when (eq (caar atts) (intern-name '#.(string-rod "standalone")))
+ (unless (or (rod= (cdar atts) '#.(string-rod "yes"))
+ (rod= (cdar atts) '#.(string-rod "no")))
+ (wf-error i "XMLDecl's 'standalone' attribute must be exactly \"yes\" or \"no\" and not ~S."
+ (rod-string (cdar atts))))
+ (setf (xml-header-standalone-p res)
+ (if (rod-equal '#.(string-rod "yes") (cdar atts))
+ :yes
+ :no))
+ (pop atts))
+ (when atts
+ (wf-error i "Garbage in XMLDecl: ~A" (rod-string content)))
+ res))
+
+(defun parse-text-decl (content)
+ (let* ((res (make-xml-header))
+ (i (make-rod-xstream content))
+ (z (make-zstream :input-stack (list i)))
+ (atts (read-attribute-list z i t)))
+ (unless (eq (peek-rune i) :eof)
+ (wf-error i "Garbage at end of TextDecl"))
+ ;; versioninfo optional
+ ;; encodingdecl muss da sein
+ ;; dann ende
+ (when (eq (caar atts) (intern-name '#.(string-rod "version")))
+ (unless (and (>= (length (cdar atts)) 1)
+ (every (lambda (x)
+ (or (rune<= #/a x #/z)
+ (rune<= #/A x #/Z)
+ (rune<= #/0 x #/9)
+ (rune= x #/_)
+ (rune= x #/.)
+ (rune= x #/:)
+ (rune= x #/-)))
+ (cdar atts)))
+ (wf-error i "Bad XML version number: ~S." (rod-string (cdar atts))))
+ (setf (xml-header-version res) (rod-string (cdar atts)))
+ (pop atts))
+ (unless (eq (caar atts) (intern-name '#.(string-rod "encoding")))
+ (wf-error i "TextDecl needs encoding."))
+ (unless (and (>= (length (cdar atts)) 1)
+ (every (lambda (x)
+ (or (rune<= #/a x #/z)
+ (rune<= #/A x #/Z)
+ (rune<= #/0 x #/9)
+ (rune= x #/_)
+ (rune= x #/.)
+ (rune= x #/-)))
+ (cdar atts))
+ ((lambda (x)
+ (or (rune<= #/a x #/z)
+ (rune<= #/A x #/Z)
+ (rune<= #/0 x #/9)))
+ (aref (cdar atts) 0)))
+ (wf-error i "Bad XML encoding name: ~S." (rod-string (cdar atts))))
+ (setf (xml-header-encoding res) (rod-string (cdar atts)))
+ (pop atts)
+ (when atts
+ (wf-error i "Garbage in TextDecl: ~A" (rod-string content)))
+ res))
+
+;;;; ---------------------------------------------------------------------------
+;;;; mu
+;;;;
+
+(defun mu (x)
+ (cond ((stringp x) x)
+ ((vectorp x) (rod-string x))
+ ((consp x)
+ (cons (mu (car x)) (mu (cdr x))))
+ (x)))
+
+;;;; ---------------------------------------------------------------------------
+;;;; User interface ;;;;
+
+(defun specific-or (component &optional (alternative nil))
+ (if (eq component :unspecific)
+ alternative
+ component))
+
+(defun string-or (str &optional (alternative nil))
+ (if (zerop (length str))
+ alternative
+ str))
+
+(defun make-uri (&rest initargs &key path query &allow-other-keys)
+ (apply #'make-instance
+ 'puri:uri
+ :path (and path (escape-path path))
+ :query (and query (escape-query query))
+ initargs))
+
+(defun escape-path (list)
+ (puri::render-parsed-path list t))
+
+(defun escape-query (pairs)
+ (flet ((escape (str)
+ (puri::encode-escaped-encoding str puri::*reserved-characters* t)))
+ (let ((first t))
+ (with-output-to-string (s)
+ (dolist (pair pairs)
+ (if first
+ (setf first nil)
+ (write-char #\& s))
+ (write-string (escape (car pair)) s)
+ (write-char #\= s)
+ (write-string (escape (cdr pair)) s))))))
+
+(defun uri-parsed-query (uri)
+ (flet ((unescape (str)
+ (puri::decode-escaped-encoding str t puri::*reserved-characters*)))
+ (let ((str (puri:uri-query uri)))
+ (cond
+ (str
+ (let ((pairs '()))
+ (dolist (s (split-sequence-if (lambda (x) (eql x #\&)) str))
+ (destructuring-bind (name value)
+ (split-sequence-if (lambda (x) (eql x #\=)) s)
+ (push (cons (unescape name) (unescape value)) pairs)))
+ (reverse pairs)))
+ (t
+ nil)))))
+
+(defun query-value (name alist)
+ (cdr (assoc name alist :test #'equal)))
+
+(defun pathname-to-uri (pathname)
+ (let ((path
+ (append (pathname-directory pathname)
+ (list
+ (if (specific-or (pathname-type pathname))
+ (concatenate 'string
+ (pathname-name pathname)
+ "."
+ (pathname-type pathname))
+ (pathname-name pathname))))))
+ (if (eq (car path) :relative)
+ (make-uri :path path)
+ (make-uri :scheme :file
+ :host (concatenate 'string
+ (string-or (host-namestring pathname))
+ "+"
+ (specific-or (pathname-device pathname)))
+ :path path))))
+
+(defun parse-name.type (str)
+ (if str
+ (let ((i (position #\. str :from-end t)))
+ (if i
+ (values (subseq str 0 i) (subseq str (1+ i)))
+ (values str nil)))
+ (values nil nil)))
+
+(defun uri-to-pathname (uri)
+ (let ((scheme (puri:uri-scheme uri))
+ (path (puri:uri-parsed-path uri)))
+ (unless (member scheme '(nil :file))
+ (error 'xml-parse-error
+ :format-control "URI scheme ~S not supported"
+ :format-arguments (list scheme)))
+ (if (eq (car path) :relative)
+ (multiple-value-bind (name type)
+ (parse-name.type (car (last path)))
+ (make-pathname :directory (butlast path)
+ :name name
+ :type type))
+ (multiple-value-bind (name type)
+ (parse-name.type (car (last (cdr path))))
+ (destructuring-bind (host device)
+ (split-sequence-if (lambda (x) (eql x #\+))
+ (or (puri:uri-host uri) "+"))
+ (make-pathname :host (string-or host)
+ :device (string-or device)
+ :directory (cons :absolute (butlast (cdr path)))
+ :name name
+ :type type))))))
+
+(defun parse-xstream (xstream handler &rest args)
+ (let ((*ctx* nil))
+ (handler-case
+ (let ((zstream (make-zstream :input-stack (list xstream))))
+ (peek-rune xstream)
+ (with-scratch-pads ()
+ (apply #'p/document zstream handler args)))
+ (runes-encoding:encoding-error (c)
+ (wf-error xstream "~A" c)))))
+
+(defun parse-file (filename handler &rest args)
+ (with-open-xfile (input filename)
+ (setf (xstream-name input)
+ (make-stream-name
+ :entity-name "main document"
+ :entity-kind :main
+ :uri (pathname-to-uri filename)))
+ (apply #'parse-xstream input handler args)))
+
+(defun resolve-synonym-stream (stream)
+ (while (typep stream 'synonym-stream)
+ (setf stream (symbol-value (synonym-stream-symbol stream))))
+ stream)
+
+(defun safe-stream-sysid (stream)
+ (if (and (typep (resolve-synonym-stream stream) 'file-stream)
+ ;; ignore-errors, because sb-bsd-sockets creates instances of
+ ;; FILE-STREAMs that aren't
+ (ignore-errors (pathname stream)))
+ (pathname-to-uri (pathname stream))
+ nil))
+
+(defun parse-stream (stream handler &rest args)
+ (let ((xstream
+ (make-xstream
+ stream
+ :name (make-stream-name
+ :entity-name "main document"
+ :entity-kind :main
+ :uri (safe-stream-sysid stream))
+ :initial-speed 1)))
+ (apply #'parse-xstream xstream handler args)))
+
+(defun parse-dtd-file (filename &optional handler)
+ (with-open-file (s filename :element-type '(unsigned-byte 8))
+ (parse-dtd-stream s handler)))
+
+(defun parse-dtd-stream (stream &optional handler)
+ (let ((input (make-xstream stream)))
+ (setf (xstream-name input)
+ (make-stream-name
+ :entity-name "dtd"
+ :entity-kind :main
+ :uri (safe-stream-sysid stream)))
+ (let ((zstream (make-zstream :input-stack (list input)))
+ (*ctx* (make-context :handler handler))
+ (*validate* t)
+ (*data-behaviour* :DTD))
+ (with-scratch-pads ()
+ (ensure-dtd)
+ (peek-rune input)
+ (p/ext-subset zstream)
+ (dtd *ctx*)))))
+
+(defun parse-rod (string handler &rest args)
+ (apply #'parse-xstream (string->xstream string) handler args))
+
+(defun string->xstream (string)
+ (make-rod-xstream (string-rod string)))
+
+(defclass octet-input-stream
+ (trivial-gray-stream-mixin fundamental-binary-input-stream)
+ ((octets :initarg :octets)
+ (pos :initform 0)))
+
+(defmethod close ((stream octet-input-stream) &key abort)
+ (declare (ignore abort))
+ (open-stream-p stream))
+
+(defmethod stream-read-byte ((stream octet-input-stream))
+ (with-slots (octets pos) stream
+ (if (>= pos (length octets))
+ :eof
+ (prog1
+ (elt octets pos)
+ (incf pos)))))
+
+(defmethod stream-read-sequence
+ ((stream octet-input-stream) sequence start end &key &allow-other-keys)
+ (with-slots (octets pos) stream
+ (let* ((length (min (- end start) (- (length octets) pos)))
+ (end1 (+ start length))
+ (end2 (+ pos length)))
+ (replace sequence octets :start1 start :end1 end1 :start2 pos :end2 end2)
+ (setf pos end2)
+ end1)))
+
+(defun make-octet-input-stream (octets)
+ (make-instance 'octet-input-stream :octets octets))
+
+(defun parse-octets (octets handler &rest args)
+ (apply #'parse-stream (make-octet-input-stream octets) handler args))
+
+;;;;
+
+(defun zstream-push (new-xstream zstream)
+ (cond ((find-if (lambda (x)
+ (and (xstream-p x)
+ (eql (stream-name-entity-name (xstream-name x))
+ (stream-name-entity-name (xstream-name new-xstream)))
+ (eql (stream-name-entity-kind (xstream-name x))
+ (stream-name-entity-kind (xstream-name new-xstream)))))
+ (zstream-input-stack zstream))
+ (wf-error zstream "Infinite recursion.")))
+ (push new-xstream (zstream-input-stack zstream))
+ zstream)
+
+(defun recurse-on-entity (zstream name kind continuation &optional internalp)
+ (assert (not (zstream-token-category zstream)))
+ (call-with-entity-expansion-as-stream
+ zstream
+ (lambda (new-xstream)
+ (push :stop (zstream-input-stack zstream))
+ (zstream-push new-xstream zstream)
+ (prog1
+ (funcall continuation zstream)
+ (assert (eq (peek-token zstream) :eof))
+ (assert (eq (pop (zstream-input-stack zstream)) new-xstream))
+ (close-xstream new-xstream)
+ (assert (eq (pop (zstream-input-stack zstream)) :stop))
+ (setf (zstream-token-category zstream) nil)
+ '(consume-token zstream)) )
+ name
+ kind
+ internalp))
+
+#||
+(defmacro read-data-until* ((predicate input res res-start res-end) &body body)
+ ;; fast variant -- for now disabled for no apparent reason
+ ;; -> res, res-start, res-end
+ `(let* ((rptr (xstream-read-ptr ,input))
+ (p0 rptr)
+ (fptr (xstream-fill-ptr ,input))
+ (buf (xstream-buffer ,input))
+ ,res ,res-start ,res-end)
+ (declare (type fixnum rptr fptr p0)
+ (type (simple-array read-element (*)) buf))
+ (loop
+ (cond ((%= rptr fptr)
+ ;; underflow -- hmm inject the scratch-pad with what we
+ ;; read and continue, while using read-rune and collecting
+ ;; d.h. besser wäre hier auch while-reading zu benutzen.
+ (setf (xstream-read-ptr ,input) rptr)
+ (multiple-value-setq (,res ,res-start ,res-end)
+ (with-rune-collector/raw (collect)
+ (do ((i p0 (%+ i 1)))
+ ((%= i rptr))
+ (collect (%rune buf i)))
+ (let (c)
+ (loop
+ (cond ((%= rptr fptr)
+ (setf (xstream-read-ptr ,input) rptr)
+ (setf c (peek-rune input))
+ (cond ((eq c :eof)
+ (return)))
+ (setf rptr (xstream-read-ptr ,input)
+ fptr (xstream-fill-ptr ,input)
+ buf (xstream-buffer ,input)))
+ (t
+ (setf c (%rune buf rptr))))
+ (cond ((,predicate c)
+ ;; we stop
+ (setf (xstream-read-ptr ,input) rptr)
+ (return))
+ (t
+ ;; we continue
+ (collect c)
+ (setf rptr (%+ rptr 1))) )))))
+ (return))
+ ((,predicate (%rune buf rptr))
+ ;; we stop
+ (setf (xstream-read-ptr ,input) rptr)
+ (setf ,res buf ,res-start p0 ,res-end rptr)
+ (return) )
+ (t
+ we continue
+ (sf rptr (%+ rptr 1))) ))
+ , at body ))
+||#
+
+(defmacro read-data-until* ((predicate input res res-start res-end) &body body)
+ "Read data from `input' until `predicate' applied to the read char
+ turns true. Then execute `body' with `res', `res-start', `res-end'
+ bound to denote a subsequence (of RUNEs) containing the read portion.
+ The rune upon which `predicate' turned true is neither consumed from
+ the stream, nor included in `res'.
+
+ Keep the predicate short, this it may be included more than once into
+ the macro's expansion."
+ ;;
+ (let ((input-var (gensym))
+ (collect (gensym))
+ (c (gensym)))
+ `(LET ((,input-var ,input))
+ (MULTIPLE-VALUE-BIND (,res ,res-start ,res-end)
+ (WITH-RUNE-COLLECTOR/RAW (,collect)
+ (LOOP
+ (LET ((,c (PEEK-RUNE ,input-var)))
+ (COND ((EQ ,c :EOF)
+ ;; xxx error message
+ (RETURN))
+ ((FUNCALL ,predicate ,c)
+ (RETURN))
+ (t
+ (,collect ,c)
+ (CONSUME-RUNE ,input-var))))))
+ (LOCALLY
+ , at body)))))
+
+(defun read-name-token (input)
+ (read-data-until* ((lambda (rune)
+ (declare (type rune rune))
+ (not (name-rune-p rune)))
+ input
+ r rs re)
+ (intern-name r rs re)))
+
+(defun read-cdata (input)
+ (read-data-until* ((lambda (rune)
+ (declare (type rune rune))
+ (when (and (%rune< rune #/U+0020)
+ (not (or (%rune= rune #/U+0009)
+ (%rune= rune #/U+000a)
+ (%rune= rune #/U+000d))))
+ (wf-error input "code point invalid: ~A" rune))
+ (or (%rune= rune #/<) (%rune= rune #/&)))
+ input
+ source start end)
+ (locally
+ (declare (type (simple-array rune (*)) source)
+ (type ufixnum start)
+ (type ufixnum end)
+ (optimize (speed 3) (safety 0)))
+ (let ((res (make-array (%- end start) :element-type 'rune)))
+ (declare (type (simple-array rune (*)) res))
+ (let ((i (%- end start)))
+ (declare (type ufixnum i))
+ (loop
+ (setf i (- i 1))
+ (setf (%rune res i) (%rune source (the ufixnum (+ i start))))
+ (when (= i 0)
+ (return))))
+ res))))
+
+(defun internal-entity-expansion (name)
+ (let ((def (get-entity-definition name :general (dtd *ctx*))))
+ (unless def
+ (wf-error nil "Entity '~A' is not defined." (rod-string name)))
+ (unless (typep def 'internal-entdef)
+ (wf-error nil "Entity '~A' is not an internal entity." name))
+ (or (entdef-expansion def)
+ (setf (entdef-expansion def) (find-internal-entity-expansion name)))))
+
+(defun find-internal-entity-expansion (name)
+ (let ((zinput (make-zstream)))
+ (with-rune-collector-3 (collect)
+ (labels ((muffle (input)
+ (let (c)
+ (loop
+ (setf c (read-rune input))
+ (cond ((eq c :eof)
+ (return))
+ ((rune= c #/&)
+ (setf c (peek-rune input))
+ (cond ((eql c :eof)
+ (eox input))
+ ((rune= c #/#)
+ (let ((c (read-character-reference input)))
+ (%put-unicode-char c collect)))
+ (t
+ (unless (name-start-rune-p c)
+ (wf-error zinput "Expecting name after &."))
+ (let ((name (read-name-token input)))
+ (setf c (read-rune input))
+ (check-rune input c #/\;)
+ (recurse-on-entity
+ zinput name :general
+ (lambda (zinput)
+ (muffle (car (zstream-input-stack zinput)))))))))
+ ((rune= c #/<)
+ (wf-error zinput "unexpected #\/<"))
+ ((space-rune-p c)
+ (collect #/space))
+ ((not (data-rune-p c))
+ (wf-error zinput "illegal char: ~S." c))
+ (t
+ (collect c)))))))
+ (declare (dynamic-extent #'muffle))
+ (recurse-on-entity
+ zinput name :general
+ (lambda (zinput)
+ (muffle (car (zstream-input-stack zinput))))) ))))
+
+(defun resolve-entity (name handler dtd)
+ (let ((*validate* nil))
+ (if (get-entity-definition name :general dtd)
+ (let* ((*ctx* (make-context :handler handler :dtd dtd))
+ (input (make-zstream))
+ (*data-behaviour* :DOC))
+ (with-scratch-pads ()
+ (recurse-on-entity
+ input name :general
+ (lambda (input)
+ (prog1
+ (etypecase (checked-get-entdef name :general)
+ (internal-entdef (p/content input))
+ (external-entdef (p/ext-parsed-ent input)))
+ (unless (eq (peek-token input) :eof)
+ (wf-error input "Trailing garbage. - ~S"
+ (peek-token input))))))))
+ nil)))
+
+(defun read-att-value-2 (input)
+ (let ((delim (read-rune input)))
+ (when (eql delim :eof)
+ (eox input))
+ (unless (member delim '(#/\" #/\') :test #'eql)
+ (wf-error input
+ "Bad attribute value delimiter ~S, must be either #\\\" or #\\\'."
+ (rune-char delim)))
+ (with-rune-collector-4 (collect)
+ (loop
+ (let ((c (read-rune input)))
+ (cond ((eq c :eof)
+ (eox input "EOF"))
+ ((rune= c delim)
+ (return))
+ ((rune= c #/<)
+ (wf-error input "'<' not allowed in attribute values"))
+ ((rune= #/& c)
+ (multiple-value-bind (kind sem) (read-entity-like input)
+ (ecase kind
+ (:CHARACTER-REFERENCE
+ (%put-unicode-char sem collect))
+ (:ENTITY-REFERENCE
+ (let* ((exp (internal-entity-expansion sem))
+ (n (length exp)))
+ (declare (type (simple-array rune (*)) exp))
+ (do ((i 0 (%+ i 1)))
+ ((%= i n))
+ (collect (%rune exp i))))))))
+ ((space-rune-p c)
+ (collect #/u+0020))
+ (t
+ (collect c))))))))
+
+;;;;;;;;;;;;;;;;;
+
+;;; Namespace stuff
+
+;; We already know that name is part of a valid XML name, so all we
+;; have to check is that the first rune is a name-start-rune and that
+;; there is not colon in it.
+(defun nc-name-p (name)
+ (and (plusp (length name))
+ (name-start-rune-p (rune name 0))
+ (notany #'(lambda (rune) (rune= #/: rune)) name)))
+
+(defun split-qname (qname)
+ (declare (type runes:simple-rod qname))
+ (let ((pos (position #/: qname)))
+ (if pos
+ (let ((prefix (subseq qname 0 pos))
+ (local-name (subseq qname (1+ pos))))
+ (when (zerop pos)
+ (wf-error nil "empty namespace prefix"))
+ (if (nc-name-p local-name)
+ (values prefix local-name)
+ (wf-error nil "~S is not a valid NcName."
+ (rod-string local-name))))
+ (values () qname))))
+
+(defun decode-qname (qname)
+ "decode-qname name => namespace-uri, prefix, local-name"
+ (declare (type runes:simple-rod qname))
+ (multiple-value-bind (prefix local-name) (split-qname qname)
+ (let ((uri (find-namespace-binding prefix)))
+ (if uri
+ (values uri prefix local-name)
+ (values nil nil qname)))))
+
+
+(defun find-namespace-binding (prefix)
+ (cdr (or (assoc (or prefix #"") *namespace-bindings* :test #'rod=)
+ (wf-error nil "Undeclared namespace prefix: ~A" (rod-string prefix)))))
+
+;; FIXME: Should probably be refactored by adding :start and :end to rod=/rod-equal
+(defun rod-starts-with (prefix rod)
+ (and (<= (length prefix) (length rod))
+ (dotimes (i (length prefix) t)
+ (unless (rune= (rune prefix i) (rune rod i))
+ (return nil)))))
+
+(defun xmlns-attr-p (attr-name)
+ (rod-starts-with #.(string-rod "xmlns") attr-name))
+
+(defun attrname->prefix (attrname)
+ (if (< 5 (length attrname))
+ (subseq attrname 6)
+ nil))
+
+(defun find-namespace-declarations (attributes)
+ (loop
+ for attribute in attributes
+ for qname = (sax:attribute-qname attribute)
+ when (xmlns-attr-p qname)
+ collect (cons (attrname->prefix qname) (sax:attribute-value attribute))))
+
+(defun declare-namespaces (attributes)
+ (let ((ns-decls (find-namespace-declarations attributes)))
+ (dolist (ns-decl ns-decls)
+ ;; check some namespace validity constraints
+ (let ((prefix (car ns-decl))
+ (uri (cdr ns-decl)))
+ (cond
+ ((and (rod= prefix #"xml")
+ (not (rod= uri #"http://www.w3.org/XML/1998/namespace")))
+ (wf-error nil
+ "Attempt to rebind the prefix \"xml\" to ~S." (mu uri)))
+ ((and (rod= uri #"http://www.w3.org/XML/1998/namespace")
+ (not (rod= prefix #"xml")))
+ (wf-error nil
+ "The namespace ~
+ URI \"http://www.w3.org/XML/1998/namespace\" may not ~
+ be bound to the prefix ~S, only \"xml\" is legal."
+ (mu prefix)))
+ ((and (rod= prefix #"xmlns")
+ (rod= uri #"http://www.w3.org/2000/xmlns/"))
+ (wf-error nil
+ "Attempt to bind the prefix \"xmlns\" to its predefined ~
+ URI \"http://www.w3.org/2000/xmlns/\", which is ~
+ forbidden for no good reason."))
+ ((rod= prefix #"xmlns")
+ (wf-error nil
+ "Attempt to bind the prefix \"xmlns\" to the URI ~S, ~
+ but it may not be declared." (mu uri)))
+ ((rod= uri #"http://www.w3.org/2000/xmlns/")
+ (wf-error nil
+ "The namespace URI \"http://www.w3.org/2000/xmlns/\" may ~
+ not be bound to prefix ~S (or any other)." (mu prefix)))
+ ((and (rod= uri #"") prefix)
+ (wf-error nil
+ "Only the default namespace (the one without a prefix) ~
+ may be bound to an empty namespace URI, thus ~
+ undeclaring it."))
+ (t
+ (push (cons prefix (if (rod= #"" uri) nil uri))
+ *namespace-bindings*)
+ (sax:start-prefix-mapping (handler *ctx*)
+ (car ns-decl)
+ (cdr ns-decl))))))
+ ns-decls))
+
+(defun undeclare-namespaces (ns-decls)
+ (dolist (ns-decl ns-decls)
+ (sax:end-prefix-mapping (handler *ctx*) (car ns-decl))))
+
+(defun build-attribute-list (attr-alist)
+ ;; fixme: if there is a reason this function reverses attribute order,
+ ;; it should be documented.
+ (let (attributes)
+ (dolist (pair attr-alist)
+ (push (sax:make-attribute :qname (car pair)
+ :value (cdr pair)
+ :specified-p t)
+ attributes))
+ attributes))
+
+(defun check-attribute-uniqueness (attributes)
+ ;; 5.3 Uniqueness of Attributes
+ ;; In XML documents conforming to [the xmlns] specification, no
+ ;; tag may contain two attributes which:
+ ;; 1. have identical names, or
+ ;; 2. have qualified names with the same local part and with
+ ;; prefixes which have been bound to namespace names that are
+ ;; identical.
+ ;;
+ ;; 1. is checked by read-tag-2, so we only deal with 2 here
+ (loop for (attr-1 . rest) on attributes do
+ (when (and (sax:attribute-namespace-uri attr-1)
+ (find-if (lambda (attr-2)
+ (and (rod= (sax:attribute-namespace-uri attr-1)
+ (sax:attribute-namespace-uri attr-2))
+ (rod= (sax:attribute-local-name attr-1)
+ (sax:attribute-local-name attr-2))))
+ rest))
+ (wf-error nil
+ "Multiple definitions of attribute ~S in namespace ~S."
+ (mu (sax:attribute-local-name attr-1))
+ (mu (sax:attribute-namespace-uri attr-1))))))
+
+(defun set-attribute-namespace (attribute)
+ (let ((qname (sax:attribute-qname attribute)))
+ (if (and sax:*use-xmlns-namespace* (rod= qname #"xmlns"))
+ (setf (sax:attribute-namespace-uri attribute)
+ #"http://www.w3.org/2000/xmlns/")
+ (multiple-value-bind (prefix local-name) (split-qname qname)
+ (declare (ignorable local-name))
+ (when (and prefix ;; default namespace doesn't apply to attributes
+ (or (not (rod= #"xmlns" prefix))
+ sax:*use-xmlns-namespace*))
+ (multiple-value-bind (uri prefix local-name)
+ (decode-qname qname)
+ (declare (ignore prefix))
+ (setf (sax:attribute-namespace-uri attribute) uri)
+ (setf (sax:attribute-local-name attribute) local-name)))))))
+
+;;;;;;;;;;;;;;;;;
+
+;; System Identifier Protocol
+
+;; A system identifier is an object obeying to the system identifier
+;; protocol. Often something like an URL or a pathname.
+
+;; OPEN-SYS-ID sys-id [generic function]
+;;
+;; Opens the resource associated with the system identifier `sys-id'
+;; for reading and returns a stream. For now it is expected, that the
+;; stream is an octet stream (one of element type (unsigned-byte 8)).
+;;
+;; More precisely: The returned object only has to obey to the xstream
+;; controller protocol. (That is it has to provide implementations for
+;; READ-OCTETS and XSTREAM-CONTROLLER-CLOSE).
+
+;; MERGE-SYS-ID sys-id base [generic function]
+;;
+;; Merges two system identifiers. That is resolve `sys-id' relative to
+;; `base' yielding an absolute system identifier suitable for
+;; OPEN-SYS-ID.
+
+
+;;;;;;;;;;;;;;;;;
+;;; SAX validation handler
+
+(defclass validator ()
+ ((context :initarg :context :accessor context)
+ (cdatap :initform nil :accessor cdatap)))
+
+(defun make-validator (dtd root)
+ (make-instance 'validator
+ :context (make-context
+ :handler nil
+ :dtd dtd
+ :model-stack (list (make-root-model root)))))
+
+(macrolet ((with-context ((validator) &body body)
+ `(let ((*ctx* (context ,validator))
+ (*validate* t))
+ (with-scratch-pads () ;nicht schoen
+ , at body))))
+ (defmethod sax:start-element ((handler validator) uri lname qname attributes)
+ uri lname
+ (with-context (handler)
+ (validate-start-element *ctx* qname)
+ (process-attributes *ctx* qname attributes)))
+
+ (defmethod sax:start-cdata ((handler validator))
+ (setf (cdatap handler) t))
+
+ (defmethod sax:characters ((handler validator) data)
+ (with-context (handler)
+ (validate-characters *ctx* (if (cdatap handler) #"hack" data))))
+
+ (defmethod sax:end-cdata ((handler validator))
+ (setf (cdatap handler) nil))
+
+ (defmethod sax:end-element ((handler validator) uri lname qname)
+ uri lname
+ (with-context (handler)
+ (validate-end-element *ctx* qname))))
Added: vendor/cxml/xmlns-normalizer.lisp
===================================================================
--- vendor/cxml/xmlns-normalizer.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/xmlns-normalizer.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,131 @@
+;;;; xmlns-normalizer.lisp -- DOM 3-style namespace normalization
+;;;;
+;;;; This file is part of the CXML parser, released under Lisp-LGPL.
+;;;; See file COPYING for details.
+;;;;
+;;;; Copyright (c) 2005 David Lichteblau
+
+;;;; Hier eine Variante des reichlich furchtbaren Algorithmus zur
+;;;; Namespace-Normalisierung aus DOM 3 Core.[1]
+;;;;
+;;;; Gebraucht wir die Sache, weil Element- und Attributknoten in DOM
+;;;; zwar ein Prefix-Attribut speichern, massgeblich fuer ihren Namespace
+;;;; aber nur die URI sein soll. Und eine Anpassung der zugehoerigen
+;;;; xmlns-Attribute findet bei Veraenderungen im DOM-Baum nicht statt,
+;;;; bzw. wird dem Nutzer ueberlassen.
+;;;;
+;;;; Daher muss letztlich spaetestens beim Serialisieren eine
+;;;; Namespace-Deklaration fuer die angegebene URI nachgetragen und das
+;;;; Praefix ggf. umbenannt werden, damit am Ende doch etwas
+;;;; Namespace-konformes heraus kommt.
+;;;;
+;;;; Und das nennen sie dann Namespace-Support.
+;;;;
+;;;; [1] http://www.w3.org/TR/2004/REC-DOM-Level-3-Core-20040407/namespaces-algorithms.html#normalizeDocumentAlgo
+
+(in-package :cxml)
+
+(defclass namespace-normalizer (sax-proxy)
+ ((xmlns-stack :initarg :xmlns-stack :accessor xmlns-stack)))
+
+(defvar *xmlns-namespace* #"http://www.w3.org/2000/xmlns/")
+
+(defun make-namespace-normalizer (chained-handler)
+ (make-instance 'namespace-normalizer
+ :xmlns-stack (list (mapcar (lambda (cons)
+ (make-xmlns-attribute (car cons) (cdr cons)))
+ *namespace-bindings*))
+ :chained-handler chained-handler))
+
+(defun normalizer-find-prefix (handler prefix)
+ (block t
+ (dolist (bindings (xmlns-stack handler))
+ (dolist (attribute bindings)
+ (when (rod= (sax:attribute-local-name attribute) prefix)
+ (return-from t attribute))))))
+
+(defun normalizer-find-uri (handler uri)
+ (block t
+ (dolist (bindings (xmlns-stack handler))
+ (dolist (attribute bindings)
+ (when (and (rod= (sax:attribute-value attribute) uri)
+ ;; default-namespace interessiert uns nicht
+ (not (rod= (sax:attribute-qname attribute) #"xmlns")))
+ (return-from t attribute))))))
+
+(defun make-xmlns-attribute (prefix uri)
+ (if prefix
+ (sax:make-attribute
+ :qname (concatenate 'rod #"xmlns:" prefix)
+ :namespace-uri *xmlns-namespace*
+ :local-name prefix
+ :value uri)
+ (sax:make-attribute
+ :qname #"xmlns"
+ :namespace-uri *xmlns-namespace*
+ :local-name #"xmlns"
+ :value uri)))
+
+(defun rename-attribute (a new-prefix)
+ (setf (sax:attribute-qname a)
+ (concatenate 'rod new-prefix #":" (sax:attribute-local-name a))))
+
+(defmethod sax:start-element
+ ((handler namespace-normalizer) uri lname qname attrs)
+ (declare (ignore qname))
+ (when (null uri)
+ (setf uri #""))
+ (let ((normal-attrs '()))
+ (push nil (xmlns-stack handler))
+ (dolist (a attrs)
+ (if (rod= *xmlns-namespace* (sax:attribute-namespace-uri a))
+ (push a (car (xmlns-stack handler)))
+ (push a normal-attrs)))
+ (flet ((push-namespace (prefix uri)
+ (let ((new (make-xmlns-attribute prefix uri)))
+ (push new (car (xmlns-stack handler)))
+ (push new attrs))))
+ (multiple-value-bind (prefix local-name) (split-qname qname)
+ (setf lname local-name)
+ (let ((binding (normalizer-find-prefix handler prefix)))
+ (cond
+ ((null binding)
+ (unless (and (null prefix) (zerop (length uri)))
+ (push-namespace prefix uri)))
+ ((rod= (sax:attribute-value binding) uri))
+ ((member binding (car (xmlns-stack handler)))
+ (setf (sax:attribute-value binding) uri))
+ (t
+ (push-namespace prefix uri)))))
+ (dolist (a normal-attrs)
+ (let ((u (sax:attribute-namespace-uri a)))
+ (when u
+ (let* ((prefix (split-qname (sax:attribute-qname a)))
+ (prefix-binding
+ (when prefix
+ (normalizer-find-prefix handler prefix))))
+ (when (or (null prefix-binding)
+ (not (rod= (sax:attribute-value prefix-binding) u)))
+ (let ((uri-binding (normalizer-find-uri handler u)))
+ (cond
+ (uri-binding
+ (rename-attribute
+ a
+ (sax:attribute-local-name uri-binding)))
+ ((null prefix-binding)
+ (push-namespace prefix u))
+ (t
+ (loop
+ for i from 1
+ for prefix = (rod (format nil "NS~D" i))
+ unless (normalizer-find-prefix handler prefix)
+ do
+ (push-namespace prefix u)
+ (rename-attribute a prefix)
+ (return))))))))))))
+ (sax:start-element (proxy-chained-handler handler) uri lname qname attrs))
+
+(defmethod sax:end-element ((handler namespace-normalizer) uri lname qname)
+ (declare (ignore qname))
+ (pop (xmlns-stack handler))
+ (sax:end-element (proxy-chained-handler handler) (or uri #"") lname qname))
Added: vendor/cxml/xmls-compat.lisp
===================================================================
--- vendor/cxml/xmls-compat.lisp 2006-02-18 09:24:24 UTC (rev 1844)
+++ vendor/cxml/xmls-compat.lisp 2006-02-18 09:34:15 UTC (rev 1845)
@@ -0,0 +1,159 @@
+;;;; xml-compat.lisp -- XMLS-compatible data structures
+;;;;
+;;;; This file is part of the CXML parser, released under Lisp-LGPL.
+;;;; See file COPYING for details.
+;;;;
+;;;; Developed 2004 for headcraft - http://headcraft.de/
+;;;; Copyright: David Lichteblau
+
+;;;; XXX Der namespace-Support in xmls kommt mir zweifelhaft vor.
+;;;; Wir immitieren das soweit es gebraucht wurde bisher.
+
+(defpackage cxml-xmls
+ (:use :cl :runes)
+ (:export #:make-node #:node-name #:node-ns #:node-attrs #:node-children
+ #:make-xmls-builder #:map-node))
+
+(in-package :cxml-xmls)
+
+
+;;;; Knoten
+
+(defun make-node (&key name ns attrs children)
+ `(,(if ns (cons name ns) name)
+ ,attrs
+ , at children))
+
+(defun node-name (node)
+ (let ((car (car node)))
+ (if (consp car)
+ (car car)
+ car)))
+
+(defun (setf node-name) (newval node)
+ (let ((car (car node)))
+ (if (consp car)
+ (setf (car car) newval)
+ (setf (car node) newval))))
+
+(defun node-ns (node)
+ (let ((car (car node)))
+ (if (consp car)
+ (cdr car)
+ nil)))
+
+(defun (setf node-ns) (newval node)
+ (let ((car (car node)))
+ (if (consp car)
+ (setf (cdr car) newval)
+ (setf (car node) (cons car newval)))
+ newval))
+
+(defun node-attrs (node)
+ (cadr node))
+
+(defun (setf node-attrs) (newval node)
+ (setf (cadr node) newval))
+
+(defun node-children (node)
+ (cddr node))
+
+(defun (setf node-children) (newval node)
+ (setf (cddr node) newval))
+
+
+;;;; SAX-Handler (Parser)
+
+(defclass xmls-builder ()
+ ((element-stack :initform nil :accessor element-stack)
+ (root :initform nil :accessor root)
+ (include-default-values :initform t
+ :initarg :include-default-values
+ :accessor include-default-values)))
+
+(defun make-xmls-builder (&key (include-default-values t))
+ (make-instance 'xmls-builder :include-default-values include-default-values))
+
+(defmethod sax:end-document ((handler xmls-builder))
+ (root handler))
+
+(defmethod sax:start-element
+ ((handler xmls-builder) namespace-uri local-name qname attributes)
+ (declare (ignore namespace-uri))
+ (setf local-name (or local-name qname))
+ (let* ((attributes
+ (loop
+ for attr in attributes
+ when (or (sax:attribute-specified-p attr)
+ (include-default-values handler))
+ collect
+ (list (sax:attribute-qname attr)
+ (sax:attribute-value attr))))
+ (node (make-node :name local-name
+ :ns (let ((lq (length qname))
+ (ll (length local-name)))
+ (if (eql lq ll)
+ nil
+ (subseq qname 0 (- lq ll 1))))
+ :attrs attributes))
+ (parent (car (element-stack handler))))
+ (if parent
+ (push node (node-children parent))
+ (setf (root handler) node))
+ (push node (element-stack handler))))
+
+(defmethod sax:end-element
+ ((handler xmls-builder) namespace-uri local-name qname)
+ (declare (ignore namespace-uri local-name qname))
+ (let ((node (pop (element-stack handler))))
+ (setf (node-children node) (reverse (node-children node)))))
+
+(defmethod sax:characters ((handler xmls-builder) data)
+ (let* ((parent (car (element-stack handler)))
+ (prev (car (node-children parent))))
+ ;; Be careful to accept both rods and strings here, so that xmls can be
+ ;; used with strings even if cxml is configured to use octet string rods.
+ (if (typep prev '(or rod string))
+ ;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer
+ ;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten
+ ;; erweitern, sonst ist das Dokument nicht normalisiert.
+ ;; (XXX Oder sollte man besser den Parser entsprechend aendern?)
+ (setf (car (node-children parent))
+ (concatenate `(vector ,(array-element-type prev))
+ prev
+ data))
+ (push data (node-children parent)))))
+
+
+;;;; SAX-Treiber (fuer Serialisierung)
+
+(defun map-node
+ (handler node
+ &key (include-xmlns-attributes sax:*include-xmlns-attributes*))
+ (sax:start-document handler)
+ (labels ((walk (node)
+ (let* ((attlist
+ (compute-attributes node include-xmlns-attributes))
+ (lname (rod (node-name node)))
+ (ns (rod (node-ns node)))
+ (qname (concatenate 'rod ns (rod ":") lname)))
+ ;; fixme: namespaces
+ (sax:start-element handler nil lname qname attlist)
+ (dolist (child (node-children node))
+ (typecase child
+ (list (walk child))
+ ((or string rod) (sax:characters handler (rod child)))))
+ (sax:end-element handler nil lname qname))))
+ (walk node))
+ (sax:end-document handler))
+
+(defun compute-attributes (node xmlnsp)
+ (remove nil
+ (mapcar (lambda (a)
+ (destructuring-bind (name value) a
+ (if (or xmlnsp (not (cxml::xmlns-attr-p (rod name))))
+ (sax:make-attribute :qname (rod name)
+ :value (rod value)
+ :specified-p t)
+ nil)))
+ (node-attrs node))))
More information about the Bknr-cvs
mailing list