From dlichteblau at common-lisp.net Sun Jul 1 17:25:40 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 1 Jul 2007 13:25:40 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/xml
Message-ID: <20070701172540.0942B1C0C9@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv26802
Modified Files:
unparse.lisp
Log Message:
escape % in internal entities
new function unparsed-internal-subset
use " to escape IDs containing '
--- /project/cxml/cvsroot/cxml/xml/unparse.lisp 2007/06/16 11:27:19 1.16
+++ /project/cxml/cvsroot/cxml/xml/unparse.lisp 2007/07/01 17:25:39 1.17
@@ -79,6 +79,7 @@
(name-for-dtd :accessor name-for-dtd)
(previous-notation :initform nil :accessor previous-notation)
(have-doctype :initform nil :accessor have-doctype)
+ (have-internal-subset :initform nil :accessor have-internal-subset)
(stack :initform nil :accessor stack)))
(defmethod initialize-instance :after ((instance sink) &key)
@@ -156,6 +157,9 @@
(%write-rod #"\"" sink)))))
(defmethod sax:start-internal-subset ((sink sink))
+ (when (have-internal-subset sink)
+ (error "duplicate internal subset"))
+ (setf (have-internal-subset sink) t)
(ensure-doctype sink)
(%write-rod #" [" sink)
(%write-rune #/U+000A sink))
@@ -164,6 +168,25 @@
(ensure-doctype sink)
(%write-rod #"]" sink))
+(defmethod sax:unparsed-internal-subset ((sink sink) str)
+ (when (have-internal-subset sink)
+ (error "duplicate internal subset"))
+ (setf (have-internal-subset sink) t)
+ (ensure-doctype sink)
+ (%write-rod #" [" sink)
+ (%write-rune #/U+000A sink)
+ (unparse-string str sink)
+ (%write-rod #"]" sink))
+
+;; for the benefit of the XML test suite, prefer ' over "
+(defun write-quoted-rod (x sink)
+ (let ((q (if (find #/' x) #/" #/'
+ ;; '" (thanks you Emacs indentation, the if ends here)
+ )))
+ (%write-rune q sink)
+ (%write-rod x sink)
+ (%write-rune q sink)))
+
(defmethod sax:notation-declaration ((sink sink) name public-id system-id)
(let ((prev (previous-notation sink)))
(when (and (and (canonical sink) (>= (canonical sink) 2))
@@ -175,19 +198,16 @@
(%write-rod name sink)
(cond
((zerop (length public-id))
- (%write-rod #" SYSTEM '" sink)
- (%write-rod system-id sink)
- (%write-rune #/' sink))
+ (%write-rod #" SYSTEM " sink)
+ (write-quoted-rod system-id sink))
((zerop (length system-id))
- (%write-rod #" PUBLIC '" sink)
- (%write-rod public-id sink)
- (%write-rune #/' sink))
+ (%write-rod #" PUBLIC " sink)
+ (write-quoted-rod public-id sink))
(t
- (%write-rod #" PUBLIC '" sink)
- (%write-rod public-id sink)
- (%write-rod #"' '" sink)
- (%write-rod system-id sink)
- (%write-rune #/' sink)))
+ (%write-rod #" PUBLIC " sink)
+ (write-quoted-rod public-id sink)
+ (%write-rod #" " sink)
+ (write-quoted-rod system-id sink)))
(%write-rune #/> sink)
(%write-rune #/U+000A sink))
@@ -198,19 +218,16 @@
(%write-rod name sink)
(cond
((zerop (length public-id))
- (%write-rod #" SYSTEM '" sink)
- (%write-rod system-id sink)
- (%write-rune #/' sink))
+ (%write-rod #" SYSTEM " sink)
+ (write-quoted-rod system-id sink))
((zerop (length system-id))
- (%write-rod #" PUBLIC '" sink)
- (%write-rod public-id sink)
- (%write-rune #/' sink))
+ (%write-rod #" PUBLIC " sink)
+ (write-quoted-rod public-id sink))
(t
- (%write-rod #" PUBLIC '" sink)
- (%write-rod public-id sink)
- (%write-rod #"' '" sink)
- (%write-rod system-id sink)
- (%write-rune #/' sink)))
+ (%write-rod #" PUBLIC " sink)
+ (write-quoted-rod public-id sink)
+ (%write-rod #" " sink)
+ (write-quoted-rod system-id sink)))
(%write-rod #" NDATA " sink)
(%write-rod notation-name sink)
(%write-rune #/> sink)
@@ -226,19 +243,16 @@
(%write-rod name sink)
(cond
((zerop (length public-id))
- (%write-rod #" SYSTEM '" sink)
- (%write-rod system-id sink)
- (%write-rune #/' sink))
+ (%write-rod #" SYSTEM " sink)
+ (write-quoted-rod system-id sink))
((zerop (length system-id))
- (%write-rod #" PUBLIC '" sink)
- (%write-rod public-id sink)
- (%write-rune #/' sink))
+ (%write-rod #" PUBLIC " sink)
+ (write-quoted-rod public-id sink))
(t
- (%write-rod #" PUBLIC '" sink)
- (%write-rod public-id sink)
- (%write-rod #"' '" sink)
- (%write-rod system-id sink)
- (%write-rune #/' sink)))
+ (%write-rod #" PUBLIC " sink)
+ (write-quoted-rod public-id sink)
+ (%write-rod #" " sink)
+ (write-quoted-rod system-id sink)))
(%write-rune #/> sink)
(%write-rune #/U+000A sink))
@@ -251,7 +265,7 @@
(%write-rod name sink)
(%write-rune #/U+0020 sink)
(%write-rune #/\" sink)
- (unparse-string value sink)
+ (unparse-dtd-string value sink)
(%write-rune #/\" sink)
(%write-rune #/> sink)
(%write-rune #/U+000A sink))
@@ -319,6 +333,7 @@
(when rest
(%write-rune #\| sink)))
(%write-rune #/\) sink)))
+ (%write-rune #/U+0020 sink)
(cond
((atom default)
(%write-rune #/# sink)
@@ -498,6 +513,22 @@
(t
(write-rune c ystream))))
+(defun unparse-dtd-string (str sink)
+ (let ((y (sink-ystream sink)))
+ (loop for rune across str do (unparse-dtd-char rune y))))
+
+(defun unparse-dtd-char (c ystream)
+ (cond ((rune= c #/%) (write-rod '#.(string-rod "%") ystream))
+ ((rune= c #/&) (write-rod '#.(string-rod "&") ystream))
+ ((rune= c #/<) (write-rod '#.(string-rod "<") ystream))
+ ((rune= c #/>) (write-rod '#.(string-rod ">") ystream))
+ ((rune= c #/\") (write-rod '#.(string-rod """) ystream))
+ ((rune= c #/U+0009) (write-rod '#.(string-rod " ") ystream))
+ ((rune= c #/U+000A) (write-rod '#.(string-rod "
") ystream))
+ ((rune= c #/U+000D) (write-rod '#.(string-rod "
") ystream))
+ (t
+ (write-rune c ystream))))
+
(defun %write-rune (c sink)
(write-rune c (sink-ystream sink)))
From dlichteblau at common-lisp.net Sun Jul 1 17:25:45 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 1 Jul 2007 13:25:45 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/xml
Message-ID: <20070701172545.E5B271C0C9@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv26838
Modified Files:
sax-handler.lisp
Log Message:
new function unparsed-internal-subset
--- /project/cxml/cvsroot/cxml/xml/sax-handler.lisp 2007/03/04 21:04:13 1.7
+++ /project/cxml/cvsroot/cxml/xml/sax-handler.lisp 2007/07/01 17:25:45 1.8
@@ -73,6 +73,7 @@
#:start-dtd
#:end-dtd
#:start-internal-subset
+ #:unparsed-internal-subset
#:end-internal-subset
#:unparsed-entity-declaration
#:external-entity-declaration
@@ -337,6 +338,11 @@
finished, if present.")
(:method ((handler t)) nil))
+(defgeneric unparsed-internal-subset (handler str)
+ (:documentation "Reports that an internal subset is present, but has not
+been parsed and is available as a string.")
+ (:method ((handler t) str) nil))
+
(defgeneric unparsed-entity-declaration
(handler name public-id system-id notation-name)
(:documentation
From dlichteblau at common-lisp.net Sun Jul 1 17:26:04 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 1 Jul 2007 13:26:04 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/xml
Message-ID: <20070701172604.7DC331C0CA@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv26914
Modified Files:
xmls-compat.lisp
Log Message:
removed out-of-date comment
--- /project/cxml/cvsroot/cxml/xml/xmls-compat.lisp 2007/06/16 11:07:58 1.4
+++ /project/cxml/cvsroot/cxml/xml/xmls-compat.lisp 2007/07/01 17:26:04 1.5
@@ -6,9 +6,6 @@
;;;; Developed 2004 for headcraft - http://headcraft.de/
;;;; Copyright: David Lichteblau
-;;;; XXX Der namespace-Support in xmls kommt mir zweifelhaft vor.
-;;;; Wir imitieren das soweit es gebraucht wurde bisher.
-
(defpackage cxml-xmls
(:use :cl :runes)
(:export #:make-node #:node-name #:node-ns #:node-attrs #:node-children
From dlichteblau at common-lisp.net Sun Jul 1 17:26:12 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 1 Jul 2007 13:26:12 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/test
Message-ID: <20070701172612.4D6831C0CA@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/test
In directory clnet:/tmp/cvs-serv27018
Modified Files:
xmlconf.lisp
Log Message:
hacks for stp tests
--- /project/cxml/cvsroot/cxml/test/xmlconf.lisp 2007/06/16 10:03:11 1.15
+++ /project/cxml/cvsroot/cxml/test/xmlconf.lisp 2007/07/01 17:26:12 1.16
@@ -53,7 +53,7 @@
(when (plusp (length output))
(merge-pathnames output sub-directory)))))
-(defun serialize-document (document)
+(defmethod serialize-document ((document t))
(dom:map-document (cxml:make-octet-vector-sink :canonical 2)
document
:include-doctype :canonical-notations
@@ -195,6 +195,10 @@
:validate nil)
(error "well-formedness violation not detected")
nil)
+ #+fixme-stp-test
+ (error ()
+ (format t " unexpected-error")
+ t)
(cxml:well-formedness-violation ()
(format t " not-wf")
t))
@@ -207,6 +211,10 @@
:validate t)
(error "well-formedness violation not detected")
nil)
+ #+fixme-stp-test
+ (error ()
+ (format t " unexpected-error")
+ t)
(cxml:well-formedness-violation ()
(format t " not-wf")
t)
From dlichteblau at common-lisp.net Sun Jul 1 18:52:26 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 1 Jul 2007 14:52:26 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/xml
Message-ID: <20070701185226.D67C2450C0@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv13765
Modified Files:
unparse.lisp
Log Message:
new function cxml:doctype
--- /project/cxml/cvsroot/cxml/xml/unparse.lisp 2007/07/01 17:25:39 1.17
+++ /project/cxml/cvsroot/cxml/xml/unparse.lisp 2007/07/01 18:52:26 1.18
@@ -564,6 +564,12 @@
(defmacro with-namespace ((prefix uri) &body body)
`(invoke-with-namespace (lambda () , at body) ,prefix ,uri))
+(defun doctype (name public-id system-id &optional internal-subset)
+ (sax:start-dtd *sink* name public-id system-id)
+ (when internal-subset
+ (sax:unparsed-internal-subset *sink* internal-subset))
+ (sax:end-dtd *sink*))
+
(defun maybe-emit-start-tag ()
(when *current-element*
;; starting child node, need to emit opening tag of parent first:
From dlichteblau at common-lisp.net Sun Jul 1 18:52:34 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 1 Jul 2007 14:52:34 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/xml
Message-ID: <20070701185234.2503D4818B@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv13842
Modified Files:
package.lisp
Log Message:
new function cxml:doctype
--- /project/cxml/cvsroot/cxml/xml/package.lisp 2007/06/16 11:27:19 1.17
+++ /project/cxml/cvsroot/cxml/xml/package.lisp 2007/07/01 18:52:33 1.18
@@ -58,6 +58,7 @@
#:unparse-attribute
#:cdata
#:text
+ #:doctype
#:xml-parse-error
#:well-formedness-violation
From dlichteblau at common-lisp.net Sun Jul 1 18:56:09 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 1 Jul 2007 14:56:09 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/doc
Message-ID: <20070701185609.06DFB54171@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/doc
In directory clnet:/tmp/cvs-serv14500
Modified Files:
sax.xml
Log Message:
new function cxml:doctype
--- /project/cxml/cvsroot/cxml/doc/sax.xml 2007/05/01 20:07:00 1.4
+++ /project/cxml/cvsroot/cxml/doc/sax.xml 2007/07/01 18:56:09 1.5
@@ -299,6 +299,7 @@
Function CXML:ATTRIBUTE* (prefix lname value) => value
Function CXML:TEXT (data) => data
Function CXML:CDATA (data) => data
+ Function CXML:doctype (name public-id system-id &optional internal-subset)
Convenience syntax for event-based serialization.
From dlichteblau at common-lisp.net Thu Jul 5 20:58:15 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Thu, 5 Jul 2007 16:58:15 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/doc
Message-ID: <20070705205815.E29A461051@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/doc
In directory clnet:/tmp/cvs-serv5595/doc
Modified Files:
index.xml
Log Message:
Fixed build on non-Unicode lisps.
Fixed parsing on non-Unicode lisps.
Fixed Unicode detection on OpenMCL.
--- /project/cxml/cvsroot/cxml/doc/index.xml 2007/06/16 11:27:19 1.11
+++ /project/cxml/cvsroot/cxml/doc/index.xml 2007/07/05 20:58:15 1.12
@@ -59,6 +59,10 @@
Recent Changes
rel-2007-mm-dd
+ -
+ Fixed build on non-Unicode lisps. Fixed parsing on
+ non-Unicode lisps. Fixed Unicode detection on OpenMCL.
+
- Serialization no longer defaults to canonical form.
- Fixed octet array argument to make-source.
-
@@ -74,6 +78,8 @@
where normal streams are used instead of xstreams and ystreams
(albeit both SCL-specific at this point).
+ - new convenience serialization function cxml:doctype. Various
+ DTD serialization fixes.
rel-2007-05-26
From dlichteblau at common-lisp.net Thu Jul 5 20:58:15 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Thu, 5 Jul 2007 16:58:15 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml
Message-ID: <20070705205815.C398E60031@common-lisp.net>
Update of /project/cxml/cvsroot/cxml
In directory clnet:/tmp/cvs-serv5595
Modified Files:
GNUmakefile cxml.asd runes.asd
Log Message:
Fixed build on non-Unicode lisps.
Fixed parsing on non-Unicode lisps.
Fixed Unicode detection on OpenMCL.
--- /project/cxml/cvsroot/cxml/GNUmakefile 2005/03/13 18:02:50 1.1.1.1
+++ /project/cxml/cvsroot/cxml/GNUmakefile 2007/07/05 20:58:15 1.2
@@ -4,5 +4,4 @@
.PHONY: clean
clean:
- touch dummy.fasl
- find . \( -name \*.fasl -o -name \*.x86f \) -print0 | xargs -0 rm
+ find . \( -name \*.fasl -o -name \*.x86f -o -name \*.lx64fsl \) -print0 | xargs -0 rm -f
--- /project/cxml/cvsroot/cxml/cxml.asd 2007/06/16 11:27:18 1.19
+++ /project/cxml/cvsroot/cxml/cxml.asd 2007/07/05 20:58:15 1.20
@@ -2,6 +2,9 @@
(:use :asdf :cl))
(in-package :cxml-system)
+;; force loading of runes.asd, which installs *features* this file depends on
+(find-system :runes)
+
(defclass closure-source-file (cl-source-file) ())
#+scl
--- /project/cxml/cvsroot/cxml/runes.asd 2007/06/30 21:24:34 1.3
+++ /project/cxml/cvsroot/cxml/runes.asd 2007/07/05 20:58:15 1.4
@@ -26,7 +26,9 @@
(progn
(format t " ok, characters have at least 16 bits.~%")
:rune-is-character))
- (unless (and (< x char-code-limit) (code-char x))
+ (unless (or (<= #xD800 x #xDFFF)
+ (and (< x char-code-limit) (code-char x)))
+ (print (code-char x))
(format t " no, reverting to octet strings.~%")
(return :rune-is-integer)))
*features*))
From dlichteblau at common-lisp.net Thu Jul 5 20:58:16 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Thu, 5 Jul 2007 16:58:16 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/xml
Message-ID: <20070705205816.4D50B6600B@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv5595/xml
Modified Files:
xml-parse.lisp
Log Message:
Fixed build on non-Unicode lisps.
Fixed parsing on non-Unicode lisps.
Fixed Unicode detection on OpenMCL.
--- /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/06/16 11:27:19 1.68
+++ /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/07/05 20:58:15 1.69
@@ -2771,7 +2771,7 @@
(write-char c out))))))
(defun compute-base (attrs)
- (let ((new (sax:find-attribute "xml:base" attrs))
+ (let ((new (sax:find-attribute #"xml:base" attrs))
(current (car (base-stack *ctx*))))
(if new
(puri:merge-uris (escape-uri (sax:attribute-value new)) current)
From dlichteblau at common-lisp.net Sat Jul 7 20:47:39 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sat, 7 Jul 2007 16:47:39 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/doc
Message-ID: <20070707204739.E833D4E00F@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/doc
In directory clnet:/tmp/cvs-serv1313/doc
Modified Files:
GNUmakefile index.xml sax.xml
Log Message:
new function cxml:parse
--- /project/cxml/cvsroot/cxml/doc/GNUmakefile 2007/03/04 18:30:40 1.2
+++ /project/cxml/cvsroot/cxml/doc/GNUmakefile 2007/07/07 20:47:38 1.3
@@ -2,4 +2,5 @@
%.html: %.xml html.xsl
xsltproc html.xsl $< >$@.tmp
+ chmod -w *.html
mv $@.tmp $@
--- /project/cxml/cvsroot/cxml/doc/index.xml 2007/07/05 20:58:15 1.12
+++ /project/cxml/cvsroot/cxml/doc/index.xml 2007/07/07 20:47:38 1.13
@@ -63,6 +63,7 @@
Fixed build on non-Unicode lisps. Fixed parsing on
non-Unicode lisps. Fixed Unicode detection on OpenMCL.
+ - New function cxml:parse.
- Serialization no longer defaults to canonical form.
- Fixed octet array argument to make-source.
-
@@ -83,11 +84,11 @@
rel-2007-05-26
- - cxml.asd has been split up into cxml.asd for the
+
- cxml.asd has been split up into cxml.asd for the
XML parser and runes.asd for the runes package, in
preparation of a complete split of the two systems. Future CXML
releases will use separate tarballs for runes
- and cxml.
+ and cxml.
- xml:base support (SAX and Klacks only, not yet used in DOM).
See documentation here and here.
--- /project/cxml/cvsroot/cxml/doc/sax.xml 2007/07/01 18:56:09 1.5
+++ /project/cxml/cvsroot/cxml/doc/sax.xml 2007/07/07 20:47:38 1.6
@@ -43,17 +43,63 @@
Parsing and Validating
+
+
+ Old-style convenience functions:
+
+
Function CXML:PARSE-FILE (pathname handler &key ...)
+
Same as cxml:parse with a pathname argument.
+ (But note that cxml:parse-file interprets string
+ arguments as namestrings, while cxml:parse expects
+ literal XML documents.)
+
+
Function CXML:PARSE-STREAM (stream handler &key ...)
+
Same as cxml:parse with a stream argument.
+
Function CXML:PARSE-OCTETS (octets handler &key ...)
+
Same as cxml:parse with an octet vector argument.
+
Function CXML:PARSE-ROD (rod handler &key ...)
+
Same as cxml:parse with a string argument.
+
+
+
+ New all-in-one parser interface:
+
+ Function CXML:PARSE (input handler &key ...)
-
Function CXML:PARSE-FILE (pathname handler &key ...)
- Function CXML:PARSE-STREAM (stream handler &key ...)
- Function CXML:PARSE-OCTETS (octets handler &key ...)
- Function CXML:PARSE-ROD (rod handler &key ...)
- Parse an XML document.
+ Parse an XML document, where input is a string, pathname, octet
+ vector, or stream.
Return values from this function depend on the SAX handler used.
Arguments:
- - pathname -- a Common Lisp pathname
+ -
+ input -- one of:
+
+ -
+ pathname -- a Common Lisp pathname.
+ Open the file specified by the pathname and create a source for
+ the resulting stream. See below for information on how to
+ close the stream.
+
+ - stream -- a Common Lisp stream with element-type
+ (unsigned-byte 8). See below for information on how to
+ close the stream.
+
+ -
+ octets -- an (unsigned-byte 8) array.
+ The array is parsed directly, and interpreted according to the
+ encoding it specifies.
+
+ -
+ string/rod -- a rod (or string on
+ unicode-capable implementations).
+ Parses an XML document from the input string that has already
+ undergone external-format decoding.
+
+
+
- stream -- a Common Lisp stream with element-type
(unsigned-byte 8)
- octets -- an (unsigned-byte 8) array
From dlichteblau at common-lisp.net Sat Jul 7 20:47:40 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sat, 7 Jul 2007 16:47:40 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/xml
Message-ID: <20070707204740.A500F5416B@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv1313/xml
Modified Files:
package.lisp xml-parse.lisp
Log Message:
new function cxml:parse
--- /project/cxml/cvsroot/cxml/xml/package.lisp 2007/07/01 18:52:33 1.18
+++ /project/cxml/cvsroot/cxml/xml/package.lisp 2007/07/07 20:47:39 1.19
@@ -32,6 +32,7 @@
#:attribute-qname
#:attribute-value
+ #:parse
#:parse-file
#:parse-stream
#:parse-rod
--- /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/07/05 20:58:15 1.69
+++ /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/07/07 20:47:40 1.70
@@ -3094,6 +3094,32 @@
(setf (slot-value pathname 'lisp::host) "localhost"))
pathname))
+(defun parse
+ (input handler &rest args
+ &key validate dtd root entity-resolver disallow-internal-subset
+ recode pathname)
+ (declare (ignore validate dtd root entity-resolver disallow-internal-subset
+ recode))
+ (let ((args
+ (loop
+ for (name value) on args by #'cddr
+ unless (eq name :pathname)
+ append (list name value))))
+ (etypecase input
+ (xstream (apply #'make-xstream input handler args))
+ (pathname (apply #'parse-file input handler args))
+ (rod (apply #'parse-rod input handler args))
+ (array (apply #'parse-octets input handler args))
+ (stream
+ (let ((xstream (make-xstream input :speed 8192)))
+ (setf (xstream-name xstream)
+ (make-stream-name
+ :entity-name "main document"
+ :entity-kind :main
+ :uri (pathname-to-uri
+ (merge-pathnames (or pathname (pathname input))))))
+ (apply #'parse-xstream xstream handler args))))))
+
(defun parse-xstream (xstream handler &rest args)
(let ((*ctx* nil))
(handler-case
From dlichteblau at common-lisp.net Sat Jul 7 20:47:51 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sat, 7 Jul 2007 16:47:51 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/doc
Message-ID: <20070707204751.664835D09E@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/doc
In directory clnet:/tmp/cvs-serv1386/doc
Modified Files:
index.xml
Log Message:
new release
--- /project/cxml/cvsroot/cxml/doc/index.xml 2007/07/07 20:47:38 1.13
+++ /project/cxml/cvsroot/cxml/doc/index.xml 2007/07/07 20:47:51 1.14
@@ -57,7 +57,7 @@
Recent Changes
- rel-2007-mm-dd
+ rel-2007-07-07
-
Fixed build on non-Unicode lisps. Fixed parsing on
From dlichteblau at common-lisp.net Sun Jul 22 19:43:27 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 22 Jul 2007 15:43:27 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/xml
Message-ID: <20070722194327.AC7627208F@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv6596
Modified Files:
xml-parse.lisp
Log Message:
DTD/SAX fix
--- /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/07/07 20:47:40 1.70
+++ /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/07/22 19:43:27 1.71
@@ -1096,7 +1096,8 @@
(cond
((null e)
(setf (gethash element-name (dtd-elements dtd))
- (make-elmdef :name element-name :content content-model)))
+ (make-elmdef :name element-name :content content-model))
+ (sax:element-declaration (handler *ctx*) element-name content-model))
((null content-model)
e)
(t
From dlichteblau at common-lisp.net Sun Jul 22 19:44:01 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 22 Jul 2007 15:44:01 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/xml
Message-ID: <20070722194401.B0B5C73234@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv6676
Modified Files:
unparse.lisp
Log Message:
DTD serialization fixes
--- /project/cxml/cvsroot/cxml/xml/unparse.lisp 2007/07/01 18:52:26 1.18
+++ /project/cxml/cvsroot/cxml/xml/unparse.lisp 2007/07/22 19:44:01 1.19
@@ -145,15 +145,15 @@
(%write-rod #"") sink)))
@@ -449,6 +452,14 @@
(loop for c across data do (unparse-datachar c y))
(loop for c across data do (unparse-datachar-readable c y))))))))
+(defmethod sax:comment ((sink sink) data)
+ (maybe-close-tag sink)
+ (unless (canonical sink)
+ ;; XXX signal error if body is unprintable?
+ (%write-rod #"" sink)))
+
(defmethod sax:end-cdata ((sink sink))
(unless (eq (pop (stack sink)) :cdata)
(error "output does not nest: not in a cdata section")))
@@ -510,6 +521,7 @@
((rune= c #/<) (write-rod '#.(string-rod "<") ystream))
((rune= c #/>) (write-rod '#.(string-rod ">") ystream))
((rune= c #/\") (write-rod '#.(string-rod """) ystream))
+ ((rune= c #/U+000D) (write-rod '#.(string-rod "
") ystream))
(t
(write-rune c ystream))))
From dlichteblau at common-lisp.net Sun Jul 22 19:59:26 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 22 Jul 2007 15:59:26 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/doc
Message-ID: <20070722195926.4216C4B034@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/doc
In directory clnet:/tmp/cvs-serv8223/doc
Modified Files:
GNUmakefile index.xml
Log Message:
UTF-8 fix, thanks to Francis Leboutte
--- /project/cxml/cvsroot/cxml/doc/GNUmakefile 2007/07/07 20:47:38 1.3
+++ /project/cxml/cvsroot/cxml/doc/GNUmakefile 2007/07/22 19:59:26 1.4
@@ -2,5 +2,5 @@
%.html: %.xml html.xsl
xsltproc html.xsl $< >$@.tmp
- chmod -w *.html
mv $@.tmp $@
+ chmod -w $@
--- /project/cxml/cvsroot/cxml/doc/index.xml 2007/07/07 20:47:51 1.14
+++ /project/cxml/cvsroot/cxml/doc/index.xml 2007/07/22 19:59:26 1.15
@@ -4,26 +4,13 @@
An XML parser written in Common Lisp.
- Closure XML was written by Gilbert Baumann
- (unk6 at rz.uni-karlsruhe.de) as part of the Closure web
- browser.
- Contributions to the parser by
-
-
- -
- Henrik Motakef (hmot at henrik-motakef.de)
- (SAX layer; namespace support)
-
- -
- David Lichteblau for knowledgeTools
- (conversion into an independent package; DOM bug fixing; validation)
- and headcraft
- (most september/october 2004 changes) and privately (changes
- since then).
-
-
+ Closure XML was written
+ by Gilbert
+ Baumann as part of the Closure web browser and is now
+ maintained by
+ David Lichteblau.
+ It is licensed under Lisp-LGPL.
+
CXML implements a
- CXML is licensed under Lisp-LGPL.
-
-
-
Send bug reports to cxml-devel at common-lisp.net
().
- See also
+ Add-on features
+
+ The following libraries are available as separate downloads:
+
- Relax NG validation is available as a separate
- project: cxml-rng.
+ ⬗
+ cxml-rng
+
+ Relax NG validation
+
+
+ ⬗
+ cxml-stp
+
+ STP, an alternative to DOM
Recent Changes
+ rel-2007-xx-yy
+
+ - Various DTD serialization fixes
+ - UTF-8 fix, thanks to Francis Leboutte
+
rel-2007-07-07
-
From dlichteblau at common-lisp.net Sun Jul 22 19:59:26 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 22 Jul 2007 15:59:26 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/dom
Message-ID: <20070722195926.A77D64B031@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/dom
In directory clnet:/tmp/cvs-serv8223/dom
Modified Files:
dom-builder.lisp
Log Message:
UTF-8 fix, thanks to Francis Leboutte
--- /project/cxml/cvsroot/cxml/dom/dom-builder.lisp 2007/02/18 14:35:15 1.12
+++ /project/cxml/cvsroot/cxml/dom/dom-builder.lisp 2007/07/22 19:59:26 1.13
@@ -135,7 +135,7 @@
(cond
((eq (dom:node-type parent) :cdata-section)
(setf (dom:data parent) data))
- ((and last-child (eq (dom:node-type last-child) :text))
+ ((and last-child (eq (dom:node-type last-child) :text))
;; um entities herum wird SAX:CHARACTERS mehrfach aufgerufen fuer
;; den gleichen Textknoten. Hier muessen wir den bestehenden Knoten
;; erweitern, sonst ist das Dokument nicht normalisiert.
From dlichteblau at common-lisp.net Sun Jul 22 19:59:26 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 22 Jul 2007 15:59:26 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/runes
Message-ID: <20070722195926.DA27A4E050@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/runes
In directory clnet:/tmp/cvs-serv8223/runes
Modified Files:
encodings.lisp
Log Message:
UTF-8 fix, thanks to Francis Leboutte
--- /project/cxml/cvsroot/cxml/runes/encodings.lisp 2005/11/28 22:22:51 1.6
+++ /project/cxml/cvsroot/cxml/runes/encodings.lisp 2007/07/22 19:59:26 1.7
@@ -250,7 +250,7 @@
(setf rptr (%+ rptr 1)))
((%<= #|#b11000000|# byte0 #b11011111)
- (cond ((< (%+ rptr 2) in-end)
+ (cond ((<= (%+ rptr 2) in-end)
(put
(dpb (ldb (byte 5 0) byte0) (byte 5 6)
(dpb (ldb (byte 6 0) (aref in (%+ rptr 1))) (byte 6 0)
@@ -260,7 +260,7 @@
(return))))
((%<= #|#b11100000|# byte0 #b11101111)
- (cond ((< (%+ rptr 3) in-end)
+ (cond ((<= (%+ rptr 3) in-end)
(put
(dpb (ldb (byte 4 0) byte0) (byte 4 12)
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 6)
@@ -271,7 +271,7 @@
(return))))
((%<= #|#b11110000|# byte0 #b11110111)
- (cond ((< (%+ rptr 4) in-end)
+ (cond ((<= (%+ rptr 4) in-end)
(put
(dpb (ldb (byte 3 0) byte0) (byte 3 18)
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 12)
@@ -283,7 +283,7 @@
(return))))
((%<= #|#b11111000|# byte0 #b11111011)
- (cond ((< (%+ rptr 5) in-end)
+ (cond ((<= (%+ rptr 5) in-end)
(put
(dpb (ldb (byte 2 0) byte0) (byte 2 24)
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 18)
@@ -296,7 +296,7 @@
(return))))
((%<= #|#b11111100|# byte0 #b11111101)
- (cond ((< (%+ rptr 6) in-end)
+ (cond ((<= (%+ rptr 6) in-end)
(put
(dpb (ldb (byte 1 0) byte0) (byte 1 30)
(dpb (ldb (byte 6 0) (aref in (%+ 1 rptr))) (byte 6 24)
From dlichteblau at common-lisp.net Sun Jul 22 19:59:27 2007
From: dlichteblau at common-lisp.net (dlichteblau)
Date: Sun, 22 Jul 2007 15:59:27 -0400 (EDT)
Subject: [cxml-cvs] CVS cxml/test
Message-ID: <20070722195927.949B84E057@common-lisp.net>
Update of /project/cxml/cvsroot/cxml/test
In directory clnet:/tmp/cvs-serv8223/test
Added Files:
misc.lisp
Log Message:
UTF-8 fix, thanks to Francis Leboutte
--- /project/cxml/cvsroot/cxml/test/misc.lisp 2007/07/22 19:59:27 NONE
+++ /project/cxml/cvsroot/cxml/test/misc.lisp 2007/07/22 19:59:27 1.1
;;;
;;; When I'll grow up, I'll be a complete test suite.
(deftest utf-8
(flet ((doit (from below)
(loop for code from from below below do
(when (and (code-char code)
(not (eql code #xfffe))
(not (eql code #xffff)))
(let* ((a (if (< code #x10000)
(format nil "abc~C" (code-char code))
(let* ((x (- code #x10000))
(lo (ldb (byte 10 0) x))
(hi (ldb (byte 10 10) x)))
(format nil "abc~C~C"
(code-char (logior #xD800 hi))
(code-char
(logior #xDC00 lo))))))
(b (cxml:utf8-string-to-rod
(cxml:rod-to-utf8-string
a))))
(unless (string= a b)
(format t "FAIL: ~S ~A ~A~%"
(code-char code)
(map 'vector #'char-code a)
(map 'vector #'char-code b))))))))
(doit 32 #xD800)
(doit #x10000 char-code-limit)
(values)))