From dlichteblau at common-lisp.net Thu Jun 23 16:25:49 2005 From: dlichteblau at common-lisp.net (David Lichteblau) Date: Thu, 23 Jun 2005 18:25:49 +0200 (CEST) Subject: [cxml-cvs] CVS update: Directory change: cxml/doc Message-ID: <20050623162549.A880E88160@common-lisp.net> Update of /project/cxml/cvsroot/cxml/doc In directory common-lisp.net:/tmp/cvs-serv31415/doc Log Message: Directory /project/cxml/cvsroot/cxml/doc added to the repository Date: Thu Jun 23 18:25:49 2005 Author: dlichteblau New directory cxml/doc added From dlichteblau at common-lisp.net Fri Jun 3 15:26:15 2005 From: dlichteblau at common-lisp.net (David Lichteblau) Date: Fri, 3 Jun 2005 17:26:15 +0200 (CEST) Subject: [cxml-cvs] CVS update: CVSROOT/loginfo Message-ID: <20050603152615.6F65E8875E@common-lisp.net> Update of /project/cxml/cvsroot/CVSROOT In directory common-lisp.net:/home/dlichteblau/CVSROOT Modified Files: loginfo Log Message: use cxml-cvs at common-lisp.net Date: Fri Jun 3 17:26:14 2005 Author: dlichteblau Index: CVSROOT/loginfo diff -u CVSROOT/loginfo:1.3 CVSROOT/loginfo:1.4 --- CVSROOT/loginfo:1.3 Sun Mar 13 19:39:19 2005 +++ CVSROOT/loginfo Fri Jun 3 17:26:14 2005 @@ -24,4 +24,4 @@ #DEFAULT (echo ""; id; echo %s; date; cat) >> $CVSROOT/CVSROOT/commitlog # or #DEFAULT (echo ""; id; echo %{sVv}; date; cat) >> $CVSROOT/CVSROOT/commitlog -DEFAULT /custom/bin/cvslog.py closure-cvs at common-lisp.net %{sVv} +DEFAULT /custom/bin/cvslog.py cxml-cvs at common-lisp.net %{sVv} From dlichteblau at common-lisp.net Sat Jun 25 13:56:56 2005 From: dlichteblau at common-lisp.net (David Lichteblau) Date: Sat, 25 Jun 2005 15:56:56 +0200 (CEST) Subject: [cxml-cvs] CVS update: cxml/doc/cxml.css cxml/doc/installation.html cxml/doc/using.html Message-ID: <20050625135656.BAA9E88528@common-lisp.net> Update of /project/cxml/cvsroot/cxml/doc In directory common-lisp.net:/tmp/cvs-serv10721/doc Added Files: cxml.css installation.html using.html Log Message: new release Date: Sat Jun 25 15:56:54 2005 
Author: dlichteblau From dlichteblau at common-lisp.net Sat Jun 25 13:56:57 2005 From: dlichteblau at common-lisp.net (David Lichteblau) Date: Sat, 25 Jun 2005 15:56:57 +0200 (CEST) Subject: [cxml-cvs] CVS update: cxml/runes/runes.lisp Message-ID: <20050625135657.3666188526@common-lisp.net> Update of /project/cxml/cvsroot/cxml/runes In directory common-lisp.net:/tmp/cvs-serv10721/runes Modified Files: runes.lisp Log Message: new release Date: Sat Jun 25 15:56:56 2005 Author: dlichteblau Index: cxml/runes/runes.lisp diff -u cxml/runes/runes.lisp:1.2 cxml/runes/runes.lisp:1.3 --- cxml/runes/runes.lisp:1.2 Fri Mar 25 19:16:56 2005 +++ cxml/runes/runes.lisp Sat Jun 25 15:56:55 2005 @@ -147,12 +147,17 @@ (defun char-rune (char) (code-rune (char-code char))) -(defun rune-char (rune &optional (default #\?)) - (if (>= rune char-code-limit) - default - (or (code-char rune) default))) +(defparameter *invalid-rune* nil ;;#\? + "Rune to use as a replacement in RUNE-CHAR and ROD-STRING for runes not + representable as characters. 
If NIL, an error is signalled instead.") -(defun rod-string (rod &optional (default-char #\?)) +(defun rune-char (rune &optional (default *invalid-rune*)) + (or (if (>= rune char-code-limit) + default + (or (code-char rune) default)) + (error "rune cannot be represented as a character: ~A" rune))) + +(defun rod-string (rod &optional (default-char *invalid-rune*)) (map 'string (lambda (x) (rune-char x default-char)) rod)) (defun string-rod (string) From dlichteblau at common-lisp.net Sat Jun 25 13:57:00 2005 From: dlichteblau at common-lisp.net (David Lichteblau) Date: Sat, 25 Jun 2005 15:57:00 +0200 (CEST) Subject: [cxml-cvs] CVS update: cxml/test/domtest.lisp Message-ID: <20050625135700.63B5888529@common-lisp.net> Update of /project/cxml/cvsroot/cxml/test In directory common-lisp.net:/tmp/cvs-serv10721/test Modified Files: domtest.lisp Log Message: new release Date: Sat Jun 25 15:56:57 2005 Author: dlichteblau Index: cxml/test/domtest.lisp diff -u cxml/test/domtest.lisp:1.2 cxml/test/domtest.lisp:1.3 --- cxml/test/domtest.lisp:1.2 Wed Apr 6 23:14:41 2005 +++ cxml/test/domtest.lisp Sat Jun 25 15:56:57 2005 @@ -126,28 +126,29 @@ (map-child-elements 'list #'identity element)) (defun parse-java-literal (str) - (unless (stringp str) - (setf str (runes:rod-string str))) + (when (stringp str) + (setf str (runes:string-rod str))) (cond ((zerop (length str)) nil) - ((equal str "true") + ((runes:rod= str #"true") t) - ((equal str "false") + ((runes:rod= str #"false") nil) - ((digit-char-p (char str 0)) - (parse-integer str)) - ((char= (char str 0) #\") - (runes:rod - (with-output-to-string (out) - (with-input-from-string (in str) - (read-char in) - (for ((c = (read-char in)) - :until (char= c #\")) - (if (char= c #\\) - (ecase (read-char in) - ;; ... 
- (#\n (write-char #\newline out))) - (write-char c out))))))) + ((digit-char-p (runes:rune-char (elt str 0))) + (parse-integer (runes:rod-string str))) + ((runes:rune= (elt str 0) #.(runes:char-rune #\")) + (let ((v (make-array 1 :fill-pointer 0 :adjustable t))) + (for* ((i = 1 :then (1+ i)) + (c = (elt str i)) + :until (runes:rune= c #.(runes:char-rune #\"))) + (if (runes:rune= c #.(runes:char-rune #\\)) + (ecase (progn + (incf i) + (elt str i)) + ;; ... + (#/n (vector-push-extend #/newline v (length v)))) + (vector-push-extend c v (length v)))) + (coerce v 'runes::simple-rod))) (t (%intern str)))) @@ -613,7 +614,8 @@ document)) (defparameter *bad-tests* - '("hc_nodereplacechildnewchildexists.xml" + '("hc_elementnormalize2.xml" + "hc_nodereplacechildnewchildexists.xml" "characterdatadeletedatanomodificationallowederr.xml")) (defun run-all-tests (*directory* &optional verbose) @@ -635,7 +637,7 @@ (incf n))) (do-child-elements (member suite) (let ((href (runes:rod-string (dom:get-attribute member "href")))) - (unless (or (equal (dom:tag-name member) "metadata") + (unless (or (runes:rod= (dom:tag-name member) #"metadata") (member href *bad-tests* :test 'equal)) (format t "~&~D/~D ~A~%" i n href) (let ((lisp (slurp-test (merge-pathnames href test-directory)))) From dlichteblau at common-lisp.net Sat Jun 25 13:56:55 2005 From: dlichteblau at common-lisp.net (David Lichteblau) Date: Sat, 25 Jun 2005 15:56:55 +0200 (CEST) Subject: [cxml-cvs] CVS update: cxml/README.html cxml/cxml.asd Message-ID: <20050625135655.1C02688526@common-lisp.net> Update of /project/cxml/cvsroot/cxml In directory common-lisp.net:/tmp/cvs-serv10721 Modified Files: README.html cxml.asd Log Message: new release Date: Sat Jun 25 15:56:51 2005 Author: dlichteblau Index: cxml/README.html diff -u cxml/README.html:1.4 cxml/README.html:1.5 --- cxml/README.html:1.4 Wed Apr 20 21:57:59 2005 +++ cxml/README.html Sat Jun 25 15:56:50 2005 @@ -3,33 +3,41 @@
An XML parser written in Common Lisp.
@@ -47,15 +55,28 @@ (SAX layer; namespace support)+ CXML currently implements a namespace-aware, validating SAX-like + XML 1.0 + parser as well as the DOM Level 1 Core + interfaces. +
+ ++ CXML is licensed under (L)LGPL. +
+ +Send bug reports to cxml-devel at common-lisp.net ().
-$ export CVSROOT=:pserver:anonymous at common-lisp.net:/project/cxml/cvsroot -$ cvs login -Logging in to :pserver:anonymous at common-lisp.net:2401/project/cxml/cvsroot -CVS password: anonymous -$ cvs co cxml- -
- (David's tla archive is out of date.) -
- -patch-xyz (200-mm-dd)
+rel-2005-06-25
patch-357 (2004-10-10)
CXML provides three packages:
-- CXML should be portable to all Common Lisp implementations - supporting gray streams. Currently assumed to work are: -
-- Incomplete port: -
-- Optional configuration (skip this unless you know better): CXML - has full Unicode code support -- even on Lisps without Unicode - strings. On non-unicode aware Lisps, DOMString is - implemented as an array of character codes. CXML will auto-detect - at compile-time which string representation to use. To override - the auto-detection, you can set one of the features - :rune-is-character and :rune-is-octet before - loading cxml.asd. (fixme: feature - :rune-is-octet is of course misnamed, since it uses 16bit - runes, not 8bit runes. It will probably be renamed - to :rune-is-integer at some point.) -
- -- ASDF is used for - compilation. The following instructions assume that ASDF has - already been loaded. -
- -- Prerequisites. - CXML needs the puri library. -
- -- Compiling and loading CXML. - Register the .asd file, e.g. by symlinking it: -
-$ ln -sf `pwd`/cxml.asd /path/to/your/registry/-
Then compile CXML using:
-* (asdf:operate 'asdf:load-op :cxml)- -
- You can then try the quick-start example. -
- - -Check out the XML and DOM testsuites:
-$ export CVSROOT=:pserver:anonymous at dev.w3.org:/sources/public -$ cvs login # password is "anonymous" -$ cvs co 2001/XML-Test-Suite/xmlconf -$ cvs co -D '2005-05-06 23:00' 2001/DOM-Test-Suite -$ cd 2001/DOM-Test-Suite && ant dom1-dtd-
- Omit -D to get the latest version, which may not work - with cxml yet. The ant step is necessary to run the DOM - tests. -
-Usage and expected output:
-* (xmlconf:run-all-tests "/path/to/2001/XML-Test-Suite/xmlconf/") -0/556 tests failed; 1606 tests were skipped -* (domtest:run-all-tests "/path/to/2001/DOM-Test-Suite/") -0/450 tests failed; 71 tests were skipped- -
- fixme: Add an explanation of xml/sax-tests here. -
- -- fixme My parser does not understand the current testsuite - anymore. To fix this problem, revert the affected files - manually after check-out: -
- -$ cd 2001/XML-Test-Suite/xmlconf/ -xmltest$ patch -p0 -R </path/to/cxml/test/xmlconf-base.diff- -
- The log message for the changes reads "Removed unnecessary - xml:base attribute". If I understand correctly, only - DOM 3 parsers provide the baseURI attribute necessary for - understanding xmlconf.xml now. We don't have that - yet. -
- + - -- Make sure to install and load cxml first. -
- -Create a test file called example.xml:
-* (with-open-file (s "example.xml" :direction :output) - (write-string "<test a='b'><child/></test>" s))- -
Parse example.xml into a DOM tree (read - more):
-* (cxml:parse-file "example.xml" (dom:make-dom-builder)) -#<DOM-IMPL::DOCUMENT @ #x72206172> -;; save result for later: -* (defparameter *example* *) -*EXAMPLE*- -
Inspect the DOM tree (read more):
-* (dom:document-element *example*) -#<DOM-IMPL::ELEMENT test @ #x722b6ba2> -* (dom:tag-name (dom:document-element *example*)) -"test" -* (dom:child-nodes (dom:document-element *example*)) -#(#<DOM-IMPL::ELEMENT child @ #x722b6d8a>) -* (dom:get-attribute (dom:document-element *example*) "a") -"b"- -
Serialize the DOM document back into a stream (read more):
-(cxml:unparse-document *example* *standard-output*) -<test a="b"><child></child></test>- -
As an alternative to DOM, parse into xmls-compatible list - structure (read more):
-* (cxml:parse-file "example.xml" (cxml-xmls:make-xmls-builder)) -("test" (("a" "b")) ("child" NIL))- - -
-
- Common keyword arguments: -
--
-
-
(cxml:parse-file "test.xml" (dom:make-dom-builder))- - -
-
Keyword arguments:
-- The following canonical values are allowed: -
-- With an indentation level, pretty-print the XML by - inserting additional whitespace. Note that indentation - changes the document model and should only be used if whitespace - does not matter to the application. -
-- unparse-document-to-octets returns an (unsigned-byte - 8) array, whereas unparse-document writes - characters. unparse-document is useful together - with with-output-to-string. However, note that the - resulting document in both cases is UTF-8 encoded, so the - characters written by unparse-document are really UTF-8 - bytes encoded as characters. -
- --
- These functions provide the low-level mechanism used by the DOM - serialization functions. To serialize a document without building - its DOM tree first, create a sink handle and call SAX functions on that - handle. sax:end-document returns the serialized form of - the document described by the SAX events. -
- --
- Example: -
-(with-xml-output (make-octet-stream-sink stream :indentation 2 :canonical nil) - (with-element "foo" - (attribute "xyz" "abc") - (with-element "bar" - (attribute "blub" "bla")) - (text "Hi there.")))-
- Prints this to stream, which must be an - (unsigned-byte 8) stream: -
-<foo xyz="abc"> - <bar blub="bla"></bar> - Hi there. -</foo>-
- (Note that these functions accept both strings and rods, so we - could write "foo" instead of #"foo" above.) -
- --
- xhtmlgen is included as contrib/xhtmlgen.lisp in - the cxml distribution. Example: -
-(let ((sink (cxml:make-character-stream-sink *standard-output*))) - (sax:start-document sink) - (xhtml-generator:write-doctype sink) - (xhtml-generator:with-html sink - (:html - (:head - (:title "Titel")) - (:body - ((:p "style" "font-weight: bold") - "Inhalt") - (:ul - (:li "Eins") - (:li "Zwei") - (:li "Drei"))))) - (sax:end-document sink))- - -
-
(let ((d (parse-file "~/test.xml" (dom:make-dom-builder))) - (x (parse-dtd-file "~/test.dtd"))) - (dom:map-document (cxml:make-validator x #"foo") d))- -
-
-
- Like other XML parsers written in Lisp, CXML can work with - documents represented as list structures. The specific model - implemented by cxml is compatible with the xmls parser. Xmls - list structures are a simpler and faster alternative to full DOM - document trees. They also serve as an example showing how to - implement user-defined document models as an independent layer - over the base parser (c.f. xml/xmls-compat.lisp in - the cxml distribution). However, note that the list structures do - not include all information available in DOM documents and are - sometimes more difficult to work with since many DOM functions - cannot be implemented on them. -
--
- Example: -
-(cxml:parse-file "test.xml" (cxml-xmls:make-xmls-builder))-
-
- Use this function to serialize XMLS data. For example, we could - define a replacement for xmls:write-xml like this: -
-(defun write-xml (stream node &key indent) - (let ((sink (cxml:make-character-stream-sink - stream :canonical nil :indentation indent))) - (cxml-xmls:map-node sink node)))-
-
- The node list's car can also be a cons of local name - and namespace prefix ns. - fixme: It is unclear to me how namespaces are meant to - work in xmls, since xmls documentation differs from how xmls - actually works in current releases. Usually applications need to - know both the namespace prefix and the namespace URI. We - currently follow the xmls implementation and use the - namespace prefix instead of following its documentation which - shows the URI. We do not follow xmls in munging xmlns attribute - values. Attributes themselves have namespaces and it is not clear - to me how that works in xmls. -
--
-
- - -- As explained above, the XML parser handles character encoding and - uses 16bit strings internally. Instead of using characters and strings - it uses runes and rods. This is seen as a - feature, but can be inconvenient. -
-- Note that the recoder approach does not work with the DOM - builder, since DOM is specified to use UTF-16. -
--
- Example. In a Lisp which ordinarily would use octet vector rods: -
-CL-USER(14): (cxml:parse-string "<test/>" (cxml-xmls:make-xmls-builder)) -(#(116 101 115 116) NIL)-
- Use a SAX recoder to get strings instead: -
-CL-USER(17): (parse-string "<test/>" (cxml:make-recoder (cxml-xmls:make-xmls-builder))) -("test" NIL)- - -
- To avoid spending time parsing the same DTD over and over again, - CXML can cache DTD objects. The parser consults - cxml:*dtd-cache* whenever it is looking for an external - subset in a document which does not have an internal subset and - uses the cached DTD instance if one is present in the cache for - the System ID in question. -
-- Note that DTDs do not expire from the cache automatically. - (Future versions of CXML might introduce automatic checks for - outdated DTDs.) -
--
-
-
-
-
-
-
- fixme: thread-safety -
- External entities (for example, DTDs) are referred to using their - Public and System IDs. Usually the System ID, a URI, is used to - locate the entity. CXML itself handles only file://-URIs, but - many System IDs in practical use are http://-URIs. There are two - different mechanisms applications can use to allow CXML to locate - entities using arbitrary Public ID or System ID: -
-- This section describes XML Catalogs, the second solution. CXML - implements Oasis - XML Catalogs. -
--
-
-
-
-
- Example: -
-* (setf cxml:*catalog* nil) -* (cxml:parse-file "test.xhtml" nil) -=> Error: URI scheme :HTTP not supported - -* (setf cxml:*catalog* (cxml:make-catalog)) -* (cxml:parse-file "test.xhtml" nil) -;; no error! -NIL-
- Note that parsed catalog files are cached in the catalog object. - Catalog files cached do not expire automatically. To ensure that - all catalog files are parsed again, create a new catalog object. -
- A SAX handler is an arbitrary object that implements some of the - generic functions in the SAX package. Note that no default - handler class is necessary, because all generic functions have default - methods which do nothing. SAX functions are: -
- The entity declaration methods are similar to Java SAX - definitions, but parameter entities are distinguished from - general entities not by a % prefix to the name, but by - the kind argument, either :parameter or - :general. -
-- The arguments to sax:element-declaration and - sax:attribute-declaration differ significantly from their - Java counterparts. -
-- fixme: For more information on these functions refer to the docstrings. -
- - - -- CXML implements the DOM Level 1 Core interfaces. Explaining - DOM is better left to the specification, - so please refer to the official W3C documents for DOM. -
- However, there is no "standard" DOM mapping for Lisp. DOM - is specified - in CORBA IDL, but it refrains from using object-oriented IDL - features, allowing for a much more natural Lisp implementation than - the ordinary IDL/Lisp mapping would. -
-- Differences between CXML's DOM and the direct IDL/Lisp mapping: -
-Example:
-XML(97): (dom:node-type - (dom:document-element - (cxml:parse-file "~/test.xml" (dom:make-dom-builder)))) -:ELEMENTIndex: cxml/cxml.asd diff -u cxml/cxml.asd:1.4 cxml/cxml.asd:1.5 --- cxml/cxml.asd:1.4 Fri May 13 21:57:38 2005 +++ cxml/cxml.asd Sat Jun 25 15:56:50 2005 @@ -98,7 +98,6 @@ (:file "dom-impl" :depends-on ("package")) (:file "dom-builder" :depends-on ("dom-impl")) (:file "unparse" :depends-on ("package")) - (:file "simple-dom" :depends-on ("package")) (:file "dom-sax" :depends-on ("package"))) :depends-on (:xml))