[s-xml-cvs] CVS update: s-xml/src/xml.lisp
Sven Van Caekenberghe
scaekenberghe at common-lisp.net
Thu Aug 18 14:00:56 UTC 2005
Update of /project/s-xml/cvsroot/s-xml/src
In directory common-lisp.net:/tmp/cvs-serv2384/src
Modified Files:
xml.lisp
Log Message:
first version with XML namespace parsing support
Date: Thu Aug 18 16:00:50 2005
Author: scaekenberghe
Index: s-xml/src/xml.lisp
diff -u s-xml/src/xml.lisp:1.7 s-xml/src/xml.lisp:1.8
--- s-xml/src/xml.lisp:1.7 Wed Aug 17 15:44:29 2005
+++ s-xml/src/xml.lisp Thu Aug 18 16:00:48 2005
@@ -1,14 +1,15 @@
;;;; -*- mode: lisp -*-
;;;;
-;;;; $Id: xml.lisp,v 1.7 2005/08/17 13:44:29 scaekenberghe Exp $
+;;;; $Id: xml.lisp,v 1.8 2005/08/18 14:00:48 scaekenberghe Exp $
;;;;
-;;;; This is a Common Lisp implementation of a very basic XML parser.
-;;;; The parser is non-validating and not at all complete (no CDATA).
+;;;; This is a Common Lisp implementation of a basic but usable XML parser.
+;;;; The parser is non-validating and not complete (no CDATA).
+;;;; Namespace and entities are handled.
;;;; The API into the parser is a pure functional parser hook model that comes from SSAX,
;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net
;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one.
;;;;
-;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA.
+;;;; Copyright (C) 2002, 2003, 2004, 2005 Sven Van Caekenberghe, Beta Nine BVBA.
;;;;
;;;; You are granted the rights to distribute and use this software
;;;; as governed by the terms of the Lisp Lesser General Public License
@@ -134,6 +135,100 @@
(error (parser-error "encountered unknown entity &~s;" (list entity) stream)))))
extendable-string)
+;;; namespace support
+
+(defclass xml-namespace ()
+ ((uri :documentation "The URI used to identify this namespace"
+ :accessor get-uri
+ :initarg :uri)
+ (prefix :documentation "The prefix assigned to this namespace"
+ :accessor get-prefix
+ :initarg :prefix
+ :initform nil)
+ (package :documentation "The Common Lisp package where this namespace's symbols are interned"
+ :accessor get-package
+ :initarg :package
+ :initform nil))
+ (:documentation "Describes an XML namespace and how it is handled"))
+
+(defvar *local-namespace* (make-instance 'xml-namespace
+ :uri "local"
+ :prefix ""
+ :package (find-package :keyword))
+ "The local (global default) XML namespace")
+
+(defvar *namespaces* (list *local-namespace*)
+ "Ordered list of XML namespaces currently in effect")
+
+(defun split-identifier (identifier)
+ "Split an identifier 'prefix:name' and return (values prefix identifier)"
+ (let ((colon-position (position #\: identifier :test #'char=)))
+ (if colon-position
+ (values (subseq identifier 0 colon-position)
+ (subseq identifier (1+ colon-position)))
+ (values nil identifier))))
+
+(defvar *require-existing-symbols* nil
+ "If t, each XML identifier must exist as symbol already")
+
+(defvar *auto-export-symbols* t
+ "If t, export newly interned symbols form their packages")
+
+(defun resolve-identifier (identifier namespaces &optional as-attribute)
+ "Resolve the string identifier in the list of namespaces"
+ (flet ((intern-symbol (string package)
+ (if *require-existing-symbols*
+ (let ((symbol (find-symbol string package)))
+ (or symbol
+ (error "Symbol ~s does not exist in ~s" string package)))
+ (let ((symbol (intern string package)))
+ (when (and *auto-export-symbols*
+ (not (eql package (find-package :keyword))))
+ (export symbol package))
+ symbol))))
+ (multiple-value-bind (prefix name)
+ (split-identifier identifier)
+ (if (or (null prefix) (string= prefix "xmlns"))
+ (if as-attribute
+ (intern (if (string= prefix "xmlns") identifier name) (get-package *local-namespace*))
+ (let ((default-namespace (find "" namespaces :key #'get-prefix :test #'string-equal)))
+ (intern-symbol name (get-package default-namespace))))
+ (let ((namespace (find prefix namespaces :key #'get-prefix :test #'string-equal)))
+ (if namespace
+ (intern-symbol name (get-package namespace))
+ (error "namespace not found for prefix ~s" prefix)))))))
+
+(defvar *auto-create-namespace-packages* t
+ "If t, new packages will be created for namespaces, if needed, named by the prefix")
+
+(defun extend-namespaces (attributes namespaces)
+ "Given possible 'xmlns[:prefix]' attributes, extend namespaces"
+ (let (default-namespace-uri)
+ (loop :for (key . value) :in attributes
+ :do (if (string= key "xmlns")
+ (setf default-namespace-uri value)
+ (multiple-value-bind (prefix name)
+ (split-identifier key)
+ (when (string= prefix "xmlns")
+ (push (make-instance 'xml-namespace
+ :uri value
+ :prefix name
+ :package (or (find-package name)
+ (if *auto-create-namespace-packages*
+ (make-package name :nicknames (list (string-upcase name)))
+ (error "Cannot find or create package ~s" name))))
+ namespaces)))))
+ (when default-namespace-uri
+ (let ((namespace (find default-namespace-uri namespaces :key #'get-uri :test #'string-equal)))
+ (if namespace
+ (push (make-instance 'xml-namespace
+ :uri (get-uri namespace)
+ :prefix ""
+ :package (get-package namespace))
+ namespaces)
+ (error "No prefix found for default namespace ~s" default-namespace-uri)))))
+ namespaces)
+
;;; the parser state
(defclass xml-parser-state ()
@@ -326,6 +421,7 @@
"Parse XML element attributes from stream positioned after the tag
identifier, returning the attributes as an assoc list, ending at
either a '>' or a '/' which is peeked and also returned"
+ (declare (special *namespaces*))
(let (char attributes)
(loop
;; skip whitespace separating items
@@ -333,7 +429,7 @@
;; start tag attributes ends with > or />
(when (and char (or (char= char #\>) (char= char #\/))) (return))
;; read the attribute key
- (let ((key (intern (parse-identifier stream (get-mini-buffer state)) :keyword)))
+ (let ((key (copy-seq (parse-identifier stream (get-mini-buffer state)))))
;; skip separating whitespace
(setf char (skip-whitespace stream))
;; require = sign (and consume it if present)
@@ -350,6 +446,7 @@
(defun parse-xml-element (stream state)
"Parse and return an XML element from stream, positioned after the opening '<'"
+ (declare (special *namespaces*))
;; opening < has been read
(when (char= (peek-char nil stream nil nil) #\!)
(skip-special-tag stream)
@@ -357,62 +454,67 @@
(let (char buffer open-tag parent-seed has-children)
(setf parent-seed (get-seed state))
;; read tag name (no whitespace between < and name ?)
- (setf open-tag (intern (parse-identifier stream (get-mini-buffer state)) :keyword))
+ (setf open-tag (copy-seq (parse-identifier stream (get-mini-buffer state))))
;; tag has been read, read attributes if any
(multiple-value-bind (attributes peeked-char)
(parse-xml-element-attributes stream state)
- (setf (get-seed state) (funcall (get-new-element-hook state)
- open-tag attributes (get-seed state)))
- (setf char peeked-char)
- (when (char= char #\/)
- ;; handle solitary tag of the form <tag .. />
- (read-char stream)
- (setf char (read-char stream nil nil))
- (if (char= #\> char)
- (progn
- (setf (get-seed state) (funcall (get-finish-element-hook state)
- open-tag attributes parent-seed (get-seed state)))
- (return-from parse-xml-element))
- (error (parser-error "expected >" nil stream))))
- ;; consume >
- (read-char stream)
- (loop
- (setf buffer (get-buffer state))
- ;; read whitespace into buffer
- (setf char (parse-whitespace stream buffer))
- ;; see what ended the whitespace scan
- (cond ((null char) (error (parser-error "encountered unexpected eof handling ~a" (list open-tag))))
- ((char= char #\<)
- ;; consume the <
- (read-char stream)
- (if (char= (peek-char nil stream nil nil) #\/)
- (progn
- ;; handle the matching closing tag </tag> and done
- ;; if we read whitespace as this (leaf) element's contents, it is significant
- (when (and (not has-children) (plusp (length buffer)))
- (setf (get-seed state) (funcall (get-text-hook state)
- (copy-seq buffer) (get-seed state))))
- (read-char stream)
- (let ((close-tag (intern (parse-identifier stream (get-mini-buffer state)) :keyword)))
- (unless (eq open-tag close-tag)
- (error (parser-error "found <~a> not matched by </~a> but by <~a>"
- (list open-tag open-tag close-tag) stream)))
- (unless (char= (read-char stream nil nil) #\>)
- (error (parser-error "expected >" nil stream)))
- (setf (get-seed state) (funcall (get-finish-element-hook state)
- open-tag attributes parent-seed (get-seed state))))
- (return))
- ;; handle child tag and loop, no hooks to call here
- ;; whitespace between child elements is skipped
- (progn
- (setf has-children t)
- (parse-xml-element stream state))))
- (t
- ;; no child tag, concatenate text to whitespace in buffer
- ;; handle text content and loop
- (setf char (parse-text stream state buffer))
- (setf (get-seed state) (funcall (get-text-hook state)
- (copy-seq buffer) (get-seed state)))))))))
+ (let ((*namespaces* (extend-namespaces attributes *namespaces*)))
+ (setf open-tag (resolve-identifier open-tag *namespaces*)
+ attributes (loop :for (key . value) :in attributes
+ :collect (cons (resolve-identifier key *namespaces* t) value)))
+ (setf (get-seed state) (funcall (get-new-element-hook state)
+ open-tag attributes (get-seed state)))
+ (setf char peeked-char)
+ (when (char= char #\/)
+ ;; handle solitary tag of the form <tag .. />
+ (read-char stream)
+ (setf char (read-char stream nil nil))
+ (if (char= #\> char)
+ (progn
+ (setf (get-seed state) (funcall (get-finish-element-hook state)
+ open-tag attributes parent-seed (get-seed state)))
+ (return-from parse-xml-element))
+ (error (parser-error "expected >" nil stream))))
+ ;; consume >
+ (read-char stream)
+ (loop
+ (setf buffer (get-buffer state))
+ ;; read whitespace into buffer
+ (setf char (parse-whitespace stream buffer))
+ ;; see what ended the whitespace scan
+ (cond ((null char) (error (parser-error "encountered unexpected eof handling ~a" (list open-tag))))
+ ((char= char #\<)
+ ;; consume the <
+ (read-char stream)
+ (if (char= (peek-char nil stream nil nil) #\/)
+ (progn
+ ;; handle the matching closing tag </tag> and done
+ ;; if we read whitespace as this (leaf) element's contents, it is significant
+ (when (and (not has-children) (plusp (length buffer)))
+ (setf (get-seed state) (funcall (get-text-hook state)
+ (copy-seq buffer) (get-seed state))))
+ (read-char stream)
+ (let ((close-tag (resolve-identifier (parse-identifier stream (get-mini-buffer state))
+ *namespaces*)))
+ (unless (eq open-tag close-tag)
+ (error (parser-error "found <~a> not matched by </~a> but by <~a>"
+ (list open-tag open-tag close-tag) stream)))
+ (unless (char= (read-char stream nil nil) #\>)
+ (error (parser-error "expected >" nil stream)))
+ (setf (get-seed state) (funcall (get-finish-element-hook state)
+ open-tag attributes parent-seed (get-seed state))))
+ (return))
+ ;; handle child tag and loop, no hooks to call here
+ ;; whitespace between child elements is skipped
+ (progn
+ (setf has-children t)
+ (parse-xml-element stream state))))
+ (t
+ ;; no child tag, concatenate text to whitespace in buffer
+ ;; handle text content and loop
+ (setf char (parse-text stream state buffer))
+ (setf (get-seed state) (funcall (get-text-hook state)
+ (copy-seq buffer) (get-seed state))))))))))
(defun start-parse-xml (stream &optional (state (make-instance 'xml-parser-state)))
"Parse and return a toplevel XML element from stream, using parser state"
More information about the S-xml-cvs
mailing list