[s-xml-cvs] CVS update: s-xml/src/xml.lisp

Sven Van Caekenberghe scaekenberghe at common-lisp.net
Thu Aug 18 14:00:56 UTC 2005


Update of /project/s-xml/cvsroot/s-xml/src
In directory common-lisp.net:/tmp/cvs-serv2384/src

Modified Files:
	xml.lisp 
Log Message:
first version with XML namespace parsing support

Date: Thu Aug 18 16:00:50 2005
Author: scaekenberghe

Index: s-xml/src/xml.lisp
diff -u s-xml/src/xml.lisp:1.7 s-xml/src/xml.lisp:1.8
--- s-xml/src/xml.lisp:1.7	Wed Aug 17 15:44:29 2005
+++ s-xml/src/xml.lisp	Thu Aug 18 16:00:48 2005
@@ -1,14 +1,15 @@
 ;;;; -*- mode: lisp -*-
 ;;;;
-;;;; $Id: xml.lisp,v 1.7 2005/08/17 13:44:29 scaekenberghe Exp $
+;;;; $Id: xml.lisp,v 1.8 2005/08/18 14:00:48 scaekenberghe Exp $
 ;;;;
-;;;; This is a Common Lisp implementation of a very basic XML parser.
-;;;; The parser is non-validating and not at all complete (no CDATA).
+;;;; This is a Common Lisp implementation of a basic but usable XML parser.
+;;;; The parser is non-validating and not complete (no CDATA).
+;;;; Namespace and entities are handled.
 ;;;; The API into the parser is a pure functional parser hook model that comes from SSAX,
 ;;;; see also http://pobox.com/~oleg/ftp/Scheme/xml.html or http://ssax.sourceforge.net
 ;;;; Different DOM models are provided, an XSML, an LXML and a xml-element struct based one.
 ;;;;
-;;;; Copyright (C) 2002, 2004 Sven Van Caekenberghe, Beta Nine BVBA.
+;;;; Copyright (C) 2002, 2003, 2004, 2005 Sven Van Caekenberghe, Beta Nine BVBA.
 ;;;;
 ;;;; You are granted the rights to distribute and use this software
 ;;;; as governed by the terms of the Lisp Lesser General Public License
@@ -134,6 +135,100 @@
 	(error (parser-error "encountered unknown entity &~s;" (list entity) stream)))))
   extendable-string)
 
+;;; namespace support
+
+(defclass xml-namespace ()
+  ((uri :documentation "The URI used to identify this namespace"
+        :accessor get-uri
+        :initarg :uri)
+   (prefix :documentation "The prefix assigned to this namespace"
+           :accessor get-prefix
+           :initarg :prefix
+           :initform nil)
+   (package :documentation "The Common Lisp package where this namespace's symbols are interned"
+            :accessor get-package
+            :initarg :package
+            :initform nil))
+  (:documentation "Describes an XML namespace and how it is handled"))
+
+(defvar *local-namespace* (make-instance 'xml-namespace 
+                                         :uri "local"
+                                         :prefix "" 
+                                         :package (find-package :keyword))
+  "The local (global default) XML namespace")
+
+(defvar *namespaces* (list *local-namespace*)
+  "Ordered list of XML namespaces currently in effect")
+
+(defun split-identifier (identifier)
+  "Split an identifier 'prefix:name' and return (values prefix identifier)"
+  (let ((colon-position (position #\: identifier :test #'char=)))
+    (if colon-position
+        (values (subseq identifier 0 colon-position)
+                (subseq identifier (1+ colon-position)))
+      (values nil identifier))))
+
+(defvar *require-existing-symbols* nil
+  "If t, each XML identifier must exist as symbol already")
+
+(defvar *auto-export-symbols* t
+  "If t, export newly interned symbols form their packages")
+
+(defun resolve-identifier (identifier namespaces &optional as-attribute)
+  "Resolve the string identifier in the list of namespaces"
+  (flet ((intern-symbol (string package)
+           (if *require-existing-symbols*
+               (let ((symbol (find-symbol string package)))
+                 (or symbol
+                     (error "Symbol ~s does not exist in ~s" string package)))
+             (let ((symbol (intern string package)))
+               (when (and *auto-export-symbols* 
+                          (not (eql package (find-package :keyword))))
+                 (export symbol package))
+               symbol))))
+    (multiple-value-bind (prefix name)
+        (split-identifier identifier)
+      (if (or (null prefix) (string= prefix "xmlns"))
+          (if as-attribute
+              (intern (if (string= prefix "xmlns") identifier name) (get-package *local-namespace*))
+            (let ((default-namespace (find "" namespaces :key #'get-prefix :test #'string-equal)))
+              (intern-symbol name (get-package default-namespace))))
+        (let ((namespace (find prefix namespaces :key #'get-prefix :test #'string-equal)))
+          (if namespace
+              (intern-symbol name (get-package namespace))
+            (error "namespace not found for prefix ~s" prefix)))))))
+
+(defvar *auto-create-namespace-packages* t
+  "If t, new packages will be created for namespaces, if needed, named by the prefix")
+
+(defun extend-namespaces (attributes namespaces)
+  "Given possible 'xmlns[:prefix]' attributes, extend namespaces"
+  (let (default-namespace-uri)
+    (loop :for (key . value) :in attributes
+          :do (if (string= key "xmlns")
+                  (setf default-namespace-uri value)
+                (multiple-value-bind (prefix name)
+                    (split-identifier key)
+                  (when (string= prefix "xmlns")
+                    (push (make-instance 'xml-namespace
+                                         :uri value
+                                         :prefix name
+                                         :package (or (find-package name)
+                                                      (if *auto-create-namespace-packages*
+                                                          (make-package name :nicknames (list (string-upcase name)))
+                                                        (error "Cannot find or create package ~s" name))))
+                          namespaces)))))
+    (when default-namespace-uri
+      (let ((namespace (find default-namespace-uri namespaces :key #'get-uri :test #'string-equal)))
+        (if namespace
+            (push (make-instance 'xml-namespace
+                                 :uri (get-uri namespace)
+                                 :prefix ""
+                                 :package (get-package namespace))
+                  namespaces)
+          (error "No prefix found for default namespace ~s" default-namespace-uri)))))
+  namespaces)
+
 ;;; the parser state
 
 (defclass xml-parser-state ()
@@ -326,6 +421,7 @@
   "Parse XML element attributes from stream positioned after the tag
   identifier, returning the attributes as an assoc list, ending at
   either a '>' or a '/' which is peeked and also returned"
+  (declare (special *namespaces*))
   (let (char attributes)
     (loop
      ;; skip whitespace separating items
@@ -333,7 +429,7 @@
      ;; start tag attributes ends with > or />
      (when (and char (or (char= char #\>) (char= char #\/))) (return))
      ;; read the attribute key
-     (let ((key (intern (parse-identifier stream (get-mini-buffer state)) :keyword)))
+     (let ((key (copy-seq (parse-identifier stream (get-mini-buffer state)))))
        ;; skip separating whitespace
        (setf char (skip-whitespace stream))
        ;; require = sign (and consume it if present)
@@ -350,6 +446,7 @@
 
 (defun parse-xml-element (stream state)
   "Parse and return an XML element from stream, positioned after the opening '<'"
+  (declare (special *namespaces*))
   ;; opening < has been read
   (when (char= (peek-char nil stream nil nil) #\!)
     (skip-special-tag stream)
@@ -357,62 +454,67 @@
   (let (char buffer open-tag parent-seed has-children)
     (setf parent-seed (get-seed state))
     ;; read tag name (no whitespace between < and name ?)
-    (setf open-tag (intern (parse-identifier stream (get-mini-buffer state)) :keyword))
+    (setf open-tag (copy-seq (parse-identifier stream (get-mini-buffer state))))
     ;; tag has been read, read attributes if any
     (multiple-value-bind (attributes peeked-char)
 	(parse-xml-element-attributes stream state)
-      (setf (get-seed state) (funcall (get-new-element-hook state)
-				      open-tag attributes (get-seed state)))
-      (setf char peeked-char)
-      (when (char= char #\/)
-	;; handle solitary tag of the form <tag .. />
-	(read-char stream)
-	(setf char (read-char stream nil nil))
-	(if (char= #\> char)
-	    (progn
-	      (setf (get-seed state) (funcall (get-finish-element-hook state)
-					      open-tag attributes parent-seed (get-seed state)))
-	      (return-from parse-xml-element))
-	  (error (parser-error "expected >" nil stream))))
-      ;; consume >
-      (read-char stream)
-      (loop
-       (setf buffer (get-buffer state))
-       ;; read whitespace into buffer
-       (setf char (parse-whitespace stream buffer))
-       ;; see what ended the whitespace scan
-       (cond ((null char) (error (parser-error "encountered unexpected eof handling ~a" (list open-tag))))
-	     ((char= char #\<)
-	      ;; consume the <
-	      (read-char stream)
-	      (if (char= (peek-char nil stream nil nil) #\/)
-		  (progn
-		    ;; handle the matching closing tag </tag> and done
-                    ;; if we read whitespace as this (leaf) element's contents, it is significant
-                    (when (and (not has-children) (plusp (length buffer)))
-                      (setf (get-seed state) (funcall (get-text-hook state)
-                                                      (copy-seq buffer) (get-seed state))))
-		    (read-char stream)
-		    (let ((close-tag (intern (parse-identifier stream (get-mini-buffer state)) :keyword)))
-		      (unless (eq open-tag close-tag)
-			(error (parser-error "found <~a> not matched by </~a> but by <~a>"
-					     (list open-tag open-tag close-tag) stream)))
-		      (unless (char= (read-char stream nil nil) #\>)
-			(error (parser-error "expected >" nil stream)))
-		      (setf (get-seed state) (funcall (get-finish-element-hook state)
-						      open-tag attributes parent-seed (get-seed state))))
-		    (return))
-		;; handle child tag and loop, no hooks to call here
-                ;; whitespace between child elements is skipped
-                (progn
-                  (setf has-children t)
-                  (parse-xml-element stream state))))
-	     (t
-	      ;; no child tag, concatenate text to whitespace in buffer
-	      ;; handle text content and loop
-	      (setf char (parse-text stream state buffer))
-	      (setf (get-seed state) (funcall (get-text-hook state)
-					      (copy-seq buffer) (get-seed state)))))))))
+      (let ((*namespaces* (extend-namespaces attributes *namespaces*)))
+        (setf open-tag (resolve-identifier open-tag *namespaces*)
+              attributes (loop :for (key . value) :in attributes
+                               :collect (cons (resolve-identifier key *namespaces* t) value)))
+        (setf (get-seed state) (funcall (get-new-element-hook state)
+                                        open-tag attributes (get-seed state)))
+        (setf char peeked-char)
+        (when (char= char #\/)
+          ;; handle solitary tag of the form <tag .. />
+          (read-char stream)
+          (setf char (read-char stream nil nil))
+          (if (char= #\> char)
+              (progn
+                (setf (get-seed state) (funcall (get-finish-element-hook state)
+                                                open-tag attributes parent-seed (get-seed state)))
+                (return-from parse-xml-element))
+            (error (parser-error "expected >" nil stream))))
+        ;; consume >
+        (read-char stream)
+        (loop
+         (setf buffer (get-buffer state))
+         ;; read whitespace into buffer
+         (setf char (parse-whitespace stream buffer))
+         ;; see what ended the whitespace scan
+         (cond ((null char) (error (parser-error "encountered unexpected eof handling ~a" (list open-tag))))
+               ((char= char #\<)
+                ;; consume the <
+                (read-char stream)
+                (if (char= (peek-char nil stream nil nil) #\/)
+                    (progn
+                      ;; handle the matching closing tag </tag> and done
+                      ;; if we read whitespace as this (leaf) element's contents, it is significant
+                      (when (and (not has-children) (plusp (length buffer)))
+                        (setf (get-seed state) (funcall (get-text-hook state)
+                                                        (copy-seq buffer) (get-seed state))))
+                      (read-char stream)
+                      (let ((close-tag (resolve-identifier (parse-identifier stream (get-mini-buffer state)) 
+                                                           *namespaces*)))
+                        (unless (eq open-tag close-tag)
+                          (error (parser-error "found <~a> not matched by </~a> but by <~a>"
+                                               (list open-tag open-tag close-tag) stream)))
+                        (unless (char= (read-char stream nil nil) #\>)
+                          (error (parser-error "expected >" nil stream)))
+                        (setf (get-seed state) (funcall (get-finish-element-hook state)
+                                                        open-tag attributes parent-seed (get-seed state))))
+                      (return))
+                  ;; handle child tag and loop, no hooks to call here
+                  ;; whitespace between child elements is skipped
+                  (progn
+                    (setf has-children t)
+                    (parse-xml-element stream state))))
+               (t
+                ;; no child tag, concatenate text to whitespace in buffer
+                ;; handle text content and loop
+                (setf char (parse-text stream state buffer))
+                (setf (get-seed state) (funcall (get-text-hook state) 
+                                                (copy-seq buffer) (get-seed state))))))))))
 
 (defun start-parse-xml (stream &optional (state (make-instance 'xml-parser-state)))
   "Parse and return a toplevel XML element from stream, using parser state"




More information about the S-xml-cvs mailing list