[cxml-cvs] CVS cxml/xml
dlichteblau
dlichteblau at common-lisp.net
Sun Feb 11 18:21:22 UTC 2007
Update of /project/cxml/cvsroot/cxml/xml
In directory clnet:/tmp/cvs-serv26091/xml
Modified Files:
package.lisp xml-parse.lisp
Log Message:
klacks parser
--- /project/cxml/cvsroot/cxml/xml/package.lisp 2006/12/02 13:21:37 1.13
+++ /project/cxml/cvsroot/cxml/xml/package.lisp 2007/02/11 18:21:21 1.14
@@ -83,4 +83,6 @@
#:make-namespace-normalizer
#:make-whitespace-normalizer
#:rod-to-utf8-string
- #:utf8-string-to-rod))
+ #:utf8-string-to-rod
+
+ #:make-source))
--- /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2006/09/16 07:52:59 1.64
+++ /project/cxml/cvsroot/cxml/xml/xml-parse.lisp 2007/02/11 18:21:22 1.65
@@ -68,11 +68,11 @@
;; :stag (<name> . <atts>) ;start tag
;; :etag (<name> . <atts>) ;end tag
;; :ztag (<name> . <atts>) ;empty tag
-;; :<!element
-;; :<!entity
-;; :<!attlist
-;; :<!notation
-;; :<!doctype
+;; :<!ELEMENT
+;; :<!ENTITY
+;; :<!ATTLIST
+;; :<!NOTATION
+;; :<!DOCTYPE
;; :<![
;; :comment <content>
@@ -194,11 +194,13 @@
(defvar *expand-pe-p* nil)
-(defparameter *namespace-bindings*
+(defparameter *initial-namespace-bindings*
'((#"" . nil)
(#"xmlns" . #"http://www.w3.org/2000/xmlns/")
(#"xml" . #"http://www.w3.org/XML/1998/namespace")))
+(defparameter *namespace-bindings* *initial-namespace-bindings*)
+
;;;; ---------------------------------------------------------------------------
;;;; xstreams
;;;;
@@ -2571,22 +2573,16 @@
:main-zstream input
:entity-resolver entity-resolver
:disallow-internal-subset disallow-internal-subset))
- (*validate* validate))
+ (*validate* validate)
+ (*namespace-bindings* *initial-namespace-bindings*))
(sax:start-document handler)
;; document ::= XMLDecl? Misc* (doctypedecl Misc*)? element Misc*
;; Misc ::= Comment | PI | S
;; xmldecl::='<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
;; sddecl::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"'))
- ;;
- ;; we will use the attribute-value parser for the xml decl.
(let ((*data-behaviour* :DTD))
;; optional XMLDecl?
- (cond ((eq (peek-token input) :xml-decl)
- (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input))))))
- (setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes))
- (setup-encoding input hd))
- (read-token input)))
- (set-full-speed input)
+ (p/xmldecl input)
;; Misc*
(p/misc*-2 input)
;; (doctypedecl Misc*)?
@@ -2595,13 +2591,7 @@
(p/doctype-decl input dtd)
(p/misc*-2 input))
(dtd
- (let ((dummy (string->xstream "<!DOCTYPE dummy>")))
- (setf (xstream-name dummy)
- (make-stream-name
- :entity-name "dummy doctype"
- :entity-kind :main
- :uri (zstream-base-sysid input)))
- (p/doctype-decl (make-zstream :input-stack (list dummy)) dtd)))
+ (synthesize-doctype dtd input))
((and validate (not dtd))
(validity-error "invalid document: no doctype")))
(ensure-dtd)
@@ -2610,28 +2600,65 @@
(setf (model-stack *ctx*) (list (make-root-model root))))
;; element
(let ((*data-behaviour* :DOC))
- (when (eq (peek-token input) :seen-<)
- (multiple-value-bind (c s)
- (read-token-after-|<| input (car (zstream-input-stack input)))
- (setf (zstream-token-category input) c
- (zstream-token-semantic input) s)))
+ (fix-seen-< input)
(p/element input))
;; optional Misc*
(p/misc*-2 input)
- (unless (eq (peek-token input) :eof)
- (wf-error input "Garbage at end of document."))
- (when *validate*
- (maphash (lambda (k v)
- (unless v
- (validity-error "(11) IDREF: ~S not defined" (rod-string k))))
- (id-table *ctx*))
-
- (dolist (name (referenced-notations *ctx*))
- (unless (find-notation name (dtd *ctx*))
- (validity-error "(23) Notation Declared: ~S" (rod-string name)))))
+ (p/eof input)
(sax:end-document handler))))
+(defun synthesize-doctype (dtd input)
+ (let ((dummy (string->xstream "<!DOCTYPE dummy>")))
+ (setf (xstream-name dummy)
+ (make-stream-name
+ :entity-name "dummy doctype"
+ :entity-kind :main
+ :uri (zstream-base-sysid input)))
+ (p/doctype-decl (make-zstream :input-stack (list dummy)) dtd)))
+
+(defun fix-seen-< (input)
+ (when (eq (peek-token input) :seen-<)
+ (multiple-value-bind (c s)
+ (read-token-after-|<| input (car (zstream-input-stack input)))
+ (setf (zstream-token-category input) c
+ (zstream-token-semantic input) s))))
+
+(defun p/xmldecl (input)
+ ;; we will use the attribute-value parser for the xml decl.
+ (prog1
+ (when (eq (peek-token input) :xml-decl)
+ (let ((hd (parse-xml-decl (cdr (nth-value 1 (peek-token input))))))
+ (setf (standalone-p *ctx*) (eq (xml-header-standalone-p hd) :yes))
+ (setup-encoding input hd)
+ (read-token input)
+ hd))
+ (set-full-speed input)))
+
+(defun p/eof (input)
+ (unless (eq (peek-token input) :eof)
+ (wf-error input "Garbage at end of document."))
+ (when *validate*
+ (maphash (lambda (k v)
+ (unless v
+ (validity-error "(11) IDREF: ~S not defined" (rod-string k))))
+ (id-table *ctx*))
+
+ (dolist (name (referenced-notations *ctx*))
+ (unless (find-notation name (dtd *ctx*))
+ (validity-error "(23) Notation Declared: ~S" (rod-string name))))))
+
(defun p/element (input)
+ (multiple-value-bind (cat n-b new-b uri lname qname attrs) (p/sztag input)
+ (sax:start-element (handler *ctx*) uri lname qname attrs)
+ (when (eq cat :stag)
+ (let ((*namespace-bindings* n-b))
+ (p/content input))
+ (p/etag input qname))
+ (sax:end-element (handler *ctx*) uri lname qname)
+ (undeclare-namespaces new-b)
+ (validate-end-element *ctx* qname)))
+
+(defun p/sztag (input)
(multiple-value-bind (cat sem) (read-token input)
(case cat
((:stag :ztag))
@@ -2657,28 +2684,39 @@
(setf attrs
(remove-if (compose #'xmlns-attr-p #'sax:attribute-qname)
attrs)))
- (cond
- ((eq cat :ztag)
- (sax:start-element (handler *ctx*) uri local-name name attrs)
- (sax:end-element (handler *ctx*) uri local-name name))
-
- ((eq cat :stag)
- (sax:start-element (handler *ctx*) uri local-name name attrs)
- (p/content input)
- (multiple-value-bind (cat2 sem2) (read-token input)
- (unless (and (eq cat2 :etag)
- (eq (car sem2) name))
- (wf-error input "Bad nesting. ~S / ~S"
- (mu name)
- (mu (cons cat2 sem2))))
- (when (cdr sem2)
- (wf-error input "no attributes allowed in end tag")))
- (sax:end-element (handler *ctx*) uri local-name name))
-
- (t
- (wf-error input "Expecting element, got ~S." cat))))
- (undeclare-namespaces new-namespaces))
- (validate-end-element *ctx* name))))
+ (values cat
+ *namespace-bindings*
+ new-namespaces
+ uri local-name name attrs))))))
+
+(defun p/etag (input qname)
+ (multiple-value-bind (cat2 sem2) (read-token input)
+ (unless (and (eq cat2 :etag)
+ (eq (car sem2) qname))
+ (wf-error input "Bad nesting. ~S / ~S"
+ (mu qname)
+ (mu (cons cat2 sem2))))
+ (when (cdr sem2)
+ (wf-error input "no attributes allowed in end tag"))))
+
+(defun process-characters (input sem)
+ (consume-token input)
+ (when (search #"]]>" sem)
+ (wf-error input "']]>' not allowed in CharData"))
+ (validate-characters *ctx* sem))
+
+(defun process-cdata-section (input)
+ (consume-token input)
+ (let ((input (car (zstream-input-stack input))))
+ (unless (and (rune= #/C (read-rune input))
+ (rune= #/D (read-rune input))
+ (rune= #/A (read-rune input))
+ (rune= #/T (read-rune input))
+ (rune= #/A (read-rune input))
+ (rune= #/\[ (read-rune input)))
+ (wf-error input "After '<![', 'CDATA[' is expected."))
+ (validate-characters *ctx* #"hack") ;anything other than whitespace
+ (read-cdata-sect input)))
(defun p/content (input)
;; [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
@@ -2688,10 +2726,7 @@
(p/element input)
(p/content input))
((:CDATA)
- (consume-token input)
- (when (search #"]]>" sem)
- (wf-error input "']]>' not allowed in CharData"))
- (validate-characters *ctx* sem)
+ (process-characters input sem)
(sax:characters (handler *ctx*) sem)
(p/content input))
((:ENTITY-REF)
@@ -2709,21 +2744,11 @@
(peek-token input))))))
(p/content input))))
((:<!\[)
- (consume-token input)
- (cons
- (let ((input (car (zstream-input-stack input))))
- (unless (and (rune= #/C (read-rune input))
- (rune= #/D (read-rune input))
- (rune= #/A (read-rune input))
- (rune= #/T (read-rune input))
- (rune= #/A (read-rune input))
- (rune= #/\[ (read-rune input)))
- (wf-error input "After '<![', 'CDATA[' is expected."))
- (validate-characters *ctx* #"hack") ;anything other than whitespace
- (sax:start-cdata (handler *ctx*))
- (sax:characters (handler *ctx*) (read-cdata-sect input))
- (sax:end-cdata (handler *ctx*)))
- (p/content input)))
+ (let ((data (process-cdata-section input)))
+ (sax:start-cdata (handler *ctx*))
+ (sax:characters (handler *ctx*) data)
+ (sax:end-cdata (handler *ctx*)))
+ (p/content input))
((:PI)
(consume-token input)
(sax:processing-instruction (handler *ctx*) (car sem) (cdr sem))
More information about the Cxml-cvs
mailing list