[cxml-cvs] CVS cxml/klacks

dlichteblau dlichteblau at common-lisp.net
Sun Feb 11 18:21:21 UTC 2007


Update of /project/cxml/cvsroot/cxml/klacks
In directory clnet:/tmp/cvs-serv26091/klacks

Added Files:
	klacks-impl.lisp klacks.lisp package.lisp 
Log Message:
klacks parser



--- /project/cxml/cvsroot/cxml/klacks/klacks-impl.lisp	2007/02/11 18:21:21	NONE
+++ /project/cxml/cvsroot/cxml/klacks/klacks-impl.lisp	2007/02/11 18:21:21	1.1
;;; -*- Mode: Lisp; readtable: runes; -*-
;;;  (c) copyright 2007 David Lichteblau

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Library General Public
;;; License as published by the Free Software Foundation; either
;;; version 2 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,
;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;; Library General Public License for more details.
;;;
;;; You should have received a copy of the GNU Library General Public
;;; License along with this library; if not, write to the
;;; Free Software Foundation, Inc., 59 Temple Place - Suite 330,
;;; Boston, MA  02111-1307  USA.

(in-package :cxml)

(defclass cxml-source (klacks:source)
    (;; args to make-source
     (context :initarg :context)
     (validate :initarg :validate)
     (root :initarg :root)
     (dtd :initarg :dtd)
     (error-culprit :initarg :error-culprit)
     ;; current state
     (continuation)
     (current-key :initform nil)
     (current-values)
     (current-attributes)
     (cdata-section-p :reader klacks:current-cdata-section-p)
     ;; extra with-source magic
     (data-behaviour :initform :DTD)
     (namespace-stack :initform (list *initial-namespace-bindings*))
     (temporary-streams :initform nil)
     (scratch-pad :initarg :scratch-pad)
     (scratch-pad-2 :initarg :scratch-pad-2)
     (scratch-pad-3 :initarg :scratch-pad-3)
     (scratch-pad-4 :initarg :scratch-pad-4)))

(defmethod klacks:close-source ((source cxml-source))
  (dolist (xstream (slot-value source 'temporary-streams))
    ;; fixme: error handling?
    (close-xstream xstream)))

(defmacro with-source ((source &rest slots) &body body)
  (let ((s (gensym)))
    `(let* ((,s ,source)
	    (*ctx* (slot-value ,s 'context))
	    (*validate* (slot-value ,s 'validate))
	    (*data-behaviour* (slot-value source 'data-behaviour))
	    (*namespace-bindings* (car (slot-value source 'namespace-stack)))
	    (*scratch-pad* (slot-value source 'scratch-pad))
	    (*scratch-pad-2* (slot-value source 'scratch-pad-2))
	    (*scratch-pad-3* (slot-value source 'scratch-pad-3))
	    (*scratch-pad-4* (slot-value source 'scratch-pad-4)))
       (handler-case
	   (with-slots (, at slots) ,s
	     , at body)
	 (runes-encoding:encoding-error (c)
	   (wf-error (slot-value ,s 'error-culprit) "~A" c))))))

(defun fill-source (source)
  (with-slots (current-key current-values continuation) source
    (unless current-key
      (setf current-key :bogus)
      (setf continuation (funcall continuation))
      (assert (not (eq current-key :bogus))))))

(defmethod klacks:peek ((source cxml-source))
  (with-source (source current-key current-values)
    (fill-source source)
    (apply #'values current-key current-values)))

(defmethod klacks:peek-value ((source cxml-source))
  (with-source (source current-key current-values)
    (fill-source source)
    (apply #'values current-values)))

(defmethod klacks:consume ((source cxml-source))
  (with-source (source current-key current-values)
    (fill-source source)
    (multiple-value-prog1
	(apply #'values current-key current-values)
      (setf current-key nil))))

(defmethod klacks:map-attributes (fn (source cxml-source))
  (dolist (a (slot-value source 'current-attributes))
    (funcall fn
	     (sax:attribute-namespace-uri a)
	     (sax:attribute-local-name a)
	     (sax:attribute-qname a)
	     (sax:attribute-value a)
	     (sax:attribute-specified-p a))))

(defmethod klacks:list-attributes ((source cxml-source))
  (slot-value source 'current-attributes))

(defun make-source
    (input &rest args
     &key validate dtd root entity-resolver disallow-internal-subset
	  pathname)
  (declare (ignore validate dtd root entity-resolver disallow-internal-subset))
  (etypecase input
    (xstream
      (let ((*ctx* nil))
	(let ((zstream (make-zstream :input-stack (list input))))
	  (peek-rune input)
	  (with-scratch-pads ()
	    (apply #'%make-source
		   zstream
		   (loop
		       for (name value) on args by #'cddr
		       unless (eq name :pathname)
		       append (list name value)))))))
    (stream
      (let ((xstream (make-xstream input)))
	(setf (xstream-name xstream)
	      (make-stream-name
	       :entity-name "main document"
	       :entity-kind :main
	       :uri (pathname-to-uri
		     (merge-pathnames (or pathname (pathname input))))))
	(apply #'make-source xstream args)))
    (pathname
      (let* ((xstream
	      (make-xstream (open input :element-type '(unsigned-byte 8))))
	     (source (apply #'make-source
			    xstream
			    :pathname input
			    args)))
	(push xstream (slot-value source 'temporary-streams))
	source))
    (rod
      (let ((xstream (string->xstream input)))
	(setf (xstream-name xstream)
	      (make-stream-name
	       :entity-name "main document"
	       :entity-kind :main
	       :uri nil))
	(apply #'make-source xstream args)))))

(defun %make-source
    (input &key validate dtd root entity-resolver disallow-internal-subset
		error-culprit)
  ;; check types of user-supplied arguments for better error messages:
  (check-type validate boolean)
  (check-type dtd (or null extid))
  (check-type root (or null rod))
  (check-type entity-resolver (or null function symbol))
  (check-type disallow-internal-subset boolean)
  (let* ((context
	  (make-context :handler nil
			:main-zstream input
			:entity-resolver entity-resolver
			:disallow-internal-subset disallow-internal-subset))
	 (source
	  (make-instance 'cxml-source
	    :context context
	    :validate validate
	    :dtd dtd
	    :root root
	    :error-culprit error-culprit
	    :scratch-pad *scratch-pad*
	    :scratch-pad-2 *scratch-pad-2*
	    :scratch-pad-3 *scratch-pad-3*
	    :scratch-pad-4 *scratch-pad-4*)))
    (setf (slot-value source 'continuation)
	  (lambda () (klacks/xmldecl source input)))
    source))

(defun klacks/xmldecl (source input)
  (with-source (source current-key current-values)
    (let ((hd (p/xmldecl input)))
      (setf current-key :start-document)
      (setf current-values
	    (when hd
	      (list (xml-header-version hd)
		    (xml-header-encoding hd)
		    (xml-header-standalone-p hd))))
      (lambda ()
	(klacks/misc*-2 source input
			(lambda ()
			  (klacks/doctype source input)))))))

(defun klacks/misc*-2 (source input successor)
  (with-source (source current-key current-values)
    (multiple-value-bind (cat sem) (peek-token input)
      (case cat
	(:COMMENT
	  (setf current-key :comment)
	  (setf current-values (list sem))
	  (consume-token input)
	  (lambda () (klacks/misc*-2 source input successor)))
	(:PI
	  (setf current-key :processing-instruction)
	  (setf current-values (list (car sem) (cdr sem)))
	  (consume-token input)
	  (lambda () (klacks/misc*-2 source input successor)))
	(:S
	  (consume-token input)
	  (klacks/misc*-2 source input successor))
	(t
	  (funcall successor))))))

(defun klacks/doctype (source input)
  (with-source (source current-key current-values validate dtd)
    (let ((cont (lambda () (klacks/finish-doctype source input)))
	  ignoreme name extid)
      (prog1
	  (cond
	    ((eq (peek-token input) :<!DOCTYPE)
	      (setf (values ignoreme name extid)
		    (p/doctype-decl input dtd))
	      (lambda () (klacks/misc*-2 source input cont)))
	    (dtd
	      (setf (values ignoreme name extid)
		    (synthesize-doctype dtd input))
	      cont)
	    ((and validate (not dtd))
	      (validity-error "invalid document: no doctype"))
	    (t
	      (return-from klacks/doctype
		(funcall cont))))
	(setf current-key :dtd)
	(setf current-values
	      (list name (extid-public extid) (extid-system extid)))))))

(defun klacks/finish-doctype (source input)
  (with-source (source current-key current-values root data-behaviour)
    (ensure-dtd)
    (when root
      (setf (model-stack *ctx*) (list (make-root-model root))))
    (setf data-behaviour :DOC)
    (setf *data-behaviour* :DOC)
    (fix-seen-< input)
    (let* ((final
	    (lambda ()
	      (klacks/eof source input)))
	   (next
	    (lambda ()
	      (setf data-behaviour :DTD)
	      (setf *data-behaviour* :DTD)
	      (klacks/misc*-2 source input final))))
      (klacks/element source input next))))

(defun klacks/eof (source input)
  (with-source (source current-key current-values)
    (p/eof input)
    (setf current-key :end-document)
    (setf current-values nil)
    (lambda () (klacks/nil source))))

(defun klacks/nil (source)
  (with-source (source current-key current-values)
    (setf current-key nil)
    (setf current-values nil)
    (labels ((klacks/done () #'klacks/done))
      #'klacks/done)))

(defun klacks/element (source input cont)
  (with-source (source current-key current-values current-attributes)
    (multiple-value-bind (cat n-b new-b uri lname qname attrs) (p/sztag input)
      (declare (ignore new-b))
      (setf current-key :start-element)
      (setf current-values (list uri lname qname))
      (setf current-attributes attrs)
      (if (eq cat :stag)
	  (lambda ()
	    (klacks/element-2 source input n-b cont))
	  (lambda ()
	    (klacks/ztag source cont))))))

(defun klacks/ztag (source cont)
  (with-source (source current-key current-values current-attributes)
    (setf current-key :end-element)
    (setf current-attributes nil)
    ;; fixme: (undeclare-namespaces new-b)
    (validate-end-element *ctx* (third current-values))
    cont))

(defun klacks/element-2 (source input n-b cont)
  (with-source (source
		current-key current-values current-attributes namespace-stack)
    (let ((values* current-values))
      (setf current-attributes nil)
      (push n-b namespace-stack)
      (let ((finish
	     (lambda ()
	       (pop namespace-stack)
	       (klacks/element-3 source input values* cont))))
	(klacks/content source input finish)))))

(defun klacks/element-3 (source input tag-values cont)
  (with-source (source current-key current-values current-attributes)
    (setf current-key :end-element)
    (setf current-values tag-values)
    (let ((qname (third tag-values)))
      (p/etag input qname)
      ;; fixme: (undeclare-namespaces new-b)
      (validate-end-element *ctx* qname))
    cont))

(defun klacks/content (source input cont)
  (with-source (source current-key current-values cdata-section-p)
    (let ((recurse (lambda () (klacks/content source input cont))))
      (multiple-value-bind (cat sem) (peek-token input)
	(case cat
	  ((:stag :ztag)
	    (klacks/element source input recurse))
	  ((:CDATA)
	    (process-characters input sem)
	    (setf current-key :characters)
	    (setf current-values (list sem))
	    (setf cdata-section-p nil)
	    recurse)
	  ((:ENTITY-REF)
	    (let ((name sem))
	      (consume-token input)
	      (klacks/entity-reference source input name recurse)))
	  ((:<!\[)
	    (setf current-key :characters)
	    (setf current-values (list (process-cdata-section input sem)))
	    (setf cdata-section-p t)
	    recurse)
	  ((:PI)
	    (setf current-key :processing-instruction)
	    (setf current-values (list (car sem) (cdr sem)))
	    (consume-token input)
	    recurse)
	  ((:COMMENT)
	    (setf current-key :comment)
	    (setf current-values (list sem))
	    (consume-token input)
	    recurse)
	  (otherwise
	    (funcall cont)))))))

(defun klacks/entity-reference (source zstream name cont)
  (assert (not (zstream-token-category zstream)))
  (with-source (source temporary-streams)
    (let ((new-xstream (entity->xstream zstream name :general nil)))
      (push new-xstream temporary-streams)
      (push :stop (zstream-input-stack zstream))
      (zstream-push new-xstream zstream)
      (let ((next
	     (lambda ()
	       (klacks/entity-reference-2 source zstream new-xstream cont))))
	(etypecase (checked-get-entdef name :general)
	  (internal-entdef
	    (klacks/content source zstream next))
	  (external-entdef
	    (klacks/ext-parsed-ent source zstream next)))))))

(defun klacks/entity-reference-2 (source zstream new-xstream cont)
  (with-source (source temporary-streams)
    (unless (eq (peek-token zstream) :eof)
      (wf-error zstream "Trailing garbage. - ~S" (peek-token zstream)))
    (assert (eq (peek-token zstream) :eof))
    (assert (eq (pop (zstream-input-stack zstream)) new-xstream))
    (assert (eq (pop (zstream-input-stack zstream)) :stop))
    (setf (zstream-token-category zstream) nil)
    (setf temporary-streams (remove new-xstream temporary-streams))
    (close-xstream new-xstream)
    (funcall cont)))

(defun klacks/ext-parsed-ent (source input cont)
  (with-source (source)
    (when (eq (peek-token input) :xml-decl)
      (let ((hd (parse-text-decl (cdr (nth-value 1 (peek-token input))))))
	(setup-encoding input hd))
      (consume-token input))
    (set-full-speed input)
    (klacks/content source input cont)))

#+(or)
(trace CXML::KLACKS/DOCTYPE 
       CXML::KLACKS/EXT-PARSED-ENT 
       CXML::KLACKS/MISC*-2 
       CXML::KLACKS/ENTITY-REFERENCE 
       CXML::KLACKS/ENTITY-REFERENCE-2 
       CXML::KLACKS/ELEMENT 
       CXML::KLACKS/ZTAG 
       CXML::KLACKS/XMLDECL 
       CXML::KLACKS/FINISH-DOCTYPE 
       CXML::KLACKS/ELEMENT-3 
       CXML::KLACKS/EOF 
       CXML::KLACKS/ELEMENT-2 
       CXML::KLACKS/CONTENT )
--- /project/cxml/cvsroot/cxml/klacks/klacks.lisp	2007/02/11 18:21:21	NONE
+++ /project/cxml/cvsroot/cxml/klacks/klacks.lisp	2007/02/11 18:21:21	1.1
;;; -*- Mode: Lisp; readtable: runes; -*-
;;;  (c) copyright 2007 David Lichteblau

;;; This library is free software; you can redistribute it and/or
;;; modify it under the terms of the GNU Library General Public
;;; License as published by the Free Software Foundation; either
;;; version 2 of the License, or (at your option) any later version.
;;;
;;; This library is distributed in the hope that it will be useful,

[86 lines skipped]
--- /project/cxml/cvsroot/cxml/klacks/package.lisp	2007/02/11 18:21:21	NONE
+++ /project/cxml/cvsroot/cxml/klacks/package.lisp	2007/02/11 18:21:21	1.1

[124 lines skipped]



More information about the Cxml-cvs mailing list