[s-xml-devel] DOCTYPE and ENTITY support
Noldus Reijnders
n_reijnders at hotmail.com
Thu Feb 2 14:21:25 UTC 2012
I made a small extension to s-xml to make it parse DOCTYPE and ENTITY tags, that seems to work as far as I've tested. I don't know how to make a patch as I sometimes see posted by others (maybe someone can tell me?), so I'll just post the code here. I'm not entirely happy with the read-entity function because of the flags, but since I wanted to use the same coding style (useing the "when" strucutre) and I couldn't think of/wanted to spend more time on a way to do it smarter, this is how it is.
Note that the function "skip-special-tag" in "xml.lisp" is replaced by the function below.
(defun skip-special-tag (stream state) "Skip an XML special tag (comments and processing instructions) in stream, positioned after the opening '<', unexpected eof is an error" ;; opening < has been read, consume ? or ! (read-char stream) (let ((char (read-char stream nil #\Null))) (declare (type character char)) ;; see if we are dealing with a comment (when (char= char #\-) (setf char (read-char stream nil #\Null)) (when (char= char #\-) (skip-comment stream) (return-from skip-special-tag))) ;; maybe we are dealing with CDATA? (when (and (char= char #\[) (loop :for pattern :across "CDATA[" :for char = (read-char stream nil #\Null) :when (char= char #\Null) :do (error (parser-error "encountered unexpected eof in cdata")) :always (char= char pattern))) (read-cdata stream state (get-buffer state)) (return-from skip-special-tag)) ;; maybe we are dealing with DOCTYPE? (when (and (char= char #\D) (loop :for pattern :across "OCTYPE" :for char = (read-char stream nil #\Null) :when (char= char #\Null) :do (error (parser-error "encountered unexpected eof in DOCTYPE")) :always (char= char pattern))) (read-doctype stream state (get-buffer state)) (return-from skip-special-tag)) ;; loop over chars, dealing with strings (skipping their content) ;; and counting opening and closing < and > chars (let ((taglevel 1) (string-delimiter #\Null)) (declare (type character string-delimiter)) (loop (when (zerop taglevel) (return)) (setf char (read-char stream nil #\Null)) (when (char= char #\Null) (error (parser-error "encountered unexpected eof for special (! or ?) tag" nil stream))) (if (char/= string-delimiter #\Null) ;; inside a string we only look for a closing string delimiter (when (char= char string-delimiter) (setf string-delimiter #\Null)) ;; outside a string we count < and > and watch out for strings (cond ((or (char= char #\') (char= char #\")) (setf string-delimiter char)) ((char= char #\<) (incf taglevel)) ((char= char #\>) (decf taglevel))))))))
(defun read-doctype (stream state string) "Reads in the DOCTYPE and calls the callback for ENTITY if it exists" ;; we already read the <!DOCTYPE stuff continue to read until we hit ]> ;; and read in the entities (let ((char #\space) (last-2-characters (list #\E #\P)) (pattern (list #\> #\]))) (declare (type character char)) (loop (setf char (read-char stream nil #\Null)) (when (char= char #\Null) (error (parser-error "encountered unexpected eof in DOCTYPE content"))) ;; maybe we are dealing with an ENTITY? (when (and (char= char #\<) (loop :for pattern :across "!ENTITY" :for char = (read-char stream nil #\Null) :when (char= char #\Null) :do (error (parser-error "encountered unexpected eof in ENTITY")) :always (char= char pattern))) (read-entity stream state (get-buffer state))) ;; Push the new character onto the last-2-characters list (push char last-2-characters) (setf (cddr last-2-characters) nil) (cond ((equal last-2-characters pattern) (setf (fill-pointer string) (- (fill-pointer string) 2)) (setf (get-seed state) (funcall (get-text-hook state) (copy-seq string) (get-seed state))) (return-from read-doctype)) (t (vector-push-extend char string))))))
(defun read-entity (stream state string) "Reads in the ENTITY and adds it to the known entity list" ;; we already read the <!ENTITY stuff continue to read until we hit > (let ((char (read-char stream nil #\Null)) (ent "") (value "") (ent-flag nil) (value-flag nil)) (declare (type character char)) (loop (setf char (read-char stream nil #\Null)) (when (char= char #\Null) (error (parser-error "encountered unexpected eof in entity content"))) ;; If > is encountered, store the entity in the known entities and return (when (char= char #\>) (setf (gethash ent (get-entities state)) value) (return-from read-entity)) ;; If char /= space and no space has been encountered yet ;; store the characters in ent (when (and (char/= char #\space) (not value-flag)) (setf ent-flag t) (setf ent (concatenate 'string ent (string char)))) ;; If char /= space and a space has been encountered ;; store the characters in value (when (and (char/= char #\space) value-flag) (setf value (concatenate 'string value (string char)))) ;; If char = space then flag beginning of value (when (and (char= char #\space) ent-flag) (setf value-flag t)) )))
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/s-xml-devel/attachments/20120202/62dfe57b/attachment.html>
More information about the s-xml-devel
mailing list