[flexi-streams-cvs] r58 - in branches/edi: . doc
eweitz at common-lisp.net
eweitz at common-lisp.net
Sun May 25 20:28:27 UTC 2008
Author: eweitz
Date: Sun May 25 16:28:25 2008
New Revision: 58
Modified:
branches/edi/decode.lisp
branches/edi/doc/index.html
branches/edi/encode.lisp
branches/edi/input.lisp
branches/edi/length.lisp
branches/edi/mapping.lisp
branches/edi/strings.lisp
Log:
Optimized the other direction as well
Passes tests on LispWorks
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.25 2008/05/25 20:26:34 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -60,26 +60,217 @@
The special variable *CURRENT-UNREADER* must be bound correctly
whenever this function is called."))
-(defmethod octets-to-char-code ((format flexi-latin-1-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
- (funcall reader))
+(defgeneric octets-to-string* (format sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "A generic function which dispatches on the external
+format and does the real work for OCTETS-TO-STRING."))
-(defmethod octets-to-char-code ((format flexi-ascii-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
- (when-let (octet (funcall reader))
+(defmethod octets-to-string* :around (format (list list) start end)
+ (declare #.*standard-optimize-settings*)
+ (call-next-method format (coerce list 'vector) start end))
+
+(defmacro define-sequence-readers ((format-class) &body body)
+ "Non-hygienic utility macro which defines methods for READ-SEQUENCE*
+and OCTETS-TO-STRING* for the class FORMAT-CLASS. BODY is described
+in the docstring of DEFINE-CHAR-ENCODERS but can additionally contain
+a form (UNGET <form>) which has to be replaced by the correct code to
+`unread' the octets for the character designated by <form>."
+ (let* ((body `((block char-decoder
+ (locally
+ (declare #.*fixnum-optimize-settings*)
+ , at body)))))
+ `(progn
+ (defmethod read-sequence* ((format ,format-class) flexi-input-stream sequence start end)
+ (with-accessors ((position flexi-stream-position)
+ (bound flexi-stream-bound)
+ (octet-stack flexi-stream-octet-stack)
+ (last-octet flexi-stream-last-octet)
+ (last-char-code flexi-stream-last-char-code)
+ (stream flexi-stream-stream))
+ flexi-input-stream
+ (let* (buffer
+ (buffer-pos 0)
+ (buffer-end 0)
+ (index start)
+ ;; whether we will later be able to rewind the stream if
+ ;; needed (to get rid of unused octets in the buffer)
+ (can-rewind-p (maybe-rewind stream 0))
+ (factor (encoding-factor format))
+ (integer-factor (floor factor))
+ ;; it's an interesting question whether it makes sense
+ ;; performance-wise to make RESERVE significantly bigger
+ ;; (and thus put potentially a lot more octets into
+ ;; OCTET-STACK), especially for UTF-8
+ (reserve (cond ((not (floatp factor)) 0)
+ ((not can-rewind-p) (* 2 integer-factor))
+ (t (ceiling (* (- factor integer-factor) (- end start)))))))
+ (declare (fixnum buffer-pos buffer-end index integer-factor reserve)
+ (boolean can-rewind-p))
+ (flet ((compute-fill-amount ()
+ "Computes the amount of octets we can savely read into
+the buffer without violating the stream's bound \(if there is one) and
+without potentially reading much more than we need \(unless we can
+rewind afterwards)."
+ (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor
+ (the fixnum (- end index))))
+ reserve))
+ +buffer-size+)))
+ (cond (bound (min minimum (- bound position)))
+ (t minimum))))
+ (fill-buffer (end)
+ "Tries to fill the buffer from BUFFER-POS to END and
+returns NIL if the buffer doesn't contain any new data."
+ ;; put data from octet stack into buffer if there is any
+ (loop
+ (when (>= buffer-pos end)
+ (return))
+ (let ((next-octet (pop octet-stack)))
+ (cond (next-octet
+ (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet))
+ (incf buffer-pos))
+ (t (return)))))
+ (setq buffer-end (read-sequence buffer stream
+ :start buffer-pos
+ :end end))
+ ;; BUFFER-POS is only greater than zero if the buffer
+ ;; already contains unread data from the octet stack
+ ;; (see below), so we test for ZEROP here and do /not/
+ ;; compare with BUFFER-POS
+ (unless (zerop buffer-end)
+ (incf position buffer-end))))
+ (let ((minimum (compute-fill-amount)))
+ (declare (fixnum minimum))
+ (setq buffer (make-octet-buffer minimum))
+ ;; fill buffer for the first time or return immediately if
+ ;; we don't succeed
+ (unless (fill-buffer minimum)
+ (return-from read-sequence* start)))
+ (setq buffer-pos 0)
+ (macrolet ((iterate (set-place)
+ "A very unhygienic macro to implement the
+actual iteration through the sequence including housekeeping for the
+flexi stream. SET-PLACE is the place \(using the index INDEX) used to
+access the sequence."
+ `(flet ((leave ()
+ "This is the function used to
+abort the LOOP iteration below."
+ (when (> index start)
+ (setq last-octet nil
+ last-char-code ,(sublis '((index . (1- index))) set-place)))
+ (return-from read-sequence* index)))
+ (loop
+ (when (>= index end)
+ ;; check if there are octets in the
+ ;; buffer we didn't use - see
+ ;; COMPUTE-FILL-AMOUNT above
+ (let ((rest (- buffer-end buffer-pos)))
+ (when (plusp rest)
+ (or (and can-rewind-p
+ (maybe-rewind stream rest))
+ (loop
+ (when (>= buffer-pos buffer-end)
+ (return))
+ (decf buffer-end)
+ (push (aref (the (array octet *) buffer) buffer-end)
+ octet-stack)))))
+ (leave))
+ (let ((next-char-code
+ (progn (symbol-macrolet
+ ((octet-getter
+ ;; this is the code to retrieve the next octet (or
+ ;; NIL) and to fill the buffer if needed
+ (block next-octet
+ (when (>= buffer-pos buffer-end)
+ (setq buffer-pos 0)
+ (unless (fill-buffer (compute-fill-amount))
+ (return-from next-octet)))
+ (prog1
+ (aref (the (array octet *) buffer) buffer-pos)
+ (incf buffer-pos)))))
+ (macrolet ((unget (form)
+ `(unread-char% ,form flexi-input-stream)))
+ ,', at body)))))
+ (unless next-char-code
+ (leave))
+ (setf ,set-place (code-char next-char-code))
+ (incf index))))))
+ (etypecase sequence
+ (string (iterate (char sequence index)))
+ (array (iterate (aref sequence index)))
+ (list (iterate (nth index sequence)))))))))
+ (defmethod octets-to-string* ((format ,format-class) sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end))
+ (let* ((i start)
+ (string-length (compute-number-of-chars format sequence start end nil))
+ (string (make-array string-length :element-type 'char*)))
+ (declare (fixnum i string-length))
+ (loop for j of-type fixnum from 0 below string-length
+ do (setf (schar string j)
+ (code-char (macrolet ((unget (form)
+ `(decf i (character-length format ,form))))
+ (symbol-macrolet ((octet-getter (and (< i end)
+ (prog1
+ (aref sequence i)
+ (incf i)))))
+ , at body))))
+ finally (return string)))))))
+
+(defmacro define-char-decoders ((lf-format-class cr-format-class crlf-format-class) &body body)
+ "Non-hygienic utility macro which defines several decoding-related
+methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
+CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
+encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
+similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
+BODY is a code template for the code to read octets and return one
+character. BODY must contain a symbol OCTET-GETTER representing the
+form which is used to obtain the next octet."
+ `(progn
+ (defmethod octets-to-char-code ((format ,lf-format-class) reader)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (function reader))
+ (symbol-macrolet ((octet-getter (funcall reader)))
+ ,@(sublis '((char-decoder . octets-to-char-code))
+ body)))
+ (define-sequence-readers (,lf-format-class) , at body)
+ (define-sequence-readers (,cr-format-class)
+ ,(with-unique-names (char-code)
+ `(let ((,char-code (progn , at body)))
+ (case ,char-code
+ (#.+cr+ #.(char-code #\Newline))
+ (otherwise ,char-code)))))
+ (define-sequence-readers (,crlf-format-class)
+ ,(with-unique-names (char-code next-char-code get-char-code)
+ `(flet ((,get-char-code () , at body))
+ (let ((,char-code (,get-char-code)))
+ (case ,char-code
+ (#.+cr+
+ (let ((,next-char-code (,get-char-code)))
+ (case ,next-char-code
+ (#.+lf+ #.(char-code #\Newline))
+ ;; we saw a CR but no LF afterwards, but then the data
+ ;; ended, so we just return #\Return
+ ((nil) +cr+)
+ ;; if the character we peeked at wasn't a
+ ;; linefeed character we unread its constituents
+ (otherwise (unget (code-char ,next-char-code))
+ ,char-code))))
+ (otherwise ,char-code))))))))
+
+(define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
+ octet-getter)
+
+(define-char-decoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
+ (when-let (octet octet-getter)
(if (> (the octet octet) 127)
(recover-from-encoding-error format
"No character which corresponds to octet #x~X." octet)
octet)))
-(defmethod octets-to-char-code ((format flexi-8-bit-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
(with-accessors ((decoding-table external-format-decoding-table))
format
- (when-let (octet (funcall reader))
+ (when-let (octet octet-getter)
(let ((char-code (aref (the (simple-array char-code-integer *) decoding-table)
(the octet octet))))
(if (or (null char-code)
@@ -88,19 +279,17 @@
"No character which corresponds to octet #x~X." octet)
char-code)))))
-(defmethod octets-to-char-code ((format flexi-utf-8-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-8 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(let ((octet (read-next-byte)))
(declare (type octet octet))
@@ -113,11 +302,7 @@
(values (logand octet #b00001111) 2))
((= #b11110000 (logand octet #b11111000))
(values (logand octet #b00000111) 3))
- ((= #b11111000 (logand octet #b11111100))
- (values (logand octet #b00000011) 4))
- ((= #b11111100 (logand octet #b11111110))
- (values (logand octet #b00000001) 5))
- (t (return-from octets-to-char-code
+ (t (return-from char-decoder
(recover-from-encoding-error format
"Unexpected value #x~X at start of UTF-8 sequence."
octet))))
@@ -130,24 +315,22 @@
repeat count
for octet of-type octet = (read-next-byte)
unless (= #b10000000 (logand octet #b11000000))
- do (return-from octets-to-char-code
+ do (return-from char-decoder
(recover-from-encoding-error format
"Unexpected value #x~X in UTF-8 sequence." octet))
finally (return result)))))))
-(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-16 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(flet ((read-next-word ()
(+ (the octet (read-next-byte))
@@ -159,7 +342,7 @@
(let ((next-word (read-next-word)))
(declare (type (unsigned-byte 16) next-word))
(unless (<= #xdc00 next-word #xdfff)
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"Unexpected UTF-16 word #x~X following #x~X."
next-word word)))
@@ -168,19 +351,17 @@
#x10000)))
(t word)))))))
-(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-16 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(flet ((read-next-word ()
(+ (ash (the octet (read-next-byte)) 8)
@@ -192,7 +373,7 @@
(let ((next-word (read-next-word)))
(declare (type (unsigned-byte 16) next-word))
(unless (<= #xdc00 next-word #xdfff)
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"Unexpected UTF-16 word #x~X following #x~X."
next-word word)))
@@ -201,37 +382,33 @@
#x10000)))
(t word)))))))
-(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-32 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(loop for count of-type fixnum from 0 to 24 by 8
for octet of-type octet = (read-next-byte)
sum (ash octet count)))))
-(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
+(define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
(let (first-octet-seen)
(declare (boolean first-octet-seen))
(macrolet ((read-next-byte ()
'(prog1
- (or (funcall reader)
+ (or octet-getter
(cond (first-octet-seen
- (return-from octets-to-char-code
+ (return-from char-decoder
(recover-from-encoding-error format
"End of data while in UTF-32 sequence.")))
- (t (return-from octets-to-char-code nil))))
+ (t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(loop for count of-type fixnum from 24 downto 0 by 8
for octet of-type octet = (read-next-byte)
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html (original)
+++ branches/edi/doc/index.html Sun May 25 16:28:25 2008
@@ -996,7 +996,7 @@
<h4><a name="strings" class=none>Strings</a></h4>
-This section collects a few convenience functions for strings conversions:
+This section collects a few convenience functions for strings conversions.
<p><br>[Function]
<br><a class=none name="string-to-octets"><b>string-to-octets</b> <i>string <tt>&key</tt> external-format start end</i> => <i>vector</i></a>
@@ -1009,7 +1009,9 @@
<code><i>start</i></code> and <code><i>end</i></code>
are <code>0</code> and the length of the string. The default
for <code><i>external-format</i></code> is <code>:LATIN1</code>.
-
+<p>
+In spite of the name, <code><i>string</i></code> can be any sequence of characters, but
+the function is optimized for strings.
</blockquote>
<p><br>[Function]
@@ -1023,6 +1025,11 @@
<code><i>start</i></code> and <code><i>end</i></code>
are <code>0</code> and the length of the sequence. The default
for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+This function is optimized for the case
+of <code><i>sequence</i></code> being
+a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>.
+Don't use lists if you are in hurry.
</blockquote>
<p><br>[Function]
@@ -1030,14 +1037,17 @@
<blockquote><br>
-Returns the length of the substring of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
+Returns the length of the subsequence of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
<a href="#octet">octets</a> if encoded using
the <a href="#external-formats">external format</a> designated
by <code><i>external-format</i></code>.
The defaults for
<code><i>start</i></code> and <code><i>end</i></code>
-are <code>0</code> and the length of the string. The default
+are <code>0</code> and the length of <code><i>string</i></code>. The default
for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+In spite of the name, <code><i>string</i></code> can be any sequence of characters, but
+the function is optimized for strings.
</blockquote>
<p><br>[Function]
@@ -1054,6 +1064,11 @@
<code><i>start</i></code> and <code><i>end</i></code>
are <code>0</code> and the length of the sequence. The default
for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+This function is optimized for the case
+of <code><i>sequence</i></code> being
+a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>.
+Don't use lists if you are in hurry.
</blockquote>
<br> <br><h3><a class=none name="position">File positions</a></h3>
@@ -1095,7 +1110,7 @@
his work on making FLEXI-STREAMS faster.
<p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.114 2008/05/25 03:08:01 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp (original)
+++ branches/edi/encode.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.21 2008/05/25 20:26:34 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -47,130 +47,140 @@
(:documentation "A generic function which dispatches on the external
format and does the real work for STRING-TO-OCTETS."))
+(defmethod string-to-octets* :around (format (list list) start end)
+ (declare #.*standard-optimize-settings*)
+ (call-next-method format (coerce list 'string*) start end))
+
(defmacro define-sequence-writers ((format-class) &body body)
- "Utility macro which defines methods for WRITE-SEQUENCE* and
-STRING-TO-OCTET* for the class FORMAT-CLASS. For BODY see the
-docstring of DEFINE-CHAR-ENCODERS."
- `(progn
- (defmethod write-sequence* ((format ,format-class) stream sequence start end)
- (declare #.*standard-optimize-settings*)
- (declare (fixnum start end))
- (with-accessors ((column flexi-stream-column))
- stream
- (let* ((octet-seen-p nil)
- (buffer-pos 0)
- ;; estimate should be good enough...
- (factor (encoding-factor format))
- ;; we don't want arbitrarily large buffer, do we?
- (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
- (buffer (make-octet-buffer buffer-size)))
- (declare (fixnum buffer-pos buffer-size)
- (boolean octet-seen-p)
- (type (array octet *) buffer))
- (macrolet ((octet-writer (form)
- `(write-octet ,form)))
- (labels ((flush-buffer ()
- "Sends all octets in BUFFER to the underlying stream."
- (write-sequence buffer stream :end buffer-pos)
- (setq buffer-pos 0))
- (write-octet (octet)
- "Adds one octet to the buffer and flushes it if necessary."
- (declare (type octet octet))
- (when (>= buffer-pos buffer-size)
- (flush-buffer))
- (setf (aref buffer buffer-pos) octet)
- (incf buffer-pos))
- (write-object (object)
- "Dispatches to WRITE-OCTET or WRITE-CHARACTER
-depending on the type of OBJECT."
- (etypecase object
- (octet (setq octet-seen-p t)
- (write-octet object))
- (character (symbol-macrolet ((char-getter object))
- , at body)))))
- (macrolet ((iterate (&body output-forms)
- "An unhygienic macro to implement the actual
-iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
-sequence element and put its octet representation into the buffer."
- `(loop for index of-type fixnum from start below end
- do (progn , at output-forms)
- finally (when (plusp buffer-pos)
- (flush-buffer)))))
- (etypecase sequence
- (string (iterate
- (symbol-macrolet ((char-getter (char sequence index)))
- , at body)))
- (array (iterate
- (symbol-macrolet ((char-getter (aref sequence index)))
- , at body)))
- (list (iterate (write-object (nth index sequence))))))
- ;; update the column slot, setting it to NIL if we sent
- ;; octets
- (setq column
- (cond (octet-seen-p nil)
- (t (let ((last-newline-pos (position #\Newline sequence
- :test #'char=
- :start start
- :end end
- :from-end t)))
- (cond (last-newline-pos (- end last-newline-pos 1))
- (column (+ column (- end start)))))))))))))
- (defmethod string-to-octets* ((format ,format-class) string start end)
- (declare #.*standard-optimize-settings*)
- (declare (fixnum start end) (string string))
- (let ((octets (make-array (compute-number-of-octets format string start end)
- :element-type 'octet))
- (j 0))
- (declare (fixnum j))
- (loop for i of-type fixnum from start below end do
- (macrolet ((octet-writer (form)
- `(progn
- (setf (aref (the (array octet *) octets) j) ,form)
- (incf j))))
- (symbol-macrolet ((char-getter (char string i)))
- (progn , at body))))
- octets))))
-
-;; char-getter can be called more than once - no side effects
-(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body)
- "Utility macro which defines several encoding-related methods for
-the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where
-it is assumed that CR-FORMAT-CLASS is the same encoding as
-FORMAT-CLASS but with CR line endings and similar for
-CRLF-FORMAT-CLASS. BODY is a code template for the code to convert
-one character to octets. BODY must contain a symbol CHAR-GETTER
-representing the form which is used to obtain the character and a
-forms like \(OCTET-WRITE <thing>) to write the octet <thing>. The
-CHAR-GETTER form might be called more than once."
+ "Non-hygienic utility macro which defines methods for
+WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS. For
+BODY see the docstring of DEFINE-CHAR-ENCODERS."
(let ((body `((locally
(declare #.*fixnum-optimize-settings*)
, at body))))
`(progn
- (defmethod char-to-octets ((format ,format-class) char writer)
- (declare (character char) (function writer))
- (symbol-macrolet ((char-getter char))
- (macrolet ((octet-writer (form)
- `(funcall writer ,form)))
- , at body)))
- (define-sequence-writers (,format-class) , at body)
- (define-sequence-writers (,cr-format-class)
- ,@(sublis `((char-getter . ,(with-unique-names (char)
- `(let ((,char char-getter))
- (declare (character ,char))
- (if (char= ,char #\Newline)
- #\Return
- ,char)))))
- body))
- (define-sequence-writers (,crlf-format-class)
- ,(with-unique-names (char write-char)
- `(flet ((,write-char (,char)
- ,@(sublis `((char-getter . ,char)) body)))
- (let ((,char char-getter))
- (declare (character ,char))
- (cond ((char= ,char #\Newline)
- (,write-char #\Return)
- (,write-char #\Newline))
- (t (,write-char ,char))))))))))
+ (defmethod string-to-octets* ((format ,format-class) string start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end) (string string))
+ (let ((octets (make-array (compute-number-of-octets format string start end)
+ :element-type 'octet))
+ (j 0))
+ (declare (fixnum j))
+ (loop for i of-type fixnum from start below end do
+ (macrolet ((octet-writer (form)
+ `(progn
+ (setf (aref (the (array octet *) octets) j) ,form)
+ (incf j))))
+ (symbol-macrolet ((char-getter (char string i)))
+ (progn , at body))))
+ octets))
+ (defmethod write-sequence* ((format ,format-class) stream sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (declare (fixnum start end))
+ (with-accessors ((column flexi-stream-column))
+ stream
+ (let* ((octet-seen-p nil)
+ (buffer-pos 0)
+ ;; estimate should be good enough...
+ (factor (encoding-factor format))
+ ;; we don't want arbitrarily large buffer, do we?
+ (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
+ (buffer (make-octet-buffer buffer-size)))
+ (declare (fixnum buffer-pos buffer-size)
+ (boolean octet-seen-p)
+ (type (array octet *) buffer))
+ (macrolet ((octet-writer (form)
+ `(write-octet ,form)))
+ (labels ((flush-buffer ()
+ "Sends all octets in BUFFER to the underlying stream."
+ (write-sequence buffer stream :end buffer-pos)
+ (setq buffer-pos 0))
+ (write-octet (octet)
+ "Adds one octet to the buffer and flushes it if necessary."
+ (declare (type octet octet))
+ (when (>= buffer-pos buffer-size)
+ (flush-buffer))
+ (setf (aref buffer buffer-pos) octet)
+ (incf buffer-pos))
+ (write-object (object)
+ "Dispatches to WRITE-OCTET or WRITE-CHARACTER
+depending on the type of OBJECT."
+ (etypecase object
+ (octet (setq octet-seen-p t)
+ (write-octet object))
+ (character (symbol-macrolet ((char-getter object))
+ , at body)))))
+ (macrolet ((iterate (&body output-forms)
+ "An unhygienic macro to implement the actual
+iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
+sequence element and put its octet representation into the buffer."
+ `(loop for index of-type fixnum from start below end
+ do (progn , at output-forms)
+ finally (when (plusp buffer-pos)
+ (flush-buffer)))))
+ (etypecase sequence
+ (string (iterate
+ (symbol-macrolet ((char-getter (char sequence index)))
+ , at body)))
+ (array (iterate
+ (symbol-macrolet ((char-getter (aref sequence index)))
+ , at body)))
+ (list (iterate (write-object (nth index sequence))))))
+ ;; update the column slot, setting it to NIL if we sent
+ ;; octets
+ (setq column
+ (cond (octet-seen-p nil)
+ (t (let ((last-newline-pos (position #\Newline sequence
+ :test #'char=
+ :start start
+ :end end
+ :from-end t)))
+ (cond (last-newline-pos (- end last-newline-pos 1))
+ (column (+ column (- end start))))))))))))))))
+
+(defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body)
+ "Non-hygienic utility macro which defines several encoding-related
+methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
+CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
+encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
+similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
+BODY is a code template for the code to convert one character to
+octets. BODY must contain a symbol CHAR-GETTER representing the form
+which is used to obtain the character and a forms like \(OCTET-WRITE
+<thing>) to write the octet <thing>. The CHAR-GETTER form might be
+called more than once."
+ `(progn
+ (defmethod char-to-octets ((format ,lf-format-class) char writer)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (character char) (function writer))
+ (symbol-macrolet ((char-getter char))
+ (macrolet ((octet-writer (form)
+ `(funcall writer ,form)))
+ , at body)))
+ (define-sequence-writers (,lf-format-class) , at body)
+ (define-sequence-writers (,cr-format-class)
+ ;; modify the body so that the getter replaces a #\Newline
+ ;; with a #\Return
+ ,@(sublis `((char-getter . ,(with-unique-names (char)
+ `(let ((,char char-getter))
+ (declare (character ,char))
+ (if (char= ,char #\Newline)
+ #\Return
+ ,char)))))
+ body))
+ (define-sequence-writers (,crlf-format-class)
+ ;; modify the body so that we potentially write octets for
+ ;; two characters (#\Return and #\Linefeed) - the original
+ ;; body is wrapped with the WRITE-CHAR local function
+ ,(with-unique-names (char write-char)
+ `(flet ((,write-char (,char)
+ ,@(sublis `((char-getter . ,char)) body)))
+ (let ((,char char-getter))
+ (declare (character ,char))
+ (cond ((char= ,char #\Newline)
+ (,write-char #\Return)
+ (,write-char #\Linefeed))
+ (t (,write-char ,char)))))))))
(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
(let ((octet (char-code char-getter)))
Modified: branches/edi/input.lisp
==============================================================================
--- branches/edi/input.lisp (original)
+++ branches/edi/input.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.77 2008/05/25 03:34:55 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.78 2008/05/25 19:25:44 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -201,9 +201,7 @@
others - see for example FLEXI-STREAMS-TEST::SEQUENCE-TEST."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
- (with-accessors ((position flexi-stream-position)
- (bound flexi-stream-bound)
- (octet-stack flexi-stream-octet-stack)
+ (with-accessors ((octet-stack flexi-stream-octet-stack)
(external-format flexi-stream-external-format)
(last-octet flexi-stream-last-octet)
(last-char-code flexi-stream-last-char-code)
@@ -233,116 +231,8 @@
(setq last-char-code nil
last-octet (elt sequence (1- index))))
(return-from stream-read-sequence index)))
- (let* (buffer
- (buffer-pos 0)
- (buffer-end 0)
- (index start)
- ;; whether we will later be able to rewind the stream if
- ;; needed (to get rid of unused octets in the buffer)
- (can-rewind-p (maybe-rewind stream 0))
- (factor (encoding-factor external-format))
- (integer-factor (floor factor))
- ;; it's an interesting question whether it makes sense
- ;; performance-wise to make RESERVE significantly bigger
- ;; (and thus put potentially a lot more octets into
- ;; OCTET-STACK), especially for UTF-8
- (reserve (cond ((not (floatp factor)) 0)
- ((not can-rewind-p) (* 2 integer-factor))
- (t (ceiling (* (- factor integer-factor) (- end start)))))))
- (declare (fixnum buffer-pos buffer-end index integer-factor reserve)
- (boolean can-rewind-p))
- (flet ((compute-fill-amount ()
- "Computes the amount of octets we can savely read into
-the buffer without violating the stream's bound \(if there is one) and
-without potentially reading much more than we need \(unless we can
-rewind afterwards)."
- (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor
- (the fixnum (- end index))))
- reserve))
- +buffer-size+)))
- (cond (bound (min minimum (- bound position)))
- (t minimum))))
- (fill-buffer (end)
- "Tries to fill the buffer from BUFFER-POS to END and
-returns NIL if the buffer doesn't contain any new data."
- ;; put data from octet stack into buffer if there is any
- (loop
- (when (>= buffer-pos end)
- (return))
- (let ((next-octet (pop octet-stack)))
- (cond (next-octet
- (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet))
- (incf buffer-pos))
- (t (return)))))
- (setq buffer-end (read-sequence buffer stream
- :start buffer-pos
- :end end))
- ;; BUFFER-POS is only greater than zero if the buffer
- ;; already contains unread data from the octet stack
- ;; (see below), so we test for ZEROP here and do /not/
- ;; compare with BUFFER-POS
- (unless (zerop buffer-end)
- (incf position buffer-end))))
- (let ((minimum (compute-fill-amount)))
- (declare (fixnum minimum))
- (setq buffer (make-octet-buffer minimum))
- ;; fill buffer for the first time or return immediately if
- ;; we don't succeed
- (unless (fill-buffer minimum)
- (return-from stream-read-sequence start)))
- (setq buffer-pos 0)
- (flet ((next-octet ()
- "Returns the next octet from the buffer and fills it
-if it is exhausted. Returns NIL if there's no more data on the
-stream."
- (when (>= buffer-pos buffer-end)
- (setq buffer-pos 0)
- (unless (fill-buffer (compute-fill-amount))
- (return-from next-octet)))
- (prog1
- (aref (the (array octet *) buffer) buffer-pos)
- (incf buffer-pos)))
- (unreader (char)
- (unread-char% char flexi-input-stream)))
- (declare (dynamic-extent (function next-octet) (function unreader)))
- (let ((*current-unreader* #'unreader))
- (macrolet ((iterate (set-place)
- "A very unhygienic macro to implement the
-actual iteration through the sequence including housekeeping for the
-flexi stream. SET-PLACE is the place \(using the index INDEX) used to
-access the sequence."
- `(flet ((leave ()
- "This is the function used to abort
-the LOOP iteration below."
- (when (> index start)
- (setq last-octet nil
- last-char-code ,(sublis '((index . (1- index))) set-place)))
- (return-from stream-read-sequence index)))
- (loop
- (when (>= index end)
- ;; check if there are octets in the
- ;; buffer we didn't use - see
- ;; COMPUTE-FILL-AMOUNT above
- (let ((rest (- buffer-end buffer-pos)))
- (when (plusp rest)
- (or (and can-rewind-p
- (maybe-rewind stream rest))
- (loop
- (when (>= buffer-pos buffer-end)
- (return))
- (decf buffer-end)
- (push (aref (the (array octet *) buffer) buffer-end)
- octet-stack)))))
- (leave))
- (let ((next-char-code (octets-to-char-code external-format #'next-octet)))
- (unless next-char-code
- (leave))
- (setf ,set-place (code-char next-char-code))
- (incf index))))))
- (etypecase sequence
- (string (iterate (char sequence index)))
- (array (iterate (aref sequence index)))
- (list (iterate (nth index sequence)))))))))))
+ ;; otherwise hand over to the external format to do the work
+ (read-sequence* external-format flexi-input-stream sequence start end)))
(defmethod stream-unread-char ((stream flexi-input-stream) char)
"Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM.
Modified: branches/edi/length.lisp
==============================================================================
--- branches/edi/length.lisp (original)
+++ branches/edi/length.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -115,7 +115,7 @@
;; formats with CRLF line endings have their own specialized methods
;; below
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(declare (ignore warnp))
(let ((i start)
(length (- end start)))
@@ -132,7 +132,7 @@
(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
@@ -152,7 +152,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start)
(last-octet 0))
@@ -175,7 +175,7 @@
(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(declare (ignore sequence))
(when (and warnp (oddp (- end start)))
(signal-encoding-warning format "~A octet~:P cannot be decoded ~
@@ -203,7 +203,7 @@
(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
@@ -222,7 +222,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start)
(last-octet 0))
@@ -248,7 +248,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(let ((sum 0)
(i start)
(last-octet 0))
@@ -290,7 +290,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
@@ -308,7 +308,7 @@
(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (vector sequence))
(declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
@@ -330,22 +330,26 @@
encode the sequence of characters in SEQUENCE from START to END using
the external format FORMAT."))
-(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+(defmethod compute-number-of-octets :around (format (list list) start end)
+ (declare #.*standard-optimize-settings*)
+ (call-next-method format (coerce list 'string*) start end))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) string start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence))
+ (declare (ignore string))
(- end start))
-(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((< char-code #x80) 1)
((< char-code #x800) 2)
((< char-code #x10000) 3)
@@ -355,16 +359,16 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((= char-code #.(char-code #\Newline)) 2)
((< char-code #x80) 1)
((< char-code #x800) 2)
@@ -375,16 +379,16 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((< char-code #x10000) 2)
(t 4))))
(declare (fixnum char-length) (type char-code-integer char-code))
@@ -392,16 +396,16 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((= char-code #.(char-code #\Newline)) 4)
((< char-code #x10000) 2)
(t 4))))
@@ -410,16 +414,16 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(let ((sum 0)
(i start))
(declare (fixnum i sum))
(loop
(when (>= i end)
(return))
- (let* ((char-code (char-code (aref sequence i)))
+ (let* ((char-code (char-code (char string i)))
(char-length (cond ((= char-code #.(char-code #\Newline)) 4)
((< char-code #x10000) 2)
(t 4))))
@@ -428,17 +432,39 @@
(incf i)))
sum))
-(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) string start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence))
+ (declare (ignore string))
(* 4 (- end start)))
-(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) string start end)
(declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
+ (declare (fixnum start end) (string string))
(+ (call-next-method)
(* (case (external-format-name format)
(:utf-32 4)
(otherwise 1))
- (count #\Newline sequence :start start :end end :test #'char=))))
\ No newline at end of file
+ (count #\Newline string :start start :end end :test #'char=))))
+
+(defgeneric character-length (format char)
+ (declare #.*fixnum-optimize-settings*)
+ (:documentation "Returns the number of octets needed to encode the
+single character CHAR.")
+ (:method (format char)
+ (compute-number-of-octets format (string char) 0 1)))
+
+(defmethod character-length :around ((format flexi-crlf-mixin) (char (eql #\Newline)))
+ (declare #.*fixnum-optimize-settings*)
+ (+ (call-next-method format +cr+)
+ (call-next-method format +lf+)))
+
+(defmethod character-length ((format flexi-8-bit-format) char)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (ignore char))
+ 1)
+
+(defmethod character-length ((format flexi-utf-32-format) char)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (ignore char))
+ 4)
\ No newline at end of file
Modified: branches/edi/mapping.lisp
==============================================================================
--- branches/edi/mapping.lisp (original)
+++ branches/edi/mapping.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.2 2008/05/20 21:15:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.3 2008/05/25 19:07:53 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -39,6 +39,12 @@
#+:lispworks 'lw:simple-char
#-:lispworks 'character)
+(deftype string* ()
+ "Convenience shortcut to paper over the difference between LispWorks
+and the other Lisps."
+ #+:lispworks 'lw:text-string
+ #-:lispworks 'string)
+
(deftype char-code-integer ()
"The subtype of integers which can be returned by the function CHAR-CODE."
'(integer 0 #.(1- char-code-limit)))
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp (original)
+++ branches/edi/strings.lisp Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.29 2008/05/25 03:34:55 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -34,7 +34,10 @@
(start 0) (end (length string)))
"Converts the Lisp string STRING from START to END to an array of
octets corresponding to the external format designated by
-EXTERNAL-FORMAT."
+EXTERNAL-FORMAT.
+
+In spite of the name, STRING can be any sequence of characters, but
+the function is optimized for strings."
(declare #.*standard-optimize-settings*)
(declare (string string))
(setq external-format (maybe-convert-external-format external-format))
@@ -45,51 +48,22 @@
(external-format :latin1)
(start 0) (end (length sequence)))
"Converts the Lisp sequence SEQUENCE of octets from START to END to
-a string using the external format designated by EXTERNAL-FORMAT."
+a string using the external format designated by EXTERNAL-FORMAT.
+
+This function is optimized for the case of SEQUENCE being a vector.
+Don't use lists if you're in a hurry."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
(setq external-format (maybe-convert-external-format external-format))
- (let* ((i start)
- (reader (etypecase sequence
- ((array octet *)
- (lambda ()
- (and (< i end)
- (prog1
- (aref (the (array octet *) sequence) i)
- (incf i)))))
- ((array * *)
- (lambda ()
- (and (< i end)
- (prog1
- (aref sequence i)
- (incf i)))))
- (list
- (lambda ()
- (and (< i end)
- (prog1
- (nth i sequence)
- (incf i))))))))
- (declare (fixnum i) (dynamic-extent reader))
- (labels ((pseudo-writer (octet)
- (declare (ignore octet))
- (decf i))
- (unreader (char)
- (char-to-octets external-format char #'pseudo-writer)))
- (declare (dynamic-extent (function pseudo-writer) (function unreader)))
- (let ((*current-unreader* #'unreader))
- (flet ((next-char ()
- (code-char (octets-to-char-code external-format reader))))
- (declare (inline next-char))
- (let* ((string-length (compute-number-of-chars external-format sequence start end nil))
- (string (make-array string-length :element-type 'char*)))
- (declare (fixnum string-length))
- (loop for j of-type fixnum from 0 below string-length
- do (setf (schar string j) (next-char))
- finally (return string))))))))
+ ;; the external format knows how to do it...
+ (octets-to-string* external-format sequence start end))
(defun octet-length (string &key (external-format :latin1) (start 0) (end (length string)))
"Returns the length of the substring of STRING from START to END in
-octets if encoded using the external format EXTERNAL-FORMAT."
+octets if encoded using the external format EXTERNAL-FORMAT.
+
+In spite of the name, STRING can be any sequence of characters, but
+the function is optimized for strings."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end) (string string))
(setq external-format (maybe-convert-external-format external-format))
@@ -98,7 +72,10 @@
(defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence)))
"Kind of the inverse of OCTET-LENGTH. Returns the length of the
subsequence \(of octets) of SEQUENCE from START to END in characters
-if decoded using the external format EXTERNAL-FORMAT."
+if decoded using the external format EXTERNAL-FORMAT.
+
+This function is optimized for the case of SEQUENCE being a vector.
+Don't use lists if you're in a hurry."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
(setq external-format (maybe-convert-external-format external-format))
More information about the Flexi-streams-cvs
mailing list