[flexi-streams-cvs] r58 - in branches/edi: . doc

eweitz at common-lisp.net eweitz at common-lisp.net
Sun May 25 20:28:27 UTC 2008


Author: eweitz
Date: Sun May 25 16:28:25 2008
New Revision: 58

Modified:
   branches/edi/decode.lisp
   branches/edi/doc/index.html
   branches/edi/encode.lisp
   branches/edi/input.lisp
   branches/edi/length.lisp
   branches/edi/mapping.lisp
   branches/edi/strings.lisp
Log:
Optimized the other direction as well

Passes tests on LispWorks


Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.25 2008/05/25 20:26:34 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -60,26 +60,217 @@
 The special variable *CURRENT-UNREADER* must be bound correctly
 whenever this function is called."))
 
-(defmethod octets-to-char-code ((format flexi-latin-1-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
-  (funcall reader))
+(defgeneric octets-to-string* (format sequence start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "A generic function which dispatches on the external
+format and does the real work for OCTETS-TO-STRING."))
 
-(defmethod octets-to-char-code ((format flexi-ascii-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
-  (when-let (octet (funcall reader))
+(defmethod octets-to-string* :around (format (list list) start end)
+  (declare #.*standard-optimize-settings*)
+  (call-next-method format (coerce list 'vector) start end))
+
+(defmacro define-sequence-readers ((format-class) &body body)
+  "Non-hygienic utility macro which defines methods for READ-SEQUENCE*
+and OCTETS-TO-STRING* for the class FORMAT-CLASS.  BODY is described
+in the docstring of DEFINE-CHAR-ENCODERS but can additionally contain
+a form (UNGET <form>) which has to be replaced by the correct code to
+`unread' the octets for the character designated by <form>."
+  (let* ((body `((block char-decoder
+                   (locally
+                     (declare #.*fixnum-optimize-settings*)
+                     , at body)))))
+    `(progn
+       (defmethod read-sequence* ((format ,format-class) flexi-input-stream sequence start end)
+         (with-accessors ((position flexi-stream-position)
+                          (bound flexi-stream-bound)
+                          (octet-stack flexi-stream-octet-stack)
+                          (last-octet flexi-stream-last-octet)
+                          (last-char-code flexi-stream-last-char-code)
+                          (stream flexi-stream-stream))
+             flexi-input-stream
+           (let* (buffer
+                  (buffer-pos 0)
+                  (buffer-end 0)
+                  (index start)
+                  ;; whether we will later be able to rewind the stream if
+                  ;; needed (to get rid of unused octets in the buffer)
+                  (can-rewind-p (maybe-rewind stream 0))
+                  (factor (encoding-factor format))
+                  (integer-factor (floor factor))
+                  ;; it's an interesting question whether it makes sense
+                  ;; performance-wise to make RESERVE significantly bigger
+                  ;; (and thus put potentially a lot more octets into
+                  ;; OCTET-STACK), especially for UTF-8
+                  (reserve (cond ((not (floatp factor)) 0)
+                                 ((not can-rewind-p) (* 2 integer-factor))
+                                 (t (ceiling (* (- factor integer-factor) (- end start)))))))
+             (declare (fixnum buffer-pos buffer-end index integer-factor reserve)
+                      (boolean can-rewind-p))
+             (flet ((compute-fill-amount ()
+                      "Computes the amount of octets we can savely read into
+the buffer without violating the stream's bound \(if there is one) and
+without potentially reading much more than we need \(unless we can
+rewind afterwards)."
+                      (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor
+                                                                        (the fixnum (- end index))))
+                                                         reserve))
+                                          +buffer-size+)))
+                        (cond (bound (min minimum (- bound position)))
+                              (t minimum))))
+                    (fill-buffer (end)
+                      "Tries to fill the buffer from BUFFER-POS to END and
+returns NIL if the buffer doesn't contain any new data."
+                      ;; put data from octet stack into buffer if there is any
+                      (loop
+                       (when (>= buffer-pos end)
+                         (return))
+                       (let ((next-octet (pop octet-stack)))
+                         (cond (next-octet
+                                (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet))
+                                (incf buffer-pos))
+                               (t (return)))))
+                      (setq buffer-end (read-sequence buffer stream
+                                                      :start buffer-pos
+                                                      :end end))
+                      ;; BUFFER-POS is only greater than zero if the buffer
+                      ;; already contains unread data from the octet stack
+                      ;; (see below), so we test for ZEROP here and do /not/
+                      ;; compare with BUFFER-POS
+                      (unless (zerop buffer-end)
+                        (incf position buffer-end))))
+               (let ((minimum (compute-fill-amount)))
+                 (declare (fixnum minimum))
+                 (setq buffer (make-octet-buffer minimum))
+                 ;; fill buffer for the first time or return immediately if
+                 ;; we don't succeed
+                 (unless (fill-buffer minimum)
+                   (return-from read-sequence* start)))
+               (setq buffer-pos 0)
+               (macrolet ((iterate (set-place)
+                            "A very unhygienic macro to implement the
+actual iteration through the sequence including housekeeping for the
+flexi stream.  SET-PLACE is the place \(using the index INDEX) used to
+access the sequence."
+                            `(flet ((leave ()
+                                      "This is the function used to
+abort the LOOP iteration below."
+                                      (when (> index start)
+                                        (setq last-octet nil
+                                              last-char-code ,(sublis '((index . (1- index))) set-place)))
+                                      (return-from read-sequence* index)))
+                               (loop
+                                (when (>= index end)
+                                  ;; check if there are octets in the
+                                  ;; buffer we didn't use - see
+                                  ;; COMPUTE-FILL-AMOUNT above
+                                  (let ((rest (- buffer-end buffer-pos)))
+                                    (when (plusp rest)
+                                      (or (and can-rewind-p
+                                               (maybe-rewind stream rest))
+                                          (loop
+                                           (when (>= buffer-pos buffer-end)
+                                             (return))
+                                           (decf buffer-end)
+                                           (push (aref (the (array octet *) buffer) buffer-end)
+                                                 octet-stack)))))
+                                  (leave))
+                                (let ((next-char-code
+                                       (progn (symbol-macrolet
+                                                  ((octet-getter
+                                                    ;; this is the code to retrieve the next octet (or
+                                                    ;; NIL) and to fill the buffer if needed
+                                                    (block next-octet
+                                                      (when (>= buffer-pos buffer-end)
+                                                        (setq buffer-pos 0)
+                                                        (unless (fill-buffer (compute-fill-amount))
+                                                          (return-from next-octet)))
+                                                      (prog1
+                                                          (aref (the (array octet *) buffer) buffer-pos)
+                                                        (incf buffer-pos)))))
+                                                (macrolet ((unget (form)
+                                                             `(unread-char% ,form flexi-input-stream)))
+                                                  ,', at body)))))
+                                  (unless next-char-code
+                                    (leave))
+                                  (setf ,set-place (code-char next-char-code))
+                                  (incf index))))))
+                 (etypecase sequence
+                   (string (iterate (char sequence index)))
+                   (array (iterate (aref sequence index)))
+                   (list (iterate (nth index sequence)))))))))
+       (defmethod octets-to-string* ((format ,format-class) sequence start end)
+         (declare #.*standard-optimize-settings*)
+         (declare (fixnum start end))
+         (let* ((i start)
+                (string-length (compute-number-of-chars format sequence start end nil))
+                (string (make-array string-length :element-type 'char*)))
+           (declare (fixnum i string-length))
+           (loop for j of-type fixnum from 0 below string-length
+                 do (setf (schar string j)
+                          (code-char (macrolet ((unget (form)
+                                                  `(decf i (character-length format ,form))))
+                                       (symbol-macrolet ((octet-getter (and (< i end)
+                                                                            (prog1
+                                                                                (aref sequence i)
+                                                                              (incf i)))))
+                                         , at body))))
+                 finally (return string)))))))
+
+(defmacro define-char-decoders ((lf-format-class cr-format-class crlf-format-class) &body body)
+  "Non-hygienic utility macro which defines several decoding-related
+methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
+CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
+encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
+similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
+BODY is a code template for the code to read octets and return one
+character.  BODY must contain a symbol OCTET-GETTER representing the
+form which is used to obtain the next octet."
+  `(progn
+     (defmethod octets-to-char-code ((format ,lf-format-class) reader)
+       (declare #.*fixnum-optimize-settings*)
+       (declare (function reader))
+       (symbol-macrolet ((octet-getter (funcall reader)))
+         ,@(sublis '((char-decoder . octets-to-char-code))
+                   body)))
+     (define-sequence-readers (,lf-format-class) , at body)
+     (define-sequence-readers (,cr-format-class)
+       ,(with-unique-names (char-code)
+          `(let ((,char-code (progn , at body)))
+             (case ,char-code
+               (#.+cr+ #.(char-code #\Newline))
+               (otherwise ,char-code)))))
+     (define-sequence-readers  (,crlf-format-class)
+       ,(with-unique-names (char-code next-char-code get-char-code)
+          `(flet ((,get-char-code () , at body))
+             (let ((,char-code (,get-char-code)))
+               (case ,char-code
+                 (#.+cr+
+                  (let ((,next-char-code (,get-char-code)))
+                    (case ,next-char-code
+                      (#.+lf+ #.(char-code #\Newline))
+                      ;; we saw a CR but no LF afterwards, but then the data
+                      ;; ended, so we just return #\Return
+                      ((nil) +cr+)
+                      ;; if the character we peeked at wasn't a
+                      ;; linefeed character we unread its constituents
+                      (otherwise (unget (code-char ,next-char-code))
+                                 ,char-code))))
+                 (otherwise ,char-code))))))))
+
+(define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
+  octet-getter)
+
+(define-char-decoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
+  (when-let (octet octet-getter)
     (if (> (the octet octet) 127)
       (recover-from-encoding-error format
                                    "No character which corresponds to octet #x~X." octet)
       octet)))
 
-(defmethod octets-to-char-code ((format flexi-8-bit-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
   (with-accessors ((decoding-table external-format-decoding-table))
       format
-    (when-let (octet (funcall reader))
+    (when-let (octet octet-getter)
       (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table)
                              (the octet octet))))
         (if (or (null char-code)
@@ -88,19 +279,17 @@
                                        "No character which corresponds to octet #x~X." octet)
           char-code)))))
 
-(defmethod octets-to-char-code ((format flexi-utf-8-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))  
+(define-char-decoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-8 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (let ((octet (read-next-byte)))
         (declare (type octet octet))
@@ -113,11 +302,7 @@
                    (values (logand octet #b00001111) 2))
                   ((= #b11110000 (logand octet #b11111000))
                    (values (logand octet #b00000111) 3))
-                  ((= #b11111000 (logand octet #b11111100))
-                   (values (logand octet #b00000011) 4))
-                  ((= #b11111100 (logand octet #b11111110))
-                   (values (logand octet #b00000001) 5))
-                  (t (return-from octets-to-char-code
+                  (t (return-from char-decoder
                        (recover-from-encoding-error format
                                                     "Unexpected value #x~X at start of UTF-8 sequence."
                                                     octet))))
@@ -130,24 +315,22 @@
                 repeat count
                 for octet of-type octet = (read-next-byte)
                 unless (= #b10000000 (logand octet #b11000000))
-                do (return-from octets-to-char-code
+                do (return-from char-decoder
                      (recover-from-encoding-error format
                                                   "Unexpected value #x~X in UTF-8 sequence." octet))
                 finally (return result)))))))
 
-(defmethod octets-to-char-code ((format flexi-utf-16-le-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-16 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (flet ((read-next-word ()
                (+ (the octet (read-next-byte))
@@ -159,7 +342,7 @@
                  (let ((next-word (read-next-word)))
                    (declare (type (unsigned-byte 16) next-word))
                    (unless (<= #xdc00 next-word #xdfff)
-                     (return-from octets-to-char-code
+                     (return-from char-decoder
                        (recover-from-encoding-error format
                                                     "Unexpected UTF-16 word #x~X following #x~X."
                                                     next-word word)))
@@ -168,19 +351,17 @@
                       #x10000)))
                 (t word)))))))
 
-(defmethod octets-to-char-code ((format flexi-utf-16-be-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-16 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (flet ((read-next-word ()
                (+ (ash (the octet (read-next-byte)) 8)
@@ -192,7 +373,7 @@
                  (let ((next-word (read-next-word)))
                    (declare (type (unsigned-byte 16) next-word))
                    (unless (<= #xdc00 next-word #xdfff)
-                     (return-from octets-to-char-code
+                     (return-from char-decoder
                        (recover-from-encoding-error format
                                                     "Unexpected UTF-16 word #x~X following #x~X."
                                                     next-word word)))
@@ -201,37 +382,33 @@
                       #x10000)))
                 (t word)))))))
 
-(defmethod octets-to-char-code ((format flexi-utf-32-le-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-32 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (loop for count of-type fixnum from 0 to 24 by 8
             for octet of-type octet = (read-next-byte)
             sum (ash octet count)))))
 
-(defmethod octets-to-char-code ((format flexi-utf-32-be-format) reader)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (function reader))
+(define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
   (let (first-octet-seen)
     (declare (boolean first-octet-seen))
     (macrolet ((read-next-byte ()
                  '(prog1
-                      (or (funcall reader)
+                      (or octet-getter
                           (cond (first-octet-seen
-                                 (return-from octets-to-char-code
+                                 (return-from char-decoder
                                    (recover-from-encoding-error format
                                                                 "End of data while in UTF-32 sequence.")))
-                                (t (return-from octets-to-char-code nil))))
+                                (t (return-from char-decoder nil))))
                     (setq first-octet-seen t))))
       (loop for count of-type fixnum from 24 downto 0 by 8
             for octet of-type octet = (read-next-byte)

Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html	(original)
+++ branches/edi/doc/index.html	Sun May 25 16:28:25 2008
@@ -996,7 +996,7 @@
 
 <h4><a name="strings" class=none>Strings</a></h4>
 
-This section collects a few convenience functions for strings conversions:
+This section collects a few convenience functions for strings conversions.
 
 <p><br>[Function]
 <br><a class=none name="string-to-octets"><b>string-to-octets</b> <i>string <tt>&key</tt> external-format start end</i> => <i>vector</i></a>
@@ -1009,7 +1009,9 @@
 <code><i>start</i></code> and <code><i>end</i></code>
 are <code>0</code> and the length of the string.  The default
 for <code><i>external-format</i></code> is <code>:LATIN1</code>.
-
+<p>
+In spite of the name, <code><i>string</i></code> can be any sequence of characters, but
+the function is optimized for strings.
 </blockquote>
 
 <p><br>[Function]
@@ -1023,6 +1025,11 @@
 <code><i>start</i></code> and <code><i>end</i></code>
 are <code>0</code> and the length of the sequence.  The default
 for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+This function is optimized for the case
+of <code><i>sequence</i></code> being
+a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>.
+Don't use lists if you are in hurry.
 </blockquote>
 
 <p><br>[Function]
@@ -1030,14 +1037,17 @@
 
 <blockquote><br>
 
-Returns the length of the substring of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
+Returns the length of the subsequence of <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> in
 <a href="#octet">octets</a> if encoded using
 the <a href="#external-formats">external format</a> designated
 by <code><i>external-format</i></code>.
 The defaults for
 <code><i>start</i></code> and <code><i>end</i></code>
-are <code>0</code> and the length of the string.  The default
+are <code>0</code> and the length of <code><i>string</i></code>.  The default
 for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+In spite of the name, <code><i>string</i></code> can be any sequence of characters, but
+the function is optimized for strings.
 </blockquote>
 
 <p><br>[Function]
@@ -1054,6 +1064,11 @@
 <code><i>start</i></code> and <code><i>end</i></code>
 are <code>0</code> and the length of the sequence.  The default
 for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+<p>
+This function is optimized for the case
+of <code><i>sequence</i></code> being
+a <a href="http://www.lispworks.com/documentation/HyperSpec/Body/t_vector.htm">vector</a>.
+Don't use lists if you are in hurry.
 </blockquote>
 
 <br> <br><h3><a class=none name="position">File positions</a></h3>
@@ -1095,7 +1110,7 @@
 his work on making FLEXI-STREAMS faster.
 
 <p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.114 2008/05/25 03:08:01 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $
 <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
 
 </body>

Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.21 2008/05/25 20:26:34 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -47,130 +47,140 @@
   (:documentation "A generic function which dispatches on the external
 format and does the real work for STRING-TO-OCTETS."))
 
+(defmethod string-to-octets* :around (format (list list) start end)
+  (declare #.*standard-optimize-settings*)
+  (call-next-method format (coerce list 'string*) start end))
+
 (defmacro define-sequence-writers ((format-class) &body body)
-  "Utility macro which defines methods for WRITE-SEQUENCE* and
-STRING-TO-OCTET* for the class FORMAT-CLASS.  For BODY see the
-docstring of DEFINE-CHAR-ENCODERS."
-  `(progn
-     (defmethod write-sequence* ((format ,format-class) stream sequence start end)
-       (declare #.*standard-optimize-settings*)
-       (declare (fixnum start end))
-       (with-accessors ((column flexi-stream-column))
-           stream
-         (let* ((octet-seen-p nil)
-                (buffer-pos 0)
-                ;; estimate should be good enough...
-                (factor (encoding-factor format))
-                ;; we don't want arbitrarily large buffer, do we?
-                (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
-                (buffer (make-octet-buffer buffer-size)))
-           (declare (fixnum buffer-pos buffer-size)
-                    (boolean octet-seen-p)
-                    (type (array octet *) buffer))
-           (macrolet ((octet-writer (form)
-                        `(write-octet ,form)))
-             (labels ((flush-buffer ()
-                        "Sends all octets in BUFFER to the underlying stream."
-                        (write-sequence buffer stream :end buffer-pos)
-                        (setq buffer-pos 0))
-                      (write-octet (octet)
-                        "Adds one octet to the buffer and flushes it if necessary."
-                        (declare (type octet octet))
-                        (when (>= buffer-pos buffer-size)
-                          (flush-buffer))
-                        (setf (aref buffer buffer-pos) octet)
-                        (incf buffer-pos))
-                      (write-object (object)
-                        "Dispatches to WRITE-OCTET or WRITE-CHARACTER
-depending on the type of OBJECT."
-                        (etypecase object
-                          (octet (setq octet-seen-p t)
-                                 (write-octet object))
-                          (character (symbol-macrolet ((char-getter object))
-                                       , at body)))))
-               (macrolet ((iterate (&body output-forms)
-                            "An unhygienic macro to implement the actual
-iteration through SEQUENCE.  OUTPUT-FORM is the form to retrieve one
-sequence element and put its octet representation into the buffer."
-                            `(loop for index of-type fixnum from start below end
-                                   do (progn , at output-forms)
-                                   finally (when (plusp buffer-pos)
-                                             (flush-buffer)))))
-                 (etypecase sequence
-                   (string (iterate
-                            (symbol-macrolet ((char-getter (char sequence index)))
-                              , at body)))
-                   (array (iterate
-                           (symbol-macrolet ((char-getter (aref sequence index)))
-                             , at body)))
-                   (list  (iterate (write-object (nth index sequence))))))
-               ;; update the column slot, setting it to NIL if we sent
-               ;; octets
-               (setq column
-                     (cond (octet-seen-p nil)
-                           (t (let ((last-newline-pos (position #\Newline sequence
-                                                                :test #'char=
-                                                                :start start
-                                                                :end end
-                                                                :from-end t)))
-                                (cond (last-newline-pos (- end last-newline-pos 1))
-                                      (column (+ column (- end start)))))))))))))  
-     (defmethod string-to-octets* ((format ,format-class) string start end)
-       (declare #.*standard-optimize-settings*)
-       (declare (fixnum start end) (string string))
-       (let ((octets (make-array (compute-number-of-octets format string start end)
-                                 :element-type 'octet))
-             (j 0))
-         (declare (fixnum j))
-         (loop for i of-type fixnum from start below end do
-               (macrolet ((octet-writer (form)
-                            `(progn
-                               (setf (aref (the (array octet *) octets) j) ,form)
-                               (incf j))))
-                 (symbol-macrolet ((char-getter (char string i)))
-                   (progn , at body))))
-         octets))))
-
-;; char-getter can be called more than once - no side effects
-(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body)
-  "Utility macro which defines several encoding-related methods for
-the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where
-it is assumed that CR-FORMAT-CLASS is the same encoding as
-FORMAT-CLASS but with CR line endings and similar for
-CRLF-FORMAT-CLASS.  BODY is a code template for the code to convert
-one character to octets.  BODY must contain a symbol CHAR-GETTER
-representing the form which is used to obtain the character and a
-forms like \(OCTET-WRITE <thing>) to write the octet <thing>.  The
-CHAR-GETTER form might be called more than once."
+  "Non-hygienic utility macro which defines methods for
+WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS.  For
+BODY see the docstring of DEFINE-CHAR-ENCODERS."
   (let ((body `((locally
                   (declare #.*fixnum-optimize-settings*)
                   , at body))))
     `(progn
-       (defmethod char-to-octets ((format ,format-class) char writer)
-         (declare (character char) (function writer))
-         (symbol-macrolet ((char-getter char))
-           (macrolet ((octet-writer (form)
-                        `(funcall writer ,form)))
-             , at body)))
-       (define-sequence-writers (,format-class) , at body)
-       (define-sequence-writers (,cr-format-class)
-         ,@(sublis `((char-getter . ,(with-unique-names (char)
-                                       `(let ((,char char-getter))
-                                          (declare (character ,char))
-                                          (if (char= ,char #\Newline)
-                                            #\Return
-                                            ,char)))))
-                   body))
-       (define-sequence-writers (,crlf-format-class)
-         ,(with-unique-names (char write-char)
-            `(flet ((,write-char (,char)
-                      ,@(sublis `((char-getter . ,char)) body)))
-               (let ((,char char-getter))
-                 (declare (character ,char))
-                 (cond ((char= ,char #\Newline)
-                        (,write-char #\Return)
-                        (,write-char #\Newline))
-                       (t (,write-char ,char))))))))))
+       (defmethod string-to-octets* ((format ,format-class) string start end)
+         (declare #.*standard-optimize-settings*)
+         (declare (fixnum start end) (string string))
+         (let ((octets (make-array (compute-number-of-octets format string start end)
+                                   :element-type 'octet))
+               (j 0))
+           (declare (fixnum j))
+           (loop for i of-type fixnum from start below end do
+                 (macrolet ((octet-writer (form)
+                              `(progn
+                                 (setf (aref (the (array octet *) octets) j) ,form)
+                                 (incf j))))
+                   (symbol-macrolet ((char-getter (char string i)))
+                     (progn , at body))))
+           octets)) 
+       (defmethod write-sequence* ((format ,format-class) stream sequence start end)
+         (declare #.*standard-optimize-settings*)
+         (declare (fixnum start end))
+         (with-accessors ((column flexi-stream-column))
+             stream
+           (let* ((octet-seen-p nil)
+                  (buffer-pos 0)
+                  ;; estimate should be good enough...
+                  (factor (encoding-factor format))
+                  ;; we don't want arbitrarily large buffer, do we?
+                  (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
+                  (buffer (make-octet-buffer buffer-size)))
+             (declare (fixnum buffer-pos buffer-size)
+                      (boolean octet-seen-p)
+                      (type (array octet *) buffer))
+             (macrolet ((octet-writer (form)
+                          `(write-octet ,form)))
+               (labels ((flush-buffer ()
+                          "Sends all octets in BUFFER to the underlying stream."
+                          (write-sequence buffer stream :end buffer-pos)
+                          (setq buffer-pos 0))
+                        (write-octet (octet)
+                          "Adds one octet to the buffer and flushes it if necessary."
+                          (declare (type octet octet))
+                          (when (>= buffer-pos buffer-size)
+                            (flush-buffer))
+                          (setf (aref buffer buffer-pos) octet)
+                          (incf buffer-pos))
+                        (write-object (object)
+                          "Dispatches to WRITE-OCTET or WRITE-CHARACTER
+depending on the type of OBJECT."
+                          (etypecase object
+                            (octet (setq octet-seen-p t)
+                                   (write-octet object))
+                            (character (symbol-macrolet ((char-getter object))
+                                         , at body)))))
+                 (macrolet ((iterate (&body output-forms)
+                              "An unhygienic macro to implement the actual
+iteration through SEQUENCE.  OUTPUT-FORM is the form to retrieve one
+sequence element and put its octet representation into the buffer."
+                              `(loop for index of-type fixnum from start below end
+                                     do (progn , at output-forms)
+                                     finally (when (plusp buffer-pos)
+                                               (flush-buffer)))))
+                   (etypecase sequence
+                     (string (iterate
+                              (symbol-macrolet ((char-getter (char sequence index)))
+                                , at body)))
+                     (array (iterate
+                             (symbol-macrolet ((char-getter (aref sequence index)))
+                               , at body)))
+                     (list  (iterate (write-object (nth index sequence))))))
+                 ;; update the column slot, setting it to NIL if we sent
+                 ;; octets
+                 (setq column
+                       (cond (octet-seen-p nil)
+                             (t (let ((last-newline-pos (position #\Newline sequence
+                                                                  :test #'char=
+                                                                  :start start
+                                                                  :end end
+                                                                  :from-end t)))
+                                  (cond (last-newline-pos (- end last-newline-pos 1))
+                                        (column (+ column (- end start))))))))))))))))
+
+(defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body)
+  "Non-hygienic utility macro which defines several encoding-related
+methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
+CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
+encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
+similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
+BODY is a code template for the code to convert one character to
+octets.  BODY must contain a symbol CHAR-GETTER representing the form
+which is used to obtain the character and a forms like \(OCTET-WRITE
+<thing>) to write the octet <thing>.  The CHAR-GETTER form might be
+called more than once."
+  `(progn
+     (defmethod char-to-octets ((format ,lf-format-class) char writer)
+       (declare #.*fixnum-optimize-settings*)
+       (declare (character char) (function writer))
+       (symbol-macrolet ((char-getter char))
+         (macrolet ((octet-writer (form)
+                      `(funcall writer ,form)))
+           , at body)))
+     (define-sequence-writers (,lf-format-class) , at body)
+     (define-sequence-writers (,cr-format-class)
+       ;; modify the body so that the getter replaces a #\Newline
+       ;; with a #\Return
+       ,@(sublis `((char-getter . ,(with-unique-names (char)
+                                     `(let ((,char char-getter))
+                                        (declare (character ,char))
+                                        (if (char= ,char #\Newline)
+                                          #\Return
+                                          ,char)))))
+                 body))
+     (define-sequence-writers (,crlf-format-class)
+       ;; modify the body so that we potentially write octets for
+       ;; two characters (#\Return and #\Linefeed) - the original
+       ;; body is wrapped with the WRITE-CHAR local function
+       ,(with-unique-names (char write-char)
+          `(flet ((,write-char (,char)
+                    ,@(sublis `((char-getter . ,char)) body)))
+             (let ((,char char-getter))
+               (declare (character ,char))
+               (cond ((char= ,char #\Newline)
+                      (,write-char #\Return)
+                      (,write-char #\Linefeed))
+                     (t (,write-char ,char)))))))))
 
 (define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format  flexi-crlf-latin-1-format)
   (let ((octet (char-code char-getter)))

Modified: branches/edi/input.lisp
==============================================================================
--- branches/edi/input.lisp	(original)
+++ branches/edi/input.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.77 2008/05/25 03:34:55 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.78 2008/05/25 19:25:44 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -201,9 +201,7 @@
 others - see for example FLEXI-STREAMS-TEST::SEQUENCE-TEST."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
-  (with-accessors ((position flexi-stream-position)
-                   (bound flexi-stream-bound)
-                   (octet-stack flexi-stream-octet-stack)
+  (with-accessors ((octet-stack flexi-stream-octet-stack)
                    (external-format flexi-stream-external-format)
                    (last-octet flexi-stream-last-octet)
                    (last-char-code flexi-stream-last-char-code)
@@ -233,116 +231,8 @@
           (setq last-char-code nil
                 last-octet (elt sequence (1- index))))
         (return-from stream-read-sequence index)))
-    (let* (buffer
-           (buffer-pos 0)
-           (buffer-end 0)
-           (index start)
-           ;; whether we will later be able to rewind the stream if
-           ;; needed (to get rid of unused octets in the buffer)
-           (can-rewind-p (maybe-rewind stream 0))
-           (factor (encoding-factor external-format))
-           (integer-factor (floor factor))
-           ;; it's an interesting question whether it makes sense
-           ;; performance-wise to make RESERVE significantly bigger
-           ;; (and thus put potentially a lot more octets into
-           ;; OCTET-STACK), especially for UTF-8
-           (reserve (cond ((not (floatp factor)) 0)
-                          ((not can-rewind-p) (* 2 integer-factor))
-                          (t (ceiling (* (- factor integer-factor) (- end start)))))))
-      (declare (fixnum buffer-pos buffer-end index integer-factor reserve)
-               (boolean can-rewind-p))
-      (flet ((compute-fill-amount ()
-               "Computes the amount of octets we can savely read into
-the buffer without violating the stream's bound \(if there is one) and
-without potentially reading much more than we need \(unless we can
-rewind afterwards)."
-               (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor
-                                                                 (the fixnum (- end index))))
-                                                  reserve))
-                                   +buffer-size+)))
-                 (cond (bound (min minimum (- bound position)))
-                       (t minimum))))
-             (fill-buffer (end)
-               "Tries to fill the buffer from BUFFER-POS to END and
-returns NIL if the buffer doesn't contain any new data."
-               ;; put data from octet stack into buffer if there is any
-               (loop
-                (when (>= buffer-pos end)
-                  (return))
-                (let ((next-octet (pop octet-stack)))
-                  (cond (next-octet
-                         (setf (aref (the (array octet *) buffer) buffer-pos) (the octet next-octet))
-                         (incf buffer-pos))
-                        (t (return)))))
-               (setq buffer-end (read-sequence buffer stream
-                                               :start buffer-pos
-                                               :end end))
-               ;; BUFFER-POS is only greater than zero if the buffer
-               ;; already contains unread data from the octet stack
-               ;; (see below), so we test for ZEROP here and do /not/
-               ;; compare with BUFFER-POS
-               (unless (zerop buffer-end)
-                 (incf position buffer-end))))
-        (let ((minimum (compute-fill-amount)))
-          (declare (fixnum minimum))
-          (setq buffer (make-octet-buffer minimum))
-          ;; fill buffer for the first time or return immediately if
-          ;; we don't succeed
-          (unless (fill-buffer minimum)
-            (return-from stream-read-sequence start)))
-        (setq buffer-pos 0)
-        (flet ((next-octet ()
-                 "Returns the next octet from the buffer and fills it
-if it is exhausted.  Returns NIL if there's no more data on the
-stream."
-                 (when (>= buffer-pos buffer-end)
-                   (setq buffer-pos 0)
-                   (unless (fill-buffer (compute-fill-amount))
-                     (return-from next-octet)))
-                 (prog1
-                     (aref (the (array octet *) buffer) buffer-pos)
-                   (incf buffer-pos)))
-               (unreader (char)
-                 (unread-char% char flexi-input-stream)))
-          (declare (dynamic-extent (function next-octet) (function unreader)))
-          (let ((*current-unreader* #'unreader))
-            (macrolet ((iterate (set-place)
-                         "A very unhygienic macro to implement the
-actual iteration through the sequence including housekeeping for the
-flexi stream.  SET-PLACE is the place \(using the index INDEX) used to
-access the sequence."
-                         `(flet ((leave ()
-                                   "This is the function used to abort
-the LOOP iteration below."
-                                   (when (> index start)
-                                     (setq last-octet nil
-                                           last-char-code ,(sublis '((index . (1- index))) set-place)))
-                                   (return-from stream-read-sequence index)))
-                            (loop
-                             (when (>= index end)
-                               ;; check if there are octets in the
-                               ;; buffer we didn't use - see
-                               ;; COMPUTE-FILL-AMOUNT above
-                               (let ((rest (- buffer-end buffer-pos)))
-                                 (when (plusp rest)
-                                   (or (and can-rewind-p
-                                            (maybe-rewind stream rest))
-                                       (loop
-                                        (when (>= buffer-pos buffer-end)
-                                          (return))
-                                        (decf buffer-end)
-                                        (push (aref (the (array octet *) buffer) buffer-end)
-                                              octet-stack)))))
-                               (leave))
-                             (let ((next-char-code (octets-to-char-code external-format #'next-octet)))
-                               (unless next-char-code
-                                 (leave))
-                               (setf ,set-place (code-char next-char-code))
-                               (incf index))))))
-              (etypecase sequence
-                (string (iterate (char sequence index)))
-                (array (iterate (aref sequence index)))
-                (list (iterate (nth index sequence)))))))))))
+    ;; otherwise hand over to the external format to do the work
+    (read-sequence* external-format flexi-input-stream sequence start end)))
 
 (defmethod stream-unread-char ((stream flexi-input-stream) char)
   "Implements UNREAD-CHAR for streams of type FLEXI-INPUT-STREAM.

Modified: branches/edi/length.lisp
==============================================================================
--- branches/edi/length.lisp	(original)
+++ branches/edi/length.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -115,7 +115,7 @@
   ;; formats with CRLF line endings have their own specialized methods
   ;; below
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (declare (ignore warnp))
   (let ((i start)
         (length (- end start)))
@@ -132,7 +132,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
@@ -152,7 +152,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start)
         (last-octet 0))
@@ -175,7 +175,7 @@
 
 (defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (declare (ignore sequence))
   (when (and warnp (oddp (- end start)))
     (signal-encoding-warning format "~A octet~:P cannot be decoded ~
@@ -203,7 +203,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
@@ -222,7 +222,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start)
         (last-octet 0))
@@ -248,7 +248,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (let ((sum 0)
         (i start)
         (last-octet 0))
@@ -290,7 +290,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (declare (ignore warnp))
   (let ((i start)
         (length (ceiling (- end start) 4)))
@@ -308,7 +308,7 @@
 
 (defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (vector sequence))
   (declare (ignore warnp))
   (let ((i start)
         (length (ceiling (- end start) 4)))
@@ -330,22 +330,26 @@
 encode the sequence of characters in SEQUENCE from START to END using
 the external format FORMAT."))
 
-(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+(defmethod compute-number-of-octets :around (format (list list) start end)
+  (declare #.*standard-optimize-settings*)
+  (call-next-method format (coerce list 'string*) start end))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) string start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
-  (declare (ignore sequence))  
+  (declare (ignore string))  
   (- end start))
 
-(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((< char-code #x80) 1)
                                ((< char-code #x800) 2)
                                ((< char-code #x10000) 3)
@@ -355,16 +359,16 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
                                ((< char-code #x80) 1)
                                ((< char-code #x800) 2)
@@ -375,16 +379,16 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((< char-code #x10000) 2)
                                (t 4))))
        (declare (fixnum char-length) (type char-code-integer char-code))
@@ -392,16 +396,16 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
                                ((< char-code #x10000) 2)
                                (t 4))))
@@ -410,16 +414,16 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (let ((sum 0)
         (i start))
     (declare (fixnum i sum))
     (loop
      (when (>= i end)
        (return))
-     (let* ((char-code (char-code (aref sequence i)))
+     (let* ((char-code (char-code (char string i)))
             (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
                                ((< char-code #x10000) 2)
                                (t 4))))
@@ -428,17 +432,39 @@
        (incf i)))
     sum))
 
-(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) string start end)
   (declare #.*fixnum-optimize-settings*)
   (declare (fixnum start end))
-  (declare (ignore sequence))
+  (declare (ignore string))
   (* 4 (- end start)))
 
-(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) string start end)
   (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
+  (declare (fixnum start end) (string string))
   (+ (call-next-method)
      (* (case (external-format-name format)
           (:utf-32 4)
           (otherwise 1))
-        (count #\Newline sequence :start start :end end :test #'char=))))
\ No newline at end of file
+        (count #\Newline string :start start :end end :test #'char=))))
+
+(defgeneric character-length (format char)
+  (declare #.*fixnum-optimize-settings*)
+  (:documentation "Returns the number of octets needed to encode the
+single character CHAR.")
+  (:method (format char)
+   (compute-number-of-octets format (string char) 0 1)))
+
+(defmethod character-length :around ((format flexi-crlf-mixin) (char (eql #\Newline)))
+  (declare #.*fixnum-optimize-settings*)
+  (+ (call-next-method format +cr+)
+     (call-next-method format +lf+)))
+
+(defmethod character-length ((format flexi-8-bit-format) char)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (ignore char))
+  1)
+
+(defmethod character-length ((format flexi-utf-32-format) char)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (ignore char))
+  4)
\ No newline at end of file

Modified: branches/edi/mapping.lisp
==============================================================================
--- branches/edi/mapping.lisp	(original)
+++ branches/edi/mapping.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.2 2008/05/20 21:15:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/mapping.lisp,v 1.3 2008/05/25 19:07:53 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -39,6 +39,12 @@
   #+:lispworks 'lw:simple-char
   #-:lispworks 'character)
 
+(deftype string* ()
+  "Convenience shortcut to paper over the difference between LispWorks
+and the other Lisps."
+  #+:lispworks 'lw:text-string
+  #-:lispworks 'string)
+
 (deftype char-code-integer ()
   "The subtype of integers which can be returned by the function CHAR-CODE."
   '(integer 0 #.(1- char-code-limit)))

Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp	(original)
+++ branches/edi/strings.lisp	Sun May 25 16:28:25 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.29 2008/05/25 03:34:55 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -34,7 +34,10 @@
                                 (start 0) (end (length string)))
   "Converts the Lisp string STRING from START to END to an array of
 octets corresponding to the external format designated by
-EXTERNAL-FORMAT."
+EXTERNAL-FORMAT.
+
+In spite of the name, STRING can be any sequence of characters, but
+the function is optimized for strings."
   (declare #.*standard-optimize-settings*)
   (declare (string string))
   (setq external-format (maybe-convert-external-format external-format))
@@ -45,51 +48,22 @@
                                   (external-format :latin1)
                                   (start 0) (end (length sequence)))
   "Converts the Lisp sequence SEQUENCE of octets from START to END to
-a string using the external format designated by EXTERNAL-FORMAT."
+a string using the external format designated by EXTERNAL-FORMAT.
+
+This function is optimized for the case of SEQUENCE being a vector.
+Don't use lists if you're in a hurry."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
   (setq external-format (maybe-convert-external-format external-format))
-  (let* ((i start)
-         (reader (etypecase sequence
-                   ((array octet *)
-                    (lambda ()
-                      (and (< i end)
-                           (prog1
-                               (aref (the (array octet *) sequence) i)
-                             (incf i)))))
-                   ((array * *)
-                    (lambda ()
-                      (and (< i end)
-                           (prog1
-                               (aref sequence i)
-                             (incf i)))))
-                   (list
-                    (lambda ()
-                      (and (< i end)
-                           (prog1
-                               (nth i sequence)
-                             (incf i))))))))
-    (declare (fixnum i) (dynamic-extent reader))
-    (labels ((pseudo-writer (octet)
-               (declare (ignore octet))
-               (decf i))
-             (unreader (char)
-               (char-to-octets external-format char #'pseudo-writer)))
-      (declare (dynamic-extent (function pseudo-writer) (function unreader)))
-      (let ((*current-unreader* #'unreader))
-        (flet ((next-char ()
-                 (code-char (octets-to-char-code external-format reader))))
-          (declare (inline next-char))
-          (let* ((string-length (compute-number-of-chars external-format sequence start end nil))
-                 (string (make-array string-length :element-type 'char*)))
-            (declare (fixnum string-length))
-            (loop for j of-type fixnum from 0 below string-length
-                  do (setf (schar string j) (next-char))
-                  finally (return string))))))))
+  ;; the external format knows how to do it...
+  (octets-to-string* external-format sequence start end))
 
 (defun octet-length (string &key (external-format :latin1) (start 0) (end (length string)))
   "Returns the length of the substring of STRING from START to END in
-octets if encoded using the external format EXTERNAL-FORMAT."
+octets if encoded using the external format EXTERNAL-FORMAT.
+
+In spite of the name, STRING can be any sequence of characters, but
+the function is optimized for strings."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end) (string string))
   (setq external-format (maybe-convert-external-format external-format))
@@ -98,7 +72,10 @@
 (defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence)))
   "Kind of the inverse of OCTET-LENGTH.  Returns the length of the
 subsequence \(of octets) of SEQUENCE from START to END in characters
-if decoded using the external format EXTERNAL-FORMAT."
+if decoded using the external format EXTERNAL-FORMAT.
+
+This function is optimized for the case of SEQUENCE being a vector.
+Don't use lists if you're in a hurry."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
   (setq external-format (maybe-convert-external-format external-format))



More information about the Flexi-streams-cvs mailing list