[flexi-streams-cvs] r53 - in branches/edi: . test

eweitz at common-lisp.net eweitz at common-lisp.net
Sat May 24 23:34:52 UTC 2008


Author: eweitz
Date: Sat May 24 19:34:51 2008
New Revision: 53

Added:
   branches/edi/conditions.lisp
Modified:
   branches/edi/encode.lisp
   branches/edi/output.lisp
   branches/edi/strings.lisp
   branches/edi/test/test.lisp
Log:
Faster encoding - passes all tests on LW


Added: branches/edi/conditions.lisp
==============================================================================
--- (empty file)
+++ branches/edi/conditions.lisp	Sat May 24 19:34:51 2008
@@ -0,0 +1,108 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.7 2008/05/21 00:05:42 edi Exp $
+
+;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :flexi-streams)
+
+(define-condition flexi-stream-error (stream-error)
+  ()
+  (:documentation "Superclass for all errors related to flexi
+streams."))
+
+(define-condition flexi-stream-simple-error (flexi-stream-error simple-condition)
+  ()
+  (:documentation "Like FLEXI-STREAM-ERROR but with formatting
+capabilities."))
+
+(define-condition flexi-stream-element-type-error (flexi-stream-error)
+  ((element-type :initarg :element-type
+                 :reader flexi-stream-element-type-error-element-type))
+  (:report (lambda (condition stream)
+             (format stream "Element type ~S not allowed."
+                     (flexi-stream-element-type-error-element-type condition))))
+  (:documentation "Errors of this type are signalled if the flexi
+stream has a wrong element type."))
+
+(define-condition flexi-stream-out-of-sync-error (flexi-stream-error)
+  ()
+  (:report (lambda (condition stream)
+             (declare (ignore condition))
+             (format stream "Stream out of sync from previous
+lookahead, couldn't rewind.")))
+  (:documentation "This can happen if you're trying to write to an IO
+stream which had prior to that `looked ahead' while reading and now
+can't `rewind' to the octet where you /should/ be."))
+
+(define-condition in-memory-stream-error (stream-error)
+  ()
+  (:documentation "Superclass for all errors related to
+IN-MEMORY streams."))
+
+(define-condition in-memory-stream-simple-error (in-memory-stream-error simple-condition)
+  ()
+  (:documentation "Like IN-MEMORY-STREAM-ERROR but with formatting
+capabilities."))
+
+(define-condition in-memory-stream-closed-error (in-memory-stream-error)
+  ()
+  (:report (lambda (condition stream)
+             (format stream "~S is closed."
+                     (stream-error-stream condition))))
+  (:documentation "An error that is signalled when someone is trying
+to read from or write to a closed IN-MEMORY stream."))
+
+(define-condition in-memory-stream-position-spec-error (in-memory-stream-simple-error)
+  ((position-spec :initarg :position-spec
+                  :reader in-memory-stream-position-spec-error-position-spec))
+  (:documentation "Errors of this type are signalled if an erroneous
+position spec is used in conjunction with FILE-POSITION."))
+
+(define-condition external-format-error ()
+  ((external-format :initarg :external-format
+                    :initform nil
+                    :reader external-format-error-external-format))
+  (:documentation "Superclass for all errors related to external
+formats."))
+
+(define-condition external-format-simple-error (external-format-error simple-condition)
+  ()
+  (:documentation "Like EXTERNAL-FORMAT-ERROR but with formatting
+capabilities."))
+  
+(define-condition external-format-encoding-error (external-format-simple-error)
+  ()
+  (:documentation "Errors of this type are signalled if there is an
+encoding problem."))
+
+(defun signal-encoding-error (external-format format-control &rest format-args)
+  "Convenience function similar to ERROR to signal conditions of type
+EXTERNAL-FORMAT-ENCODING-ERROR."
+  (error 'external-format-encoding-error
+         :format-control format-control
+         :format-arguments format-args
+         :external-format external-format))

Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.12 2008/05/20 23:01:50 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.16 2008/05/24 23:27:23 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -29,6 +29,125 @@
 
 (in-package :flexi-streams)
 
+(defgeneric compute-number-of-octets (format sequence start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Computes the exact number of octets required to
+encode the sequence of characters in SEQUENCE from START to END using
+the external format FORMAT."))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))  
+  (- end start))
+
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((< char-code #x80) 1)
+                               ((< char-code #x800) 2)
+                               ((< char-code #x10000) 3)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
+                               ((< char-code #x80) 1)
+                               ((< char-code #x800) 2)
+                               ((< char-code #x10000) 3)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+                               ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+                               ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))
+  (* 4 (- end start)))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (+ (call-next-method)
+     (* (case (external-format-name format)
+          (:utf-32 4)
+          (otherwise 1))
+        (count #\Newline sequence :start start :end end :test #'char=))))
+
 (defgeneric char-to-octets (format char writer)
   (declare #.*standard-optimize-settings*)
   (:documentation "Converts the character CHAR to a sequence of octets
@@ -37,72 +156,188 @@
 repeatedly each octet.  The return value of this function is
 unspecified."))
 
-(defmethod char-to-octets ((format flexi-latin-1-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((octet (char-code char)))
+(defgeneric write-sequence* (format stream sequence start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "A generic function which dispatches on the external
+format and does the real work for STREAM-WRITE-SEQUENCE."))
+
+(defgeneric string-to-octets* (format string start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "A generic function which dispatches on the external
+format and does the real work for STRING-TO-OCTETS."))
+
+(defmacro define-sequence-writers ((format-class) &body body)
+  "Utility macro which defines methods for WRITE-SEQUENCE* and
+STRING-TO-OCTET* for the class FORMAT-CLASS.  For BODY see the
+docstring of DEFINE-CHAR-ENCODERS."
+  `(progn
+     (defmethod write-sequence* ((format ,format-class) stream sequence start end)
+       (declare #.*standard-optimize-settings*)
+       (declare (fixnum start end))
+       (with-accessors ((column flexi-stream-column))
+           stream
+         (let* ((octet-seen-p nil)
+                (buffer-pos 0)
+                ;; estimate should be good enough...
+                (factor (encoding-factor format))
+                ;; we don't want arbitrarily large buffer, do we?
+                (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
+                (buffer (make-octet-buffer buffer-size)))
+           (declare (fixnum buffer-pos buffer-size)
+                    (boolean octet-seen-p)
+                    (type (array octet *) buffer))
+           (macrolet ((octet-writer (form)
+                        `(write-octet ,form)))
+             (labels ((flush-buffer ()
+                        "Sends all octets in BUFFER to the underlying stream."
+                        (write-sequence buffer stream :end buffer-pos)
+                        (setq buffer-pos 0))
+                      (write-octet (octet)
+                        "Adds one octet to the buffer and flushes it if necessary."
+                        (declare (octet octet))
+                        (when (>= buffer-pos buffer-size)
+                          (flush-buffer))
+                        (setf (aref buffer buffer-pos) octet)
+                        (incf buffer-pos))
+                      (write-object (object)
+                        "Dispatches to WRITE-OCTET or WRITE-CHARACTER
+depending on the type of OBJECT."
+                        (etypecase object
+                          (octet (setq octet-seen-p t)
+                                 (write-octet object))
+                          (character (symbol-macrolet ((char-getter object))
+                                       , at body)))))
+               (macrolet ((iterate (&body output-forms)
+                            "An unhygienic macro to implement the actual
+iteration through SEQUENCE.  OUTPUT-FORM is the form to retrieve one
+sequence element and put its octet representation into the buffer."
+                            `(loop for index of-type fixnum from start below end
+                                   do (progn , at output-forms)
+                                   finally (when (plusp buffer-pos)
+                                             (flush-buffer)))))
+                 (etypecase sequence
+                   (string (iterate
+                            (symbol-macrolet ((char-getter (char sequence index)))
+                              , at body)))
+                   (array (iterate
+                           (symbol-macrolet ((char-getter (aref sequence index)))
+                             , at body)))
+                   (list  (iterate (write-object (nth index sequence))))))
+               ;; update the column slot, setting it to NIL if we sent
+               ;; octets
+               (setq column
+                     (cond (octet-seen-p nil)
+                           (t (let ((last-newline-pos (position #\Newline sequence
+                                                                :test #'char=
+                                                                :start start
+                                                                :end end
+                                                                :from-end t)))
+                                (cond (last-newline-pos (- end last-newline-pos 1))
+                                      (column (+ column (- end start)))))))))))))  
+     (defmethod string-to-octets* ((format ,format-class) string start end)
+       (declare #.*standard-optimize-settings*)
+       (declare (fixnum start end) (string string))
+       (let ((octets (make-array (compute-number-of-octets format string start end)
+                                 :element-type 'octet))
+             (j 0))
+         (declare (fixnum j))
+         (loop for i of-type fixnum from start below end do
+               (macrolet ((octet-writer (form)
+                            `(progn
+                               (setf (aref (the (array octet *) octets) j) ,form)
+                               (incf j))))
+                 (symbol-macrolet ((char-getter (char string i)))
+                   (progn , at body))))
+         octets))))
+
+;; char-getter can be called more than once - no side effects
+(defmacro define-char-encoders ((format-class cr-format-class crlf-format-class) &body body)
+  "Utility macro which defines several encoding-related methods for
+the classes FORMAT-CLASS, CR-FORMAT-CLASS, and CRLF-FORMAT-CLASS where
+it is assumed that CR-FORMAT-CLASS is the same encoding as
+FORMAT-CLASS but with CR line endings and similar for
+CRLF-FORMAT-CLASS.  BODY is a code template for the code to convert
+one character to octets.  BODY must contain a symbol CHAR-GETTER
+representing the form which is used to obtain the character and a
+forms like \(OCTET-WRITE <thing>) to write the octet <thing>.  The
+CHAR-GETTER form might be called more than once."
+  (let ((body `((locally
+                  (declare #.*fixnum-optimize-settings*)
+                  , at body))))
+    `(progn
+       (defmethod char-to-octets ((format ,format-class) char writer)
+         (declare (character char) (function writer))
+         (symbol-macrolet ((char-getter char))
+           (macrolet ((octet-writer (form)
+                        `(funcall writer ,form)))
+             , at body)))
+       (define-sequence-writers (,format-class) , at body)
+       (define-sequence-writers (,cr-format-class)
+         ,@(sublis `((char-getter . ,(with-unique-names (char)
+                                       `(let ((,char char-getter))
+                                          (declare (character ,char))
+                                          (if (char= ,char #\Newline)
+                                            #\Return
+                                            ,char)))))
+                   body))
+       (define-sequence-writers (,crlf-format-class)
+         ,(with-unique-names (char write-char)
+            `(flet ((,write-char (,char)
+                      ,@(sublis `((char-getter . ,char)) body)))
+               (let ((,char char-getter))
+                 (declare (character ,char))
+                 (cond ((char= ,char #\Newline)
+                        (,write-char #\Return)
+                        (,write-char #\Newline))
+                       (t (,write-char ,char))))))))))
+
+(define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format  flexi-crlf-latin-1-format)
+  (let ((octet (char-code char-getter)))
     (when (> octet 255)
-      (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char octet))
-    (funcall writer octet)))
+      (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet))
+    (octet-writer octet)))
 
-(defmethod char-to-octets ((format flexi-ascii-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((octet (char-code char)))
+(define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
+  (let ((octet (char-code char-getter)))
     (when (> octet 127)
-      (signal-encoding-error format "~S (code ~A) is not an ASCII character." char octet))
-    (funcall writer octet)))
+      (signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet))
+    (octet-writer octet)))
 
-(defmethod char-to-octets ((format flexi-8-bit-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
+(define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
   (with-accessors ((encoding-hash external-format-encoding-hash))
       format
-    (let ((octet (gethash (char-code char) encoding-hash)))
+    (let ((octet (gethash (char-code char-getter) encoding-hash)))
       (unless octet
-        (signal-encoding-error format "~S (code ~A) is not in this encoding." char octet))
-      (funcall writer octet))))
+        (signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet))
+      (octet-writer octet))))
 
-(defmethod char-to-octets ((format flexi-utf-8-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((char-code (char-code char)))
+(define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
+  (let ((char-code (char-code char-getter)))
     (tagbody
      (cond ((< char-code #x80)
-            (funcall writer char-code)
+            (octet-writer char-code)
             (go zero))
            ((< char-code #x800)
-            (funcall writer (logior #b11000000 (ldb (byte 5 6) char-code)))
+            (octet-writer (logior #b11000000 (ldb (byte 5 6) char-code)))
             (go one))
            ((< char-code #x10000)
-            (funcall writer (logior #b11100000 (ldb (byte 4 12) char-code)))
+            (octet-writer (logior #b11100000 (ldb (byte 4 12) char-code)))
             (go two))
-           ((< char-code #x200000)
-            (funcall writer (logior #b11110000 (ldb (byte 3 18) char-code)))
-            (go three))
-           ((< char-code #x4000000)
-            (funcall writer (logior #b11111000 (ldb (byte 2 24) char-code)))
-            (go four))
-           (t (funcall writer (if (logbitp 30 char-code) #b11111101 #b11111100))))
-     (funcall writer (logior #b10000000 (ldb (byte 6 24) char-code)))
-     four
-     (funcall writer (logior #b10000000 (ldb (byte 6 18) char-code)))
-     three
-     (funcall writer (logior #b10000000 (ldb (byte 6 12) char-code)))
+           (t
+            (octet-writer (logior #b11110000 (ldb (byte 3 18) char-code)))))
+     (octet-writer (logior #b10000000 (ldb (byte 6 12) char-code)))
      two
-     (funcall writer (logior #b10000000 (ldb (byte 6 6) char-code)))
+     (octet-writer (logior #b10000000 (ldb (byte 6 6) char-code)))
      one
-     (funcall writer (logior #b10000000 (ldb (byte 6 0) char-code)))
+     (octet-writer (logior #b10000000 (ldb (byte 6 0) char-code)))
      zero)))
 
-(defmethod char-to-octets ((format flexi-utf-16-le-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
+(define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
   (flet ((write-word (word)
-           (funcall writer (ldb (byte 8 0) word))
-           (funcall writer (ldb (byte 8 8) word))))
+           (octet-writer (ldb (byte 8 0) word))
+           (octet-writer (ldb (byte 8 8) word))))
     (declare (inline write-word))
-    (let ((char-code (char-code char)))
+    (let ((char-code (char-code char-getter)))
       (declare (type char-code-integer char-code))
       (cond ((< char-code #x10000)
              (write-word char-code))
@@ -110,14 +345,12 @@
                (write-word (logior #xd800 (ldb (byte 10 10) char-code)))
                (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
 
-(defmethod char-to-octets ((format flexi-utf-16-be-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
+(define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
   (flet ((write-word (word)
-           (funcall writer (ldb (byte 8 8) word))
-           (funcall writer (ldb (byte 8 0) word))))
+           (octet-writer (ldb (byte 8 8) word))
+           (octet-writer (ldb (byte 8 0) word))))
     (declare (inline write-word))
-    (let ((char-code (char-code char)))
+    (let ((char-code (char-code char-getter)))
       (declare (type char-code-integer char-code))
       (cond ((< char-code #x10000)
              (write-word char-code))
@@ -125,23 +358,19 @@
                (write-word (logior #xd800 (ldb (byte 10 10) char-code)))
                (write-word (logior #xdc00 (ldb (byte 10 0) char-code))))))))
 
-(defmethod char-to-octets ((format flexi-utf-32-le-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((char-code (char-code char)))
-    (funcall writer (ldb (byte 8 0) char-code))
-    (funcall writer (ldb (byte 8 8) char-code))
-    (funcall writer (ldb (byte 8 16) char-code))
-    (funcall writer (ldb (byte 8 24) char-code))))
-
-(defmethod char-to-octets ((format flexi-utf-32-be-format) char writer)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (character char) (function writer))
-  (let ((char-code (char-code char)))
-    (funcall writer (ldb (byte 8 24) char-code))
-    (funcall writer (ldb (byte 8 16) char-code))
-    (funcall writer (ldb (byte 8 8) char-code))
-    (funcall writer (ldb (byte 8 0) char-code))))
+(define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
+  (let ((char-code (char-code char-getter)))
+    (octet-writer (ldb (byte 8 0) char-code))
+    (octet-writer (ldb (byte 8 8) char-code))
+    (octet-writer (ldb (byte 8 16) char-code))
+    (octet-writer (ldb (byte 8 24) char-code))))
+
+(define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
+  (let ((char-code (char-code char-getter)))
+    (octet-writer (ldb (byte 8 24) char-code))
+    (octet-writer (ldb (byte 8 16) char-code))
+    (octet-writer (ldb (byte 8 8) char-code))
+    (octet-writer (ldb (byte 8 0) char-code))))
 
 (defmethod char-to-octets ((format flexi-cr-mixin) char writer)
   (declare #.*fixnum-optimize-settings*)

Modified: branches/edi/output.lisp
==============================================================================
--- branches/edi/output.lisp	(original)
+++ branches/edi/output.lisp	Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.63 2008/05/23 14:43:09 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.65 2008/05/24 23:15:25 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -129,7 +129,7 @@
   ;; needed for AllegroCL - grrr...
   (stream-write-char stream #\Newline))
 
-(defmethod stream-write-sequence ((stream flexi-output-stream) sequence start end &key)
+(defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key)
   "An optimized version which uses a buffer underneath.  The function
 can accepts characters as well as octets and it decides what to do
 based on the element type of the sequence \(if possible) or on the
@@ -141,7 +141,7 @@
   (with-accessors ((column flexi-stream-column)
                    (external-format flexi-stream-external-format)
                    (stream flexi-stream-stream))
-      stream
+      flexi-output-stream
     (when (>= start end)
       (return-from stream-write-sequence sequence))
     (when (and (vectorp sequence)
@@ -151,59 +151,8 @@
       (setq column nil)
       (return-from stream-write-sequence
         (write-sequence sequence stream :start start :end end)))
-    (let* ((octet-seen-p nil)
-           (buffer-pos 0)
-           (factor (encoding-factor external-format))
-           (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
-           (buffer (make-octet-buffer buffer-size)))
-      (declare (fixnum buffer-pos buffer-size)
-               (boolean octet-seen-p)
-               (type (array octet *) buffer))
-      (labels ((flush-buffer ()
-                 "Sends all octets in BUFFER to the underlying stream."
-                 (write-sequence buffer stream :end buffer-pos)
-                 (setq buffer-pos 0))
-               (write-octet (octet)
-                 "Adds one octet to the buffer and flush it if necessary."
-                 (declare (octet octet))
-                 (when (>= buffer-pos buffer-size)
-                   (flush-buffer))
-                 (setf (aref buffer buffer-pos) octet)
-                 (incf buffer-pos))
-               (write-character (char)
-                 "Adds the octets representing the character CHAR to the buffer."
-                 (char-to-octets external-format char #'write-octet))
-               (write-object (object)
-                 "Dispatches to WRITE-OCTET or WRITE-CHARACTER
-depending on the type of OBJECT."
-                 (etypecase object
-                   (octet (setq octet-seen-p t)
-                          (write-octet object))
-                   (character (write-character object)))))
-        (declare (dynamic-extent (function write-octet)))
-        (macrolet ((iterate (output-form)
-                     "An unhygienic macro to implement the actual
-iteration through SEQUENCE.  OUTPUT-FORM is the form to retrieve one
-sequence element and put its octet representation into the buffer."
-                     `(loop for index of-type fixnum from start below end
-                            do ,output-form
-                            finally (when (plusp buffer-pos)
-                                      (flush-buffer)))))
-          (etypecase sequence
-            (string (iterate (write-character (char sequence index))))
-            (array (iterate (write-object (aref sequence index))))
-            (list (iterate (write-object (nth index sequence)))))
-          ;; update the column slot, setting it to NIL if we sent
-          ;; octets
-          (setq column
-                (cond (octet-seen-p nil)
-                      (t (let ((last-newline-pos (position #\Newline sequence
-                                                           :test #'char=
-                                                           :start start
-                                                           :end end
-                                                           :from-end t)))
-                           (cond (last-newline-pos (- end last-newline-pos 1))
-                                 (column (+ column (- end start))))))))))))          
+    ;; otherwise hand over to the external format to do the work
+    (write-sequence* external-format flexi-output-stream sequence start end))
   sequence)
 
 (defmethod stream-write-string ((stream flexi-output-stream) string

Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp	(original)
+++ branches/edi/strings.lisp	Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.22 2008/05/21 01:43:43 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.24 2008/05/24 23:15:25 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -36,56 +36,10 @@
 octets corresponding to the external format designated by
 EXTERNAL-FORMAT."
   (declare #.*standard-optimize-settings*)
-  (declare (fixnum start end) (string string))
+  (declare (string string))
   (setq external-format (maybe-convert-external-format external-format))
-  (let ((factor (encoding-factor external-format))
-        (length (- end start)))
-    (declare (fixnum length))
-    (etypecase factor
-      (integer
-       (let ((octets (make-array (* factor length) :element-type 'octet))
-             (j 0))
-         (declare (fixnum j))
-         (flet ((writer (octet)
-                  (declare (octet octet))
-                  (setf (aref (the (array octet *) octets) j) octet)
-                  (incf j)))
-           (declare (dynamic-extent (function writer)))
-           (loop for i of-type fixnum from start below end do
-                 (char-to-octets external-format
-                                 (char string i)
-                                 #'writer)))
-         octets))
-      (double-float
-       ;; this is a bit clunky but hopefully a bit more efficient than
-       ;; using VECTOR-PUSH-EXTEND
-       (let* ((octets-length (ceiling (* factor length)))
-              (octets (make-array octets-length
-                                  :element-type 'octet
-                                  :fill-pointer t
-                                  :adjustable t))
-              (i start)
-              (j 0))
-         (declare (fixnum i j octets-length)
-                  (double-float factor))
-         (flet ((writer (octet)
-                  (declare (octet octet))
-                  (when (>= j octets-length)
-                    (setq factor (* factor 2.0d0))
-                    (incf octets-length (the fixnum (ceiling (* factor (- end i)))))
-                    (adjust-array octets octets-length :fill-pointer t))
-                  (setf (aref (the (array octet *) octets) j) octet)
-                  (incf j)))
-           (declare (dynamic-extent (function writer)))
-           (loop
-            (when (>= i end)
-              (return))
-            (char-to-octets external-format
-                            (char string i)
-                            #'writer)
-            (incf i))
-           (setf (fill-pointer octets) j)
-           octets))))))
+  ;; the external format knows how to do it...
+  (string-to-octets* external-format string start end))
 
 (defun octets-to-string (sequence &key
                                   (external-format :latin1)

Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp	(original)
+++ branches/edi/test/test.lisp	Sat May 24 19:34:51 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.31 2008/05/20 23:01:53 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.32 2008/05/21 17:51:42 edi Exp $
 
 ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -329,10 +329,11 @@
        (loop for i below (length seq1)
              always (eql (elt seq1 i) (elt seq2 i)))))
 
-(defun read-sequence-test (pathspec external-format)
-  "Several tests to confirm that READ-SEQUENCE behaves as expected."
-  (with-test ((format nil "READ-SEQUENCE tests with format ~S."
-                      (flex::normalize-external-format external-format)))
+(defun sequence-test (pathspec external-format)
+  "Several tests to confirm that READ-SEQUENCE and WRITE-SEQUENCE
+behave as expected."
+  (with-test ((format nil "Sequence tests with format ~S and file ~A."
+                      (flex::normalize-external-format external-format) pathspec))
     (let* ((full-path (merge-pathnames pathspec *this-file*))
            (file-string (file-as-string full-path external-format))
            (string-length (length file-string))
@@ -397,7 +398,33 @@
           (check (sequence-equal array (subseq file-string 25 (- string-length 25))))
           (check (sequence-equal (loop repeat 25
                                        collect (read-char in))
-                                 (subseq file-string (- string-length 25)))))))))
+                                 (subseq file-string (- string-length 25))))))
+      (let ((path-out (ensure-directories-exist (merge-pathnames pathspec *tmp-dir*))))
+        (with-open-file (out path-out
+                             :direction :output
+                             :if-exists :supersede
+                             :element-type 'octet)
+          (let ((out (make-flexi-stream out :external-format external-format)))
+            (write-sequence octets out)))
+        (check (file-equal full-path path-out))
+        (with-open-file (out path-out
+                             :direction :output
+                             :if-exists :supersede
+                             :element-type 'octet)
+          (let ((out (make-flexi-stream out :external-format external-format)))
+            (write-sequence file-string out)))
+        (check (file-equal full-path path-out))
+        (with-open-file (out path-out
+                             :direction :output
+                             :if-exists :supersede
+                             :element-type 'octet)
+          (let ((out (make-flexi-stream out :external-format external-format)))
+            (write-sequence file-string out :end 100)
+            (write-sequence octets out
+                            :start (length (string-to-octets file-string
+                                                             :external-format external-format
+                                                             :end 100)))))
+        (check (file-equal full-path path-out))))))
 
 (defmacro using-values ((&rest values) &body body)
   "Executes BODY and feeds an element from VALUES to the USE-VALUE
@@ -544,7 +571,7 @@
                                               nconc (create-test-combinations file-name symbols t))))
       (incf no-tests (length read-sequence-test-args-list))
       (dolist (args read-sequence-test-args-list)
-        (apply 'read-sequence-test args)))
+        (apply 'sequence-test args)))
     (incf no-tests)
     (error-handling-test)
     (incf no-tests)



More information about the Flexi-streams-cvs mailing list