[flexi-streams-cvs] r4 - branches/hans

hhubner at common-lisp.net hhubner at common-lisp.net
Thu May 1 06:31:48 UTC 2008


Author: hhubner
Date: Thu May  1 02:31:46 2008
New Revision: 4

Modified:
   branches/hans/input.lisp
   branches/hans/stream.lisp
   branches/hans/strings.lisp
Log:
commit first set of changes to speed up octets-to-string

Modified: branches/hans/input.lisp
==============================================================================
--- branches/hans/input.lisp	(original)
+++ branches/hans/input.lisp	Thu May  1 02:31:46 2008
@@ -242,34 +242,78 @@
        (decf position)
        (push #.(char-code #\Return) octet-stack)))))
 
+(defun buffer-code-char (buffer char-code)
+  "Given a BUFFER, which is assumed to be a
+   to-string-conversion-buffer (see strings.lisp) and a character
+   code, convert to a character and perform newline processing for the
+   stream if the character is a #\Return.  This code basically repeats
+   what the stream-read-char ((stream flexi-cr-mixin)) does, but it
+   does so in an optimized manner to make octet->string conversion
+   faster."
+  (declare (optimize speed (safety 0))
+           (type to-string-conversion-buffer buffer)
+           (type fixnum char-code))
+  (let ((char (code-char char-code)))
+    (if (eql char #\Return)
+        (case (tscb-eol-style buffer)
+          (:cr
+           #\Newline)
+          (:crlf
+           (cond
+             ((= (tscb-position buffer) (tscb-end buffer))
+              :eof)
+             ((eql #.(char-code #\Newline) (aref (tscb-vector buffer) (tscb-position buffer)))
+              (incf (tscb-position buffer))
+              #\Newline)
+             (t
+              #\Return)))
+          (t
+           #\Return))
+        char)))
+(declaim (inline buffer-code-char))
+
 (defmacro define-char-reader ((stream-var stream-class) &body body)
-  "Helper macro to define methods for STREAM-READ-CHAR.  Defines a
-method for the class STREAM-CLASS using the variable STREAM-VAR and
-the code body BODY wrapped with some standard code common to all
-methods defined here.  The return value of BODY is a character code.
-In case of encoding problems, BODY must return the value returned by
-\(RECOVER-FROM-ENCODING-ERROR ...)."
-  (with-unique-names (char-code body-fn)
-    `(defmethod stream-read-char ((,stream-var ,stream-class))
-       "This method was generated with the DEFINE-CHAR-READER macro."
-       (declare (optimize speed))
-       ;; note that we do nothing for the :LF EOL style because we
-       ;; assume that #\Newline is the same as #\Linefeed in all
-       ;; Lisps which will use this library
-       (with-accessors ((last-octet flexi-stream-last-octet)
-                        (last-char-code flexi-stream-last-char-code))
-           ,stream-var
-         ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after
-         ;; this operation
-         (setq last-octet nil)
-         (let ((,char-code
-                (flet ((,body-fn () , at body))
-                  (declare (inline ,body-fn) (dynamic-extent (function ,body-fn)))
-                  (,body-fn))))
-           ;; remember this character and the current external format
-           ;; for UNREAD-CHAR
-           (setq last-char-code ,char-code)
-           (or (code-char ,char-code) ,char-code))))))
+  "Helper macro to define methods for STREAM-READ-CHAR and
+BUFFER-READ-CHAR.  Defines a method for the class STREAM-CLASS using
+the variable STREAM-VAR and the code body BODY wrapped with some
+standard code common to all methods defined here.  The return value of
+BODY is a character code.  In case of encoding problems, BODY must
+return the value returned by \(RECOVER-FROM-ENCODING-ERROR ...).  In
+addition, a method on BUFFER-READ-CHAR is defined with the first
+argument being the buffer, the second argument a STREAM-CLASS instance
+used only for dispatching.  The BUFFER-READ-CHAR generic function is
+used to shortcut through the streams mechanic from the
+OCTETS-TO-STRING function."
+  (with-unique-names (char-code body-fn dummy-stream)
+    (let ((body body))
+      `(progn
+         (defmethod stream-read-char ((,stream-var ,stream-class))
+           "This method was generated with the DEFINE-CHAR-READER macro."
+           (declare (optimize speed))
+           ;; note that we do nothing for the :LF EOL style because we
+           ;; assume that #\Newline is the same as #\Linefeed in all
+           ;; Lisps which will use this library
+           (with-accessors ((last-octet flexi-stream-last-octet)
+                            (last-char-code flexi-stream-last-char-code))
+               ,stream-var
+             ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after
+             ;; this operation
+             (setq last-octet nil)
+             (let ((,char-code
+                    (flet ((,body-fn () , at body))
+                      (declare (inline ,body-fn) (dynamic-extent (function ,body-fn)))
+                      (,body-fn))))
+               ;; remember this character and the current external format
+               ;; for UNREAD-CHAR
+               (setq last-char-code ,char-code)
+               (or (code-char ,char-code) ,char-code))))
+         (defmethod buffer-read-char (,stream-var (,dummy-stream ,stream-class))
+           (declare (optimize speed))
+           (declare (ignore ,dummy-stream)) ; used only for dispatch
+           (block stream-read-char ;; for RETURN-FROM in BODY
+             (let ((,char-code (progn , at body)))
+               (declare (type fixnum ,char-code))
+               (or (buffer-code-char ,stream-var ,char-code) ,char-code))))))))
 
 (defun recover-from-encoding-error (flexi-stream format-control &rest format-args)
   "Helper function used by the STREAM-READ-CHAR methods below to deal
@@ -582,4 +626,4 @@
                     (t (= octet peek-type)))
         finally (unless (eql octet eof-value)
                   (unread-byte octet flexi-input-stream))
-                (return octet)))
\ No newline at end of file
+                (return octet)))

Modified: branches/hans/stream.lisp
==============================================================================
--- branches/hans/stream.lisp	(original)
+++ branches/hans/stream.lisp	Thu May  1 02:31:46 2008
@@ -509,46 +509,50 @@
   ;; <http://thread.gmane.org/gmane.lisp.lispworks.general/6269>
   (set-class stream))
 
+(defun input-stream-class-name (external-format)
+  "Given an EXTERNAL-FORMAT, return the flexi-stream class name that
+   needs to be used for reading such encoded data.  Returns the class'
+   name (a symbol)."
+  (declare (optimize speed))
+  (let ((external-format-name (external-format-name external-format))
+        (external-format-cr (not (eq (external-format-eol-style external-format) :lf))))
+    (cond ((ascii-name-p external-format-name)
+           (if external-format-cr
+               'flexi-cr-ascii-input-stream
+               'flexi-ascii-input-stream))
+          ((eq external-format-name :iso-8859-1)
+           (if external-format-cr
+               'flexi-cr-latin-1-input-stream
+               'flexi-latin-1-input-stream))
+          ((or (koi8-r-name-p external-format-name)
+               (iso-8859-name-p external-format-name)
+               (code-page-name-p external-format-name))
+           (if external-format-cr
+               'flexi-cr-8-bit-input-stream
+               'flexi-8-bit-input-stream))
+          (t (case external-format-name
+               (:utf-8 (if external-format-cr
+                           'flexi-cr-utf-8-input-stream
+                           'flexi-utf-8-input-stream))
+               (:utf-16 (if external-format-cr
+                            (if (external-format-little-endian external-format)
+                                'flexi-cr-utf-16-le-input-stream
+                                'flexi-cr-utf-16-be-input-stream)
+                            (if (external-format-little-endian external-format)
+                                'flexi-utf-16-le-input-stream
+                                'flexi-utf-16-be-input-stream)))
+               (:utf-32 (if external-format-cr
+                            (if (external-format-little-endian external-format)
+                                'flexi-cr-utf-32-le-input-stream
+                                'flexi-cr-utf-32-be-input-stream)
+                            (if (external-format-little-endian external-format)
+                                'flexi-utf-32-le-input-stream
+                                'flexi-utf-32-be-input-stream))))))))
+
 (defmethod set-class ((stream flexi-input-stream))
   "Changes the actual class of STREAM depending on its external format."
-  (declare (optimize speed))
-  (with-accessors ((external-format flexi-stream-external-format))
-      stream
-    (let ((external-format-name (external-format-name external-format))
-          (external-format-cr (not (eq (external-format-eol-style external-format) :lf))))
-      (change-class stream
-                    (cond ((ascii-name-p external-format-name)
-                           (if external-format-cr
-                             'flexi-cr-ascii-input-stream
-                             'flexi-ascii-input-stream))
-                          ((eq external-format-name :iso-8859-1)
-                           (if external-format-cr
-                             'flexi-cr-latin-1-input-stream
-                             'flexi-latin-1-input-stream))
-                          ((or (koi8-r-name-p external-format-name)
-                               (iso-8859-name-p external-format-name)
-                               (code-page-name-p external-format-name))
-                           (if external-format-cr
-                             'flexi-cr-8-bit-input-stream
-                             'flexi-8-bit-input-stream))
-                          (t (case external-format-name
-                               (:utf-8 (if external-format-cr
-                                         'flexi-cr-utf-8-input-stream
-                                         'flexi-utf-8-input-stream))
-                               (:utf-16 (if external-format-cr
-                                          (if (external-format-little-endian external-format)
-                                            'flexi-cr-utf-16-le-input-stream
-                                            'flexi-cr-utf-16-be-input-stream)
-                                          (if (external-format-little-endian external-format)
-                                            'flexi-utf-16-le-input-stream
-                                            'flexi-utf-16-be-input-stream)))
-                               (:utf-32 (if external-format-cr
-                                          (if (external-format-little-endian external-format)
-                                            'flexi-cr-utf-32-le-input-stream
-                                            'flexi-cr-utf-32-be-input-stream)
-                                          (if (external-format-little-endian external-format)
-                                            'flexi-utf-32-le-input-stream
-                                            'flexi-utf-32-be-input-stream))))))))))
+  (change-class stream
+                (input-stream-class-name (flexi-stream-external-format stream))))
 
 (defmethod set-class ((stream flexi-output-stream))
   "Changes the actual class of STREAM depending on its external format."

Modified: branches/hans/strings.lisp
==============================================================================
--- branches/hans/strings.lisp	(original)
+++ branches/hans/strings.lisp	Thu May  1 02:31:46 2008
@@ -38,19 +38,58 @@
     (let ((flexi (make-flexi-stream out :external-format external-format)))
       (write-string string flexi :start start :end end))))
 
+;; TO-STRING-CONVERSION-BUFFER structures are used for fast conversion
+;; of octets to strings, circumventing streams.
+
+(defstruct (to-string-conversion-buffer
+             (:conc-name tscb-))
+  (vector nil :type (simple-array (unsigned-byte 8) *))
+  (position nil :type fixnum)
+  (end nil :type fixnum)
+  (eol-style nil :type (or null symbol)))
+
+(defmethod read-byte* ((to-string-conversion-buffer to-string-conversion-buffer))
+  (declare (optimize speed (safety 0)))
+  (let ((position (tscb-position to-string-conversion-buffer)))
+    (when (< position (tscb-end to-string-conversion-buffer))
+      (prog1
+          (aref (tscb-vector to-string-conversion-buffer) position)
+        (incf (tscb-position to-string-conversion-buffer))))))
+
 (defun octets-to-string (vector &key (external-format (make-external-format :latin1))
                                      (start 0) (end (length vector)))
   "Converts the Lisp vector VECTOR of octets from START to END to
 string using the external format EXTERNAL-FORMAT."
+  (declare (optimize speed (safety 0)))
+  (declare (type (simple-array (unsigned-byte 8) *) vector)
+           (type fixnum start end))
+  (let ((buffer (make-to-string-conversion-buffer :vector vector
+                                                  :position start
+                                                  :end end
+                                                  :eol-style (external-format-eol-style external-format)))
+        (dummy-input-stream (make-flexi-stream (make-string-input-stream "") :external-format external-format))
+        (string (make-array (the fixnum (- end start)) :element-type 'character :fill-pointer 0)))
+    (declare (type (array character (*)) string))
+    (do ((char (buffer-read-char buffer dummy-input-stream)
+               (buffer-read-char buffer dummy-input-stream)))
+        ((eql char :eof)
+         string)
+      (vector-push char string))))
+
+(defun octets-to-string* (vector &key (external-format (make-external-format :latin1))
+                          (start 0) (end (length vector)))
+  "Converts the Lisp vector VECTOR of octets from START to END to
+string using the external format EXTERNAL-FORMAT."
+  ;; This version of OCTETS-TO-STRING is here so that one can do speed
+  ;; comparisons.  It should be significantly slower than the version
+  ;; above.
   (declare (optimize speed))
   (with-input-from-sequence (in vector :start start :end end)
     (let ((flexi (make-flexi-stream in :external-format external-format))
           (result (make-array (- end start)
                               :element-type #+:lispworks 'lw:simple-char
-                                            #-:lispworks 'character
+                              #-:lispworks 'character
                               :fill-pointer t)))
       (setf (fill-pointer result)
             (read-sequence result flexi))
       result)))
-                              
-



More information about the Flexi-streams-cvs mailing list