[flexi-streams-cvs] r8 - branches/hans

hhubner at common-lisp.net hhubner at common-lisp.net
Thu May 1 15:49:14 UTC 2008


Author: hhubner
Date: Thu May  1 11:49:13 2008
New Revision: 8

Modified:
   branches/hans/input.lisp
   branches/hans/output.lisp
   branches/hans/strings.lisp
   branches/hans/test-speed.lisp
Log:
Fast and unhygienic version of OCTETS-TO-STRING.  This gives a 10x speedup
compared to the streams-based implementation.


Modified: branches/hans/input.lisp
==============================================================================
--- branches/hans/input.lisp	(original)
+++ branches/hans/input.lisp	Thu May  1 11:49:13 2008
@@ -242,35 +242,35 @@
        (decf position)
        (push #.(char-code #\Return) octet-stack)))))
 
-(defun buffer-code-char (buffer char-code)
-  "Given a BUFFER, which is assumed to be a
-   to-string-conversion-buffer (see strings.lisp) and a character
-   code, convert to a character and perform newline processing for the
-   stream if the character is a #\Return.  This code basically repeats
-   what the stream-read-char ((stream flexi-cr-mixin)) does, but it
-   does so in an optimized manner to make octet->string conversion
-   faster."
+(defun code-char-with-newline-processing (char-code eol-style read-char-code-fn unread-char-code-fn)
+  "Perform newline conversion during octets-to-string processing.
+CHAR-CODE is the code of the current character.  If it denotes a
+#\Return character, newline processing accoring to EOL-STYLE is
+performed.  READ-CHAR-CODE-FN and UNREAD-CHAR-CODE-FN are called to
+read the next character code from the input, unread-char-code-fn is
+called to skip back in the input by one octet.  All this works under
+the assumption that #\Return and #\Linefeed are single-octet codes."
   (declare (optimize speed (safety 0))
-           (type to-string-conversion-buffer buffer)
-           (type fixnum char-code))
+           (type fixnum char-code)
+           (type symbol eol-style))
   (let ((char (code-char char-code)))
     (if (eql char #\Return)
-        (case (tscb-eol-style buffer)
+        (case eol-style
           (:cr
            #\Newline)
           (:crlf
-           (cond
-             ((= (tscb-position buffer) (tscb-end buffer))
+           (case (funcall read-char-code-fn)
+             (:eof
               :eof)
-             ((eql #.(char-code #\Newline) (aref (tscb-vector buffer) (tscb-position buffer)))
-              (incf (tscb-position buffer))
-              #\Newline)
+             (#.(char-code #\Newline)
+                #\Newline)
              (t
+              (funcall unread-char-code-fn)
               #\Return)))
           (t
            #\Return))
         char)))
-(declaim (inline buffer-code-char))
+(declaim (inline code-char-with-newline-processing))
 
 (defmacro define-char-reader ((stream-var stream-class) &body body)
   "Helper macro to define methods for STREAM-READ-CHAR and
@@ -284,7 +284,7 @@
 used only for dispatching.  The BUFFER-READ-CHAR generic function is
 used to shortcut through the streams mechanic from the
 OCTETS-TO-STRING function."
-  (with-unique-names (char-code body-fn dummy-stream)
+  (with-unique-names (char-code body-fn octets-var)
     (let ((body body))
       `(progn
          (defmethod stream-read-char ((,stream-var ,stream-class))
@@ -304,13 +304,33 @@
                ;; for UNREAD-CHAR
                (setq last-char-code ,char-code)
                (or (code-char ,char-code) ,char-code))))
-         (defmethod buffer-read-char (,stream-var (,dummy-stream ,stream-class))
-           (declare (optimize speed))
-           (declare (ignore ,dummy-stream)) ; used only for dispatch
-           (block stream-read-char ;; for RETURN-FROM in BODY
-             (let ((,char-code (progn , at body)))
-               (declare (type fixnum ,char-code))
-               (or (buffer-code-char ,stream-var ,char-code) ,char-code))))))))
+         (defmethod octets-to-string% ((,stream-var ,stream-class) ,octets-var &key start end)
+           (let ((position start)
+                 save-position
+                 (eol-style (external-format-eol-style (flexi-stream-external-format ,stream-var)))
+                 (string (make-array (- end start) :element-type 'character :fill-pointer 0)))
+             (labels ((read-byte* (stream)
+                        (declare (ignore stream))
+                        (if (< position end)
+                            (prog1
+                                (aref ,octets-var position)
+                              (incf position))
+                            :eof))
+                      (read-char-code ()
+                        (setf save-position position)
+                        (block stream-read-char ;; for RETURN-FROM in BODY
+                          , at body))
+                      (unread-char-code ()
+                        (setf position save-position)))
+               (do ((char-code (read-char-code) (read-char-code)))
+                   ((eql char-code :eof)
+                    string)
+                 (vector-push (or (code-char-with-newline-processing char-code
+                                                                     eol-style
+                                                                     #'read-char-code
+                                                                     #'unread-char-code)
+                                  char-code)
+                              string)))))))))
 
 (defun recover-from-encoding-error (flexi-stream format-control &rest format-args)
   "Helper function used by the STREAM-READ-CHAR methods below to deal

Modified: branches/hans/output.lisp
==============================================================================
--- branches/hans/output.lisp	(original)
+++ branches/hans/output.lisp	Thu May  1 11:49:13 2008
@@ -88,7 +88,9 @@
 
 (defmethod char-to-octets ((stream flexi-ascii-output-stream) char sink)
   (declare (optimize speed))
+  (declare (type character char))
   (let ((octet (char-code char)))
+    (declare (type fixnum char-code))
     (when (> octet 127)
       (signal-encoding-error stream "~S is not an ASCII character." char))
     (write-byte* octet sink))

Modified: branches/hans/strings.lisp
==============================================================================
--- branches/hans/strings.lisp	(original)
+++ branches/hans/strings.lisp	Thu May  1 11:49:13 2008
@@ -58,24 +58,6 @@
     (let ((flexi (make-flexi-stream out :external-format external-format)))
       (write-string string flexi :start start :end end))))
 
-;; TO-STRING-CONVERSION-BUFFER structures are used for fast conversion
-;; of octets to strings, circumventing streams.
-
-(defstruct (to-string-conversion-buffer
-             (:conc-name tscb-))
-  (vector nil :type (simple-array (unsigned-byte 8) *))
-  (position nil :type fixnum)
-  (end nil :type fixnum)
-  (eol-style nil :type (or null symbol)))
-
-(defmethod read-byte* ((to-string-conversion-buffer to-string-conversion-buffer))
-  (declare (optimize speed (safety 0)))
-  (let ((position (tscb-position to-string-conversion-buffer)))
-    (when (< position (tscb-end to-string-conversion-buffer))
-      (prog1
-          (aref (tscb-vector to-string-conversion-buffer) position)
-        (incf (tscb-position to-string-conversion-buffer))))))
-
 (defun octets-to-string (vector &key (external-format (make-external-format :latin1))
                                      (start 0) (end (length vector)))
   "Converts the Lisp vector VECTOR of octets from START to END to
@@ -83,18 +65,9 @@
   (declare (optimize speed (safety 0)))
   (declare (type (simple-array (unsigned-byte 8) *) vector)
            (type fixnum start end))
-  (let ((buffer (make-to-string-conversion-buffer :vector vector
-                                                  :position start
-                                                  :end end
-                                                  :eol-style (external-format-eol-style external-format)))
-        (dummy-input-stream (make-flexi-stream (make-string-input-stream "") :external-format external-format))
-        (string (make-array (the fixnum (- end start)) :element-type 'character :fill-pointer 0)))
-    (declare (type (array character (*)) string))
-    (do ((char (buffer-read-char buffer dummy-input-stream)
-               (buffer-read-char buffer dummy-input-stream)))
-        ((eql char :eof)
-         string)
-      (vector-push char string))))
+  (octets-to-string% (make-flexi-stream (make-string-input-stream "") :external-format external-format)
+                     vector
+                     :start start :end end))
 
 (defun octets-to-string* (vector &key (external-format (make-external-format :latin1))
                           (start 0) (end (length vector)))

Modified: branches/hans/test-speed.lisp
==============================================================================
--- branches/hans/test-speed.lisp	(original)
+++ branches/hans/test-speed.lisp	Thu May  1 11:49:13 2008
@@ -43,16 +43,16 @@
       (dotimes (i character-count)
         (setf (aref octets i) (+ 32 (random 96))))
       (format t "testing with latin-1 encoding, streams based~%")
-      (time (dotimes (i 10)
+      (time (dotimes (i 100)
               (null (octets-to-string* octets :external-format (make-external-format :latin-1)))))
       (format t "testing with utf-8 encoding, streams based~%")
-      (time (dotimes (i 10)
+      (time (dotimes (i 100)
               (null (octets-to-string* octets :external-format (make-external-format :utf-8)))))
       (format t "testing with latin-1 encoding, optimized~%")
-      (time (dotimes (i 10)
+      (time (dotimes (i 100)
               (null (octets-to-string octets :external-format (make-external-format :latin-1)))))
       (format t "testing with utf-8 encoding, optimized~%")
-      (time (dotimes (i 10)
+      (time (dotimes (i 100)
               (null (octets-to-string octets :external-format (make-external-format :utf-8))))))))
 
 (defmacro profile (&body body)



More information about the Flexi-streams-cvs mailing list