[flexi-streams-cvs] r8 - branches/hans
hhubner at common-lisp.net
hhubner at common-lisp.net
Thu May 1 15:49:14 UTC 2008
Author: hhubner
Date: Thu May 1 11:49:13 2008
New Revision: 8
Modified:
branches/hans/input.lisp
branches/hans/output.lisp
branches/hans/strings.lisp
branches/hans/test-speed.lisp
Log:
Fast and unhygienic version of OCTETS-TO-STRING. This gives a 10x speedup
compared to the streams-based implementation.
Modified: branches/hans/input.lisp
==============================================================================
--- branches/hans/input.lisp (original)
+++ branches/hans/input.lisp Thu May 1 11:49:13 2008
@@ -242,35 +242,35 @@
(decf position)
(push #.(char-code #\Return) octet-stack)))))
-(defun buffer-code-char (buffer char-code)
- "Given a BUFFER, which is assumed to be a
- to-string-conversion-buffer (see strings.lisp) and a character
- code, convert to a character and perform newline processing for the
- stream if the character is a #\Return. This code basically repeats
- what the stream-read-char ((stream flexi-cr-mixin)) does, but it
- does so in an optimized manner to make octet->string conversion
- faster."
+(defun code-char-with-newline-processing (char-code eol-style read-char-code-fn unread-char-code-fn)
+ "Perform newline conversion during octets-to-string processing.
+CHAR-CODE is the code of the current character. If it denotes a
+#\Return character, newline processing accoring to EOL-STYLE is
+performed. READ-CHAR-CODE-FN and UNREAD-CHAR-CODE-FN are called to
+read the next character code from the input, unread-char-code-fn is
+called to skip back in the input by one octet. All this works under
+the assumption that #\Return and #\Linefeed are single-octet codes."
(declare (optimize speed (safety 0))
- (type to-string-conversion-buffer buffer)
- (type fixnum char-code))
+ (type fixnum char-code)
+ (type symbol eol-style))
(let ((char (code-char char-code)))
(if (eql char #\Return)
- (case (tscb-eol-style buffer)
+ (case eol-style
(:cr
#\Newline)
(:crlf
- (cond
- ((= (tscb-position buffer) (tscb-end buffer))
+ (case (funcall read-char-code-fn)
+ (:eof
:eof)
- ((eql #.(char-code #\Newline) (aref (tscb-vector buffer) (tscb-position buffer)))
- (incf (tscb-position buffer))
- #\Newline)
+ (#.(char-code #\Newline)
+ #\Newline)
(t
+ (funcall unread-char-code-fn)
#\Return)))
(t
#\Return))
char)))
-(declaim (inline buffer-code-char))
+(declaim (inline code-char-with-newline-processing))
(defmacro define-char-reader ((stream-var stream-class) &body body)
"Helper macro to define methods for STREAM-READ-CHAR and
@@ -284,7 +284,7 @@
used only for dispatching. The BUFFER-READ-CHAR generic function is
used to shortcut through the streams mechanic from the
OCTETS-TO-STRING function."
- (with-unique-names (char-code body-fn dummy-stream)
+ (with-unique-names (char-code body-fn octets-var)
(let ((body body))
`(progn
(defmethod stream-read-char ((,stream-var ,stream-class))
@@ -304,13 +304,33 @@
;; for UNREAD-CHAR
(setq last-char-code ,char-code)
(or (code-char ,char-code) ,char-code))))
- (defmethod buffer-read-char (,stream-var (,dummy-stream ,stream-class))
- (declare (optimize speed))
- (declare (ignore ,dummy-stream)) ; used only for dispatch
- (block stream-read-char ;; for RETURN-FROM in BODY
- (let ((,char-code (progn , at body)))
- (declare (type fixnum ,char-code))
- (or (buffer-code-char ,stream-var ,char-code) ,char-code))))))))
+ (defmethod octets-to-string% ((,stream-var ,stream-class) ,octets-var &key start end)
+ (let ((position start)
+ save-position
+ (eol-style (external-format-eol-style (flexi-stream-external-format ,stream-var)))
+ (string (make-array (- end start) :element-type 'character :fill-pointer 0)))
+ (labels ((read-byte* (stream)
+ (declare (ignore stream))
+ (if (< position end)
+ (prog1
+ (aref ,octets-var position)
+ (incf position))
+ :eof))
+ (read-char-code ()
+ (setf save-position position)
+ (block stream-read-char ;; for RETURN-FROM in BODY
+ , at body))
+ (unread-char-code ()
+ (setf position save-position)))
+ (do ((char-code (read-char-code) (read-char-code)))
+ ((eql char-code :eof)
+ string)
+ (vector-push (or (code-char-with-newline-processing char-code
+ eol-style
+ #'read-char-code
+ #'unread-char-code)
+ char-code)
+ string)))))))))
(defun recover-from-encoding-error (flexi-stream format-control &rest format-args)
"Helper function used by the STREAM-READ-CHAR methods below to deal
Modified: branches/hans/output.lisp
==============================================================================
--- branches/hans/output.lisp (original)
+++ branches/hans/output.lisp Thu May 1 11:49:13 2008
@@ -88,7 +88,9 @@
(defmethod char-to-octets ((stream flexi-ascii-output-stream) char sink)
(declare (optimize speed))
+ (declare (type character char))
(let ((octet (char-code char)))
+ (declare (type fixnum char-code))
(when (> octet 127)
(signal-encoding-error stream "~S is not an ASCII character." char))
(write-byte* octet sink))
Modified: branches/hans/strings.lisp
==============================================================================
--- branches/hans/strings.lisp (original)
+++ branches/hans/strings.lisp Thu May 1 11:49:13 2008
@@ -58,24 +58,6 @@
(let ((flexi (make-flexi-stream out :external-format external-format)))
(write-string string flexi :start start :end end))))
-;; TO-STRING-CONVERSION-BUFFER structures are used for fast conversion
-;; of octets to strings, circumventing streams.
-
-(defstruct (to-string-conversion-buffer
- (:conc-name tscb-))
- (vector nil :type (simple-array (unsigned-byte 8) *))
- (position nil :type fixnum)
- (end nil :type fixnum)
- (eol-style nil :type (or null symbol)))
-
-(defmethod read-byte* ((to-string-conversion-buffer to-string-conversion-buffer))
- (declare (optimize speed (safety 0)))
- (let ((position (tscb-position to-string-conversion-buffer)))
- (when (< position (tscb-end to-string-conversion-buffer))
- (prog1
- (aref (tscb-vector to-string-conversion-buffer) position)
- (incf (tscb-position to-string-conversion-buffer))))))
-
(defun octets-to-string (vector &key (external-format (make-external-format :latin1))
(start 0) (end (length vector)))
"Converts the Lisp vector VECTOR of octets from START to END to
@@ -83,18 +65,9 @@
(declare (optimize speed (safety 0)))
(declare (type (simple-array (unsigned-byte 8) *) vector)
(type fixnum start end))
- (let ((buffer (make-to-string-conversion-buffer :vector vector
- :position start
- :end end
- :eol-style (external-format-eol-style external-format)))
- (dummy-input-stream (make-flexi-stream (make-string-input-stream "") :external-format external-format))
- (string (make-array (the fixnum (- end start)) :element-type 'character :fill-pointer 0)))
- (declare (type (array character (*)) string))
- (do ((char (buffer-read-char buffer dummy-input-stream)
- (buffer-read-char buffer dummy-input-stream)))
- ((eql char :eof)
- string)
- (vector-push char string))))
+ (octets-to-string% (make-flexi-stream (make-string-input-stream "") :external-format external-format)
+ vector
+ :start start :end end))
(defun octets-to-string* (vector &key (external-format (make-external-format :latin1))
(start 0) (end (length vector)))
Modified: branches/hans/test-speed.lisp
==============================================================================
--- branches/hans/test-speed.lisp (original)
+++ branches/hans/test-speed.lisp Thu May 1 11:49:13 2008
@@ -43,16 +43,16 @@
(dotimes (i character-count)
(setf (aref octets i) (+ 32 (random 96))))
(format t "testing with latin-1 encoding, streams based~%")
- (time (dotimes (i 10)
+ (time (dotimes (i 100)
(null (octets-to-string* octets :external-format (make-external-format :latin-1)))))
(format t "testing with utf-8 encoding, streams based~%")
- (time (dotimes (i 10)
+ (time (dotimes (i 100)
(null (octets-to-string* octets :external-format (make-external-format :utf-8)))))
(format t "testing with latin-1 encoding, optimized~%")
- (time (dotimes (i 10)
+ (time (dotimes (i 100)
(null (octets-to-string octets :external-format (make-external-format :latin-1)))))
(format t "testing with utf-8 encoding, optimized~%")
- (time (dotimes (i 10)
+ (time (dotimes (i 100)
(null (octets-to-string octets :external-format (make-external-format :utf-8))))))))
(defmacro profile (&body body)
More information about the Flexi-streams-cvs
mailing list