[flexi-streams-cvs] r4 - branches/hans
hhubner at common-lisp.net
hhubner at common-lisp.net
Thu May 1 06:31:48 UTC 2008
Author: hhubner
Date: Thu May 1 02:31:46 2008
New Revision: 4
Modified:
branches/hans/input.lisp
branches/hans/stream.lisp
branches/hans/strings.lisp
Log:
commit first set of changes to speed up octets-to-string
Modified: branches/hans/input.lisp
==============================================================================
--- branches/hans/input.lisp (original)
+++ branches/hans/input.lisp Thu May 1 02:31:46 2008
@@ -242,34 +242,78 @@
(decf position)
(push #.(char-code #\Return) octet-stack)))))
+(defun buffer-code-char (buffer char-code)
+ "Given a BUFFER, which is assumed to be a
+ to-string-conversion-buffer (see strings.lisp) and a character
+ code, convert to a character and perform newline processing for the
+ stream if the character is a #\Return. This code basically repeats
+ what the stream-read-char ((stream flexi-cr-mixin)) does, but it
+ does so in an optimized manner to make octet->string conversion
+ faster."
+ (declare (optimize speed (safety 0))
+ (type to-string-conversion-buffer buffer)
+ (type fixnum char-code))
+ (let ((char (code-char char-code)))
+ (if (eql char #\Return)
+ (case (tscb-eol-style buffer)
+ (:cr
+ #\Newline)
+ (:crlf
+ (cond
+ ((= (tscb-position buffer) (tscb-end buffer))
+ :eof)
+ ((eql #.(char-code #\Newline) (aref (tscb-vector buffer) (tscb-position buffer)))
+ (incf (tscb-position buffer))
+ #\Newline)
+ (t
+ #\Return)))
+ (t
+ #\Return))
+ char)))
+(declaim (inline buffer-code-char))
+
(defmacro define-char-reader ((stream-var stream-class) &body body)
- "Helper macro to define methods for STREAM-READ-CHAR. Defines a
-method for the class STREAM-CLASS using the variable STREAM-VAR and
-the code body BODY wrapped with some standard code common to all
-methods defined here. The return value of BODY is a character code.
-In case of encoding problems, BODY must return the value returned by
-\(RECOVER-FROM-ENCODING-ERROR ...)."
- (with-unique-names (char-code body-fn)
- `(defmethod stream-read-char ((,stream-var ,stream-class))
- "This method was generated with the DEFINE-CHAR-READER macro."
- (declare (optimize speed))
- ;; note that we do nothing for the :LF EOL style because we
- ;; assume that #\Newline is the same as #\Linefeed in all
- ;; Lisps which will use this library
- (with-accessors ((last-octet flexi-stream-last-octet)
- (last-char-code flexi-stream-last-char-code))
- ,stream-var
- ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after
- ;; this operation
- (setq last-octet nil)
- (let ((,char-code
- (flet ((,body-fn () , at body))
- (declare (inline ,body-fn) (dynamic-extent (function ,body-fn)))
- (,body-fn))))
- ;; remember this character and the current external format
- ;; for UNREAD-CHAR
- (setq last-char-code ,char-code)
- (or (code-char ,char-code) ,char-code))))))
+ "Helper macro to define methods for STREAM-READ-CHAR and
+BUFFER-READ-CHAR. Defines a method for the class STREAM-CLASS using
+the variable STREAM-VAR and the code body BODY wrapped with some
+standard code common to all methods defined here. The return value of
+BODY is a character code. In case of encoding problems, BODY must
+return the value returned by \(RECOVER-FROM-ENCODING-ERROR ...). In
+addition, a method on BUFFER-READ-CHAR is defined with the first
+argument being the buffer, the second argument a STREAM-CLASS instance
+used only for dispatching. The BUFFER-READ-CHAR generic function is
+used to shortcut through the streams mechanic from the
+OCTETS-TO-STRING function."
+ (with-unique-names (char-code body-fn dummy-stream)
+ (let ((body body))
+ `(progn
+ (defmethod stream-read-char ((,stream-var ,stream-class))
+ "This method was generated with the DEFINE-CHAR-READER macro."
+ (declare (optimize speed))
+ ;; note that we do nothing for the :LF EOL style because we
+ ;; assume that #\Newline is the same as #\Linefeed in all
+ ;; Lisps which will use this library
+ (with-accessors ((last-octet flexi-stream-last-octet)
+ (last-char-code flexi-stream-last-char-code))
+ ,stream-var
+ ;; set LAST-OCTET slot to NIL because we can't UNREAD-BYTE after
+ ;; this operation
+ (setq last-octet nil)
+ (let ((,char-code
+ (flet ((,body-fn () , at body))
+ (declare (inline ,body-fn) (dynamic-extent (function ,body-fn)))
+ (,body-fn))))
+ ;; remember this character and the current external format
+ ;; for UNREAD-CHAR
+ (setq last-char-code ,char-code)
+ (or (code-char ,char-code) ,char-code))))
+ (defmethod buffer-read-char (,stream-var (,dummy-stream ,stream-class))
+ (declare (optimize speed))
+ (declare (ignore ,dummy-stream)) ; used only for dispatch
+ (block stream-read-char ;; for RETURN-FROM in BODY
+ (let ((,char-code (progn , at body)))
+ (declare (type fixnum ,char-code))
+ (or (buffer-code-char ,stream-var ,char-code) ,char-code))))))))
(defun recover-from-encoding-error (flexi-stream format-control &rest format-args)
"Helper function used by the STREAM-READ-CHAR methods below to deal
@@ -582,4 +626,4 @@
(t (= octet peek-type)))
finally (unless (eql octet eof-value)
(unread-byte octet flexi-input-stream))
- (return octet)))
\ No newline at end of file
+ (return octet)))
Modified: branches/hans/stream.lisp
==============================================================================
--- branches/hans/stream.lisp (original)
+++ branches/hans/stream.lisp Thu May 1 02:31:46 2008
@@ -509,46 +509,50 @@
;; <http://thread.gmane.org/gmane.lisp.lispworks.general/6269>
(set-class stream))
+(defun input-stream-class-name (external-format)
+ "Given an EXTERNAL-FORMAT, return the flexi-stream class name that
+ needs to be used for reading such encoded data. Returns the class'
+ name (a symbol)."
+ (declare (optimize speed))
+ (let ((external-format-name (external-format-name external-format))
+ (external-format-cr (not (eq (external-format-eol-style external-format) :lf))))
+ (cond ((ascii-name-p external-format-name)
+ (if external-format-cr
+ 'flexi-cr-ascii-input-stream
+ 'flexi-ascii-input-stream))
+ ((eq external-format-name :iso-8859-1)
+ (if external-format-cr
+ 'flexi-cr-latin-1-input-stream
+ 'flexi-latin-1-input-stream))
+ ((or (koi8-r-name-p external-format-name)
+ (iso-8859-name-p external-format-name)
+ (code-page-name-p external-format-name))
+ (if external-format-cr
+ 'flexi-cr-8-bit-input-stream
+ 'flexi-8-bit-input-stream))
+ (t (case external-format-name
+ (:utf-8 (if external-format-cr
+ 'flexi-cr-utf-8-input-stream
+ 'flexi-utf-8-input-stream))
+ (:utf-16 (if external-format-cr
+ (if (external-format-little-endian external-format)
+ 'flexi-cr-utf-16-le-input-stream
+ 'flexi-cr-utf-16-be-input-stream)
+ (if (external-format-little-endian external-format)
+ 'flexi-utf-16-le-input-stream
+ 'flexi-utf-16-be-input-stream)))
+ (:utf-32 (if external-format-cr
+ (if (external-format-little-endian external-format)
+ 'flexi-cr-utf-32-le-input-stream
+ 'flexi-cr-utf-32-be-input-stream)
+ (if (external-format-little-endian external-format)
+ 'flexi-utf-32-le-input-stream
+ 'flexi-utf-32-be-input-stream))))))))
+
(defmethod set-class ((stream flexi-input-stream))
"Changes the actual class of STREAM depending on its external format."
- (declare (optimize speed))
- (with-accessors ((external-format flexi-stream-external-format))
- stream
- (let ((external-format-name (external-format-name external-format))
- (external-format-cr (not (eq (external-format-eol-style external-format) :lf))))
- (change-class stream
- (cond ((ascii-name-p external-format-name)
- (if external-format-cr
- 'flexi-cr-ascii-input-stream
- 'flexi-ascii-input-stream))
- ((eq external-format-name :iso-8859-1)
- (if external-format-cr
- 'flexi-cr-latin-1-input-stream
- 'flexi-latin-1-input-stream))
- ((or (koi8-r-name-p external-format-name)
- (iso-8859-name-p external-format-name)
- (code-page-name-p external-format-name))
- (if external-format-cr
- 'flexi-cr-8-bit-input-stream
- 'flexi-8-bit-input-stream))
- (t (case external-format-name
- (:utf-8 (if external-format-cr
- 'flexi-cr-utf-8-input-stream
- 'flexi-utf-8-input-stream))
- (:utf-16 (if external-format-cr
- (if (external-format-little-endian external-format)
- 'flexi-cr-utf-16-le-input-stream
- 'flexi-cr-utf-16-be-input-stream)
- (if (external-format-little-endian external-format)
- 'flexi-utf-16-le-input-stream
- 'flexi-utf-16-be-input-stream)))
- (:utf-32 (if external-format-cr
- (if (external-format-little-endian external-format)
- 'flexi-cr-utf-32-le-input-stream
- 'flexi-cr-utf-32-be-input-stream)
- (if (external-format-little-endian external-format)
- 'flexi-utf-32-le-input-stream
- 'flexi-utf-32-be-input-stream))))))))))
+ (change-class stream
+ (input-stream-class-name (flexi-stream-external-format stream))))
(defmethod set-class ((stream flexi-output-stream))
"Changes the actual class of STREAM depending on its external format."
Modified: branches/hans/strings.lisp
==============================================================================
--- branches/hans/strings.lisp (original)
+++ branches/hans/strings.lisp Thu May 1 02:31:46 2008
@@ -38,19 +38,58 @@
(let ((flexi (make-flexi-stream out :external-format external-format)))
(write-string string flexi :start start :end end))))
+;; TO-STRING-CONVERSION-BUFFER structures are used for fast conversion
+;; of octets to strings, circumventing streams.
+
+(defstruct (to-string-conversion-buffer
+ (:conc-name tscb-))
+ (vector nil :type (simple-array (unsigned-byte 8) *))
+ (position nil :type fixnum)
+ (end nil :type fixnum)
+ (eol-style nil :type (or null symbol)))
+
+(defmethod read-byte* ((to-string-conversion-buffer to-string-conversion-buffer))
+ (declare (optimize speed (safety 0)))
+ (let ((position (tscb-position to-string-conversion-buffer)))
+ (when (< position (tscb-end to-string-conversion-buffer))
+ (prog1
+ (aref (tscb-vector to-string-conversion-buffer) position)
+ (incf (tscb-position to-string-conversion-buffer))))))
+
(defun octets-to-string (vector &key (external-format (make-external-format :latin1))
(start 0) (end (length vector)))
"Converts the Lisp vector VECTOR of octets from START to END to
string using the external format EXTERNAL-FORMAT."
+ (declare (optimize speed (safety 0)))
+ (declare (type (simple-array (unsigned-byte 8) *) vector)
+ (type fixnum start end))
+ (let ((buffer (make-to-string-conversion-buffer :vector vector
+ :position start
+ :end end
+ :eol-style (external-format-eol-style external-format)))
+ (dummy-input-stream (make-flexi-stream (make-string-input-stream "") :external-format external-format))
+ (string (make-array (the fixnum (- end start)) :element-type 'character :fill-pointer 0)))
+ (declare (type (array character (*)) string))
+ (do ((char (buffer-read-char buffer dummy-input-stream)
+ (buffer-read-char buffer dummy-input-stream)))
+ ((eql char :eof)
+ string)
+ (vector-push char string))))
+
+(defun octets-to-string* (vector &key (external-format (make-external-format :latin1))
+ (start 0) (end (length vector)))
+ "Converts the Lisp vector VECTOR of octets from START to END to
+string using the external format EXTERNAL-FORMAT."
+ ;; This version of OCTETS-TO-STRING is here so that one can do speed
+ ;; comparisons. It should be significantly slower than the version
+ ;; above.
(declare (optimize speed))
(with-input-from-sequence (in vector :start start :end end)
(let ((flexi (make-flexi-stream in :external-format external-format))
(result (make-array (- end start)
:element-type #+:lispworks 'lw:simple-char
- #-:lispworks 'character
+ #-:lispworks 'character
:fill-pointer t)))
(setf (fill-pointer result)
(read-sequence result flexi))
result)))
-
-
More information about the Flexi-streams-cvs
mailing list