[flexi-streams-cvs] r35 - in branches/edi: . doc test

eweitz at common-lisp.net eweitz at common-lisp.net
Tue May 20 08:03:33 UTC 2008


Author: eweitz
Date: Tue May 20 04:03:28 2008
New Revision: 35

Modified:
   branches/edi/decode.lisp
   branches/edi/doc/index.html
   branches/edi/encode.lisp
   branches/edi/external-format.lisp
   branches/edi/input.lisp
   branches/edi/output.lisp
   branches/edi/strings.lisp
   branches/edi/test/test.lisp
Log:
Checkpoint


Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Tue May 20 04:03:28 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.12 2008/05/19 22:32:56 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.14 2008/05/20 07:51:09 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -69,8 +69,7 @@
   (declare #.*standard-optimize-settings*)
   (declare (function reader))
   (when-let (octet (funcall reader))
-    (declare (type octet octet))
-    (if (> octet 127)
+    (if (> (the octet octet) 127)
       (recover-from-encoding-error format
                                    "No character which corresponds to octet #x~X." octet)
       octet)))
@@ -81,8 +80,8 @@
   (with-accessors ((decoding-table external-format-decoding-table))
       format
     (when-let (octet (funcall reader))
-      (declare (type octet octet))
-      (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table) octet)))
+      (let ((char-code (aref (the (simple-array char-code-integer *) decoding-table)
+                             (the octet octet))))
         (if (or (null char-code)
                 (= (the char-code-integer char-code) 65533))
           (recover-from-encoding-error format

Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html	(original)
+++ branches/edi/doc/index.html	Tue May 20 04:03:28 2008
@@ -196,7 +196,9 @@
 
 <p>
 For more examples see the source code
-of <a
+of 
+<a href="http://mr-co.de/projects/cl-rfc2047/">CL-RFC2047</a>,
+<a
 href="http://weitz.de/drakma/">Drakma</a>, <a
 href="http://weitz.de/chunga/">Chunga</a>,
 or <a href="http://weitz.de/cl-wbxml/">CL-WBXML</a>.
@@ -970,29 +972,25 @@
 <blockquote><br>
 
 Converts the Lisp string <code><i>string</i></code> from <code><i>start</i></code> to <code><i>end</i></code> to an array of
-<a href="#octet">octets</a> corresponding to the <a href="#external-formats">external format</a> <code><i>external-format</i></code>. The defaults for
+<a href="#octet">octets</a> corresponding to the external format designated by <a href="#external-formats">external format</a> <code><i>external-format</i></code>. The defaults for
 <code><i>start</i></code> and <code><i>end</i></code>
-are <code>0</code> and the length of the
-string.  The default for <code><i>external-format</i></code> is the
-value of
-evaluating <code>(<a
-href="#make-external-format">MAKE-EXTERNAL-FORMAT</a> :LATIN1)</code>
+are <code>0</code> and the length of the string.  The default
+for <code><i>external-format</i></code> is <code>:LATIN1</code>.
 
 </blockquote>
 
 <p><br>[Function]
 <br><a class=none name="octets-to-string"><b>octets-to-string</b> <i>sequence <tt>&key</tt> external-format start end</i> => <i>string</i></a>
 
-<blockquote><br> Converts the Lisp sequence <code><i>sequence</i></code>
-of <a href="#octet">octets</a> from <code><i>start</i></code>
-to <code><i>end</i></code> to string using
-the <a href="#external-formats">external
+<blockquote><br> Converts the Lisp
+sequence <code><i>sequence</i></code> of <a href="#octet">octets</a>
+from <code><i>start</i></code> to <code><i>end</i></code> to string
+using the external format designated
+by <a href="#external-formats">external
 format</a> <code><i>external-format</i></code>.  The defaults for
 <code><i>start</i></code> and <code><i>end</i></code>
 are <code>0</code> and the length of the sequence.  The default
-for <code><i>external-format</i></code> is the value of
-evaluating <code>(<a
-href="#make-external-format">MAKE-EXTERNAL-FORMAT</a> :LATIN1)</code>
+for <code><i>external-format</i></code> is <code>:LATIN1</code>.
 </blockquote>
 
 <br> <br><h3><a class=none name="position">File positions</a></h3>
@@ -1032,7 +1030,7 @@
 numerous patches and additions.
 
 <p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.102 2008/05/19 07:57:10 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.104 2008/05/20 06:55:21 edi Exp $
 <p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
 
 </body>

Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Tue May 20 04:03:28 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.10 2008/05/19 22:32:56 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.11 2008/05/20 08:02:49 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -101,7 +101,7 @@
   (flet ((write-word (word)
            (funcall writer (ldb (byte 8 0) word))
            (funcall writer (ldb (byte 8 8) word))))
-    (declare (inline read-next-word))
+    (declare (inline write-word))
     (let ((char-code (char-code char)))
       (declare (type char-code-integer char-code))
       (cond ((< char-code #x10000)
@@ -116,7 +116,7 @@
   (flet ((write-word (word)
            (funcall writer (ldb (byte 8 8) word))
            (funcall writer (ldb (byte 8 0) word))))
-    (declare (inline read-next-word))
+    (declare (inline write-word))
     (let ((char-code (char-code char)))
       (declare (type char-code-integer char-code))
       (cond ((< char-code #x10000)

Modified: branches/edi/external-format.lisp
==============================================================================
--- branches/edi/external-format.lisp	(original)
+++ branches/edi/external-format.lisp	Tue May 20 04:03:28 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.19 2008/05/19 11:20:11 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.20 2008/05/20 08:02:50 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -219,7 +219,7 @@
   (declare #.*standard-optimize-settings*)
   (declare (ignore initargs))
   (with-accessors ((encoding-hash external-format-encoding-hash)
-                   (decoding-table flexi-stream-decoding-table)
+                   (decoding-table external-format-decoding-table)
                    (name external-format-name)
                    (id external-format-id))
       external-format

Modified: branches/edi/input.lisp
==============================================================================
--- branches/edi/input.lisp	(original)
+++ branches/edi/input.lisp	Tue May 20 04:03:28 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.65 2008/05/19 22:54:10 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/input.lisp,v 1.66 2008/05/20 00:37:27 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -196,6 +196,10 @@
        (stream-read-char stream)))
 
 (defmethod stream-read-sequence ((flexi-input-stream flexi-input-stream) sequence start end &key)
+  "An optimized version which uses a buffer underneath.  The function
+can deliver characters as well as octets and it decides what to do
+based on the element type of the sequence \(which takes precedence)
+and the element type of the stream."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
   (with-accessors ((position flexi-stream-position)
@@ -207,21 +211,31 @@
                    (element-type flexi-stream-element-type)
                    (stream flexi-stream-stream))
       flexi-input-stream
-    (let ((buffer (make-octet-buffer))
-          (buffer-pos 0)
-          (buffer-end 0)
-          (index start))
-      (declare (fixnum buffer-pos buffer-end index)
+    (let* ((buffer (make-octet-buffer))
+           (buffer-pos 0)
+           (buffer-end 0)
+           (index start)
+           (want-chars-p (or (stringp sequence)
+                             (and (vectorp sequence)
+                                  (not (subtypep (array-element-type sequence) 'integer)))
+                             (type-equal element-type 'octet)))
+           (factor (if want-chars-p (encoding-factor external-format) 1))
+           (integer-factor (floor factor))
+           ;; it's an interesting question whether it makes sense
+           ;; performance-wise to make RESERVE significantly bigger
+           ;; (and thus put potentially a lot more octets into
+           ;; OCTET-STACK), especially for UTF-8
+           (reserve (if (floatp factor) (* 2 integer-factor) 0)))
+      (declare (fixnum buffer-pos buffer-end index integer-factor reserve)
                (type (array octet *) buffer))
       (flet ((compute-minimum ()
                "Computes the minimum amount of octets we can savely
 read into the buffer without violating the stream's bound \(if there
-is one) and without potentially reading more than we need."
-               ;; this has to be done conservatively, unfortunately -
-               ;; it is possible that we only fill the buffer in very
-               ;; small chunks once we're near END (but this is only
-               ;; relevant for multi-byte encodings, of course)
-               (let ((minimum (min (the fixnum (- end index)) +buffer-size+)))
+is one) and without potentially reading much more than we need."
+               (let ((minimum (min (the fixnum (+ (the fixnum (* integer-factor
+                                                                 (the fixnum (- end index))))
+                                                  reserve))
+                                   +buffer-size+)))
                  (cond (bound (min minimum (- bound position)))
                        (t minimum))))
              (fill-buffer (end)
@@ -286,7 +300,16 @@
                                            ,(sublis '((index . (1- index))) set-place)))
                                    (return-from stream-read-sequence index)))
                             (loop
-                             (when (>= index end) (leave))
+                             (when (>= index end)
+                               ;; check if there are octets in the
+                               ;; buffer we didn't use - see
+                               ;; COMPUTE-MINIMUM above
+                               (loop
+                                (when (>= buffer-pos buffer-end)
+                                  (return))
+                                (decf buffer-end)
+                                (push (aref buffer buffer-end) octet-stack))
+                               (leave))
                              (let ((next-thing ,(if octetp
                                                   '(next-octet)
                                                   '(octets-to-char-code external-format #'next-octet))))

Modified: branches/edi/output.lisp
==============================================================================
--- branches/edi/output.lisp	(original)
+++ branches/edi/output.lisp	Tue May 20 04:03:28 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.52 2008/05/19 22:32:56 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/output.lisp,v 1.54 2008/05/20 06:15:44 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -129,6 +129,19 @@
   ;; needed for AllegroCL - grrr...
   (stream-write-char stream #\Newline))
 
+;; TODO: file-position -> octet-stack (and others?)
+
+;; other way around: function "resync" trying to use File-position?
+
+;; "resync" independent function to empty octet-stack?
+;; (decrement-file-position) => success
+;; (resync ... &optional how-much (length octet-stack)) => success
+
+;; in stream-read-sequence: if file stream, read more into buffer,
+;; then resync with file-position?
+
+;; TODO: interaction between read and write
+
 (defmethod stream-write-sequence ((flexi-output-stream flexi-output-stream) sequence start end &key)
   "Writes all elements of the sequence SEQUENCE from START to END
 to the underlying stream.  The elements can be either octets or

Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp	(original)
+++ branches/edi/strings.lisp	Tue May 20 04:03:28 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.19 2008/05/19 22:32:56 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.20 2008/05/20 06:15:38 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -30,10 +30,11 @@
 (in-package :flexi-streams)
 
 (defun string-to-octets (string &key
-                                (external-format (make-external-format :latin1))
+                                (external-format :latin1)
                                 (start 0) (end (length string)))
   "Converts the Lisp string STRING from START to END to an array of
-octets corresponding to the external format EXTERNAL-FORMAT."
+octets corresponding to the external format designated by
+EXTERNAL-FORMAT."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end) (string string))
   (setq external-format (maybe-convert-external-format external-format))
@@ -87,10 +88,10 @@
            octets))))))
 
 (defun octets-to-string (sequence &key
-                                  (external-format (make-external-format :latin1))
+                                  (external-format :latin1)
                                   (start 0) (end (length sequence)))
   "Converts the Lisp sequence SEQUENCE of octets from START to END to
-string using the external format EXTERNAL-FORMAT."
+string using the external format designated by EXTERNAL-FORMAT."
   (declare #.*standard-optimize-settings*)
   (declare (fixnum start end))
   (setq external-format (maybe-convert-external-format external-format))

Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp	(original)
+++ branches/edi/test/test.lisp	Tue May 20 04:03:28 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.28 2008/05/19 23:54:55 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.29 2008/05/20 00:37:30 edi Exp $
 
 ;;; Copyright (c) 2006-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -367,19 +367,25 @@
       (with-open-file (in full-path :element-type 'octet)
         (let* ((in (make-flexi-stream in :external-format external-format))
                (list (make-list (- string-length 100))))
-          (check (sequence-equal (loop repeat 100
+          (check (sequence-equal (loop repeat 50
                                        collect (read-char in))
-                                 (subseq file-string 0 100)))
+                                 (subseq file-string 0 50)))
           (read-sequence list in)
-          (check (sequence-equal list (subseq file-string 100)))))
+          (check (sequence-equal list (subseq file-string 50 (- string-length 50))))
+          (check (sequence-equal (loop repeat 50
+                                       collect (read-char in))
+                                 (subseq file-string (- string-length 50))))))
       (with-open-file (in full-path :element-type 'octet)
         (let* ((in (make-flexi-stream in :external-format external-format))
                (array (make-array (- string-length 50))))
-          (check (sequence-equal (loop repeat 50
+          (check (sequence-equal (loop repeat 25
                                        collect (read-char in))
-                                 (subseq file-string 0 50)))
+                                 (subseq file-string 0 25)))
           (read-sequence array in)
-          (check (sequence-equal array (subseq file-string 50))))))))
+          (check (sequence-equal array (subseq file-string 25 (- string-length 25))))
+          (check (sequence-equal (loop repeat 25
+                                       collect (read-char in))
+                                 (subseq file-string (- string-length 25)))))))))
 
 (defmacro using-values ((&rest values) &body body)
   "Executes BODY and feeds an element from VALUES to the USE-VALUE
@@ -514,11 +520,9 @@
          (no-tests (* 8 (length compare-files-args-list))))
     #+:lispworks
     (setq no-tests (* 2 no-tests))
-    #+(or)
     (dolist (*copy-function* '(copy-stream copy-stream*))
       (dolist (args compare-files-args-list)
         (apply 'compare-files args)))
-    #+(or)
     (let ((string-test-args-list (loop for (file-name symbols) in *test-files*
                                        nconc (create-test-combinations file-name symbols t))))
       (incf no-tests (length string-test-args-list))
@@ -530,10 +534,8 @@
       (dolist (args read-sequence-test-args-list)
         (apply 'read-sequence-test args)))
     (incf no-tests)
-    #+(or)
     (error-handling-test)
     (incf no-tests)
-    #+(or)
     (unread-char-test)
     (format *error-output* "~%~%~:[~A of ~A tests failed..~;~*All ~A tests passed~].~%"
             (= no-tests *test-success-counter*) (- no-tests *test-success-counter*) no-tests)))



More information about the Flexi-streams-cvs mailing list