[flexi-streams-cvs] r61 - in branches/edi: . doc test
eweitz at common-lisp.net
eweitz at common-lisp.net
Sun May 25 23:43:23 UTC 2008
Author: eweitz
Date: Sun May 25 19:43:22 2008
New Revision: 61
Modified:
branches/edi/CHANGELOG
branches/edi/conditions.lisp
branches/edi/decode.lisp
branches/edi/doc/index.html
branches/edi/flexi-streams.asd
branches/edi/length.lisp
branches/edi/packages.lisp
branches/edi/strings.lisp
branches/edi/test/test.lisp
Log:
Ready for release
Modified: branches/edi/CHANGELOG
==============================================================================
--- branches/edi/CHANGELOG (original)
+++ branches/edi/CHANGELOG Sun May 25 19:43:22 2008
@@ -1,3 +1,10 @@
+Version 1.0.0
+2008-05-26
+More redesign for the sake of performance
+More checks for invalid data
+More tests
+Exported functions for length computation
+
Version 0.15.3
2008-05-23
Avoid CHANGE-CLASS on LispWorks if possible
Modified: branches/edi/conditions.lisp
==============================================================================
--- branches/edi/conditions.lisp (original)
+++ branches/edi/conditions.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.8 2008/05/25 03:07:58 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/conditions.lisp,v 1.9 2008/05/25 22:23:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -93,21 +93,11 @@
()
(:documentation "Superclass for all errors related to external
formats."))
-
-(define-condition external-format-warning (external-format-condition warning)
- ()
- (:documentation "Superclass for all warnings related to external
-formats."))
(define-condition external-format-encoding-error (external-format-error)
()
(:documentation "Errors of this type are signalled if there is an
encoding problem."))
-
-(define-condition external-format-encoding-warning (external-format-warning)
- ()
- (:documentation "Warnings of this type are signalled if there is an
-encoding problem."))
(defun signal-encoding-error (external-format format-control &rest format-args)
"Convenience function similar to ERROR to signal conditions of type
@@ -116,11 +106,3 @@
:format-control format-control
:format-arguments format-args
:external-format external-format))
-
-(defun signal-encoding-warning (external-format format-control &rest format-args)
- "Convenience function similar to WARN to signal conditions of type
-EXTERNAL-FORMAT-ENCODING-WARNING."
- (warn 'external-format-encoding-warning
- :format-control format-control
- :format-arguments format-args
- :external-format external-format))
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.26 2008/05/25 20:44:03 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.29 2008/05/25 23:19:19 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -202,7 +202,7 @@
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
(let* ((i start)
- (string-length (compute-number-of-chars format sequence start end nil))
+ (string-length (compute-number-of-chars format sequence start end))
(string (make-array string-length :element-type 'char*)))
(declare (fixnum i string-length))
(loop for j of-type fixnum from 0 below string-length
@@ -223,39 +223,46 @@
encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
BODY is a code template for the code to read octets and return one
-character. BODY must contain a symbol OCTET-GETTER representing the
-form which is used to obtain the next octet."
- `(progn
- (defmethod octets-to-char-code ((format ,lf-format-class) reader)
- (declare #.*fixnum-optimize-settings*)
- (declare (function reader))
- (symbol-macrolet ((octet-getter (funcall reader)))
- ,@(sublis '((char-decoder . octets-to-char-code))
- body)))
- (define-sequence-readers (,lf-format-class) , at body)
- (define-sequence-readers (,cr-format-class)
- ,(with-unique-names (char-code)
- `(let ((,char-code (progn , at body)))
- (case ,char-code
- (#.+cr+ #.(char-code #\Newline))
- (otherwise ,char-code)))))
- (define-sequence-readers (,crlf-format-class)
- ,(with-unique-names (char-code next-char-code get-char-code)
- `(flet ((,get-char-code () , at body))
- (let ((,char-code (,get-char-code)))
+character code. BODY must contain a symbol OCTET-GETTER representing
+the form which is used to obtain the next octet."
+ (let* ((body (with-unique-names (char-code)
+ `((let ((,char-code (progn , at body)))
+ (when (and ,char-code
+ (or (<= #xd8 (logand* #x00ff (ash* ,char-code -8)) #xdf)
+ (> ,char-code #x10ffff)))
+ (recover-from-encoding-error format "Illegal code point ~A \(#x~:*~X)." ,char-code))
+ ,char-code)))))
+ `(progn
+ (defmethod octets-to-char-code ((format ,lf-format-class) reader)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (function reader))
+ (symbol-macrolet ((octet-getter (funcall reader)))
+ ,@(sublis '((char-decoder . octets-to-char-code))
+ body)))
+ (define-sequence-readers (,lf-format-class) , at body)
+ (define-sequence-readers (,cr-format-class)
+ ,(with-unique-names (char-code)
+ `(let ((,char-code (progn , at body)))
(case ,char-code
- (#.+cr+
- (let ((,next-char-code (,get-char-code)))
- (case ,next-char-code
- (#.+lf+ #.(char-code #\Newline))
- ;; we saw a CR but no LF afterwards, but then the data
- ;; ended, so we just return #\Return
- ((nil) +cr+)
- ;; if the character we peeked at wasn't a
- ;; linefeed character we unread its constituents
- (otherwise (unget (code-char ,next-char-code))
- ,char-code))))
- (otherwise ,char-code))))))))
+ (#.+cr+ #.(char-code #\Newline))
+ (otherwise ,char-code)))))
+ (define-sequence-readers (,crlf-format-class)
+ ,(with-unique-names (char-code next-char-code get-char-code)
+ `(flet ((,get-char-code () , at body))
+ (let ((,char-code (,get-char-code)))
+ (case ,char-code
+ (#.+cr+
+ (let ((,next-char-code (,get-char-code)))
+ (case ,next-char-code
+ (#.+lf+ #.(char-code #\Newline))
+ ;; we saw a CR but no LF afterwards, but then the data
+ ;; ended, so we just return #\Return
+ ((nil) +cr+)
+ ;; if the character we peeked at wasn't a
+ ;; linefeed character we unread its constituents
+ (otherwise (unget (code-char ,next-char-code))
+ ,char-code))))
+ (otherwise ,char-code)))))))))
(define-char-decoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
octet-getter)
@@ -296,25 +303,28 @@
(multiple-value-bind (start count)
(cond ((not (logbitp 7 octet))
(values octet 0))
- ((= #b11000000 (logand octet #b11100000))
- (values (logand octet #b00011111) 1))
- ((= #b11100000 (logand octet #b11110000))
- (values (logand octet #b00001111) 2))
- ((= #b11110000 (logand octet #b11111000))
- (values (logand octet #b00000111) 3))
+ ((= #b11000000 (logand* octet #b11100000))
+ (when (= #b11000000 (logand* octet #b11111110))
+ (return-from char-decoder
+ (recover-from-encoding-error format
+ "Illegal value #x~X leads to `overlong' UTF-8 sequence."
+ octet)))
+ (values (logand* octet #b00011111) 1))
+ ((= #b11100000 (logand* octet #b11110000))
+ (values (logand* octet #b00001111) 2))
+ ((= #b11110000 (logand* octet #b11111000))
+ (values (logand* octet #b00000111) 3))
(t (return-from char-decoder
(recover-from-encoding-error format
"Unexpected value #x~X at start of UTF-8 sequence."
octet))))
(declare (fixnum count))
- ;; note that we currently don't check for "overlong"
- ;; sequences or other illegal values
(loop for result of-type code-point
- = start then (+ (ash result 6)
- (logand octet #b111111))
+ = start then (+ (ash* result 6)
+ (logand* octet #b111111))
repeat count
for octet of-type octet = (read-next-byte)
- unless (= #b10000000 (logand octet #b11000000))
+ unless (= #b10000000 (logand* octet #b11000000))
do (return-from char-decoder
(recover-from-encoding-error format
"Unexpected value #x~X in UTF-8 sequence." octet))
@@ -334,7 +344,7 @@
(setq first-octet-seen t))))
(flet ((read-next-word ()
(+ (the octet (read-next-byte))
- (ash (the octet (read-next-byte)) 8))))
+ (ash* (the octet (read-next-byte)) 8))))
(declare (inline read-next-word))
(let ((word (read-next-word)))
(declare (type (unsigned-byte 16) word))
@@ -346,8 +356,8 @@
(recover-from-encoding-error format
"Unexpected UTF-16 word #x~X following #x~X."
next-word word)))
- (+ (ash (logand #b1111111111 word) 10)
- (logand #b1111111111 next-word)
+ (+ (ash* (logand* #b1111111111 word) 10)
+ (logand* #b1111111111 next-word)
#x10000)))
(t word)))))))
@@ -364,7 +374,7 @@
(t (return-from char-decoder nil))))
(setq first-octet-seen t))))
(flet ((read-next-word ()
- (+ (ash (the octet (read-next-byte)) 8)
+ (+ (ash* (the octet (read-next-byte)) 8)
(the octet (read-next-byte)))))
(declare (inline read-next-word))
(let ((word (read-next-word)))
@@ -377,8 +387,8 @@
(recover-from-encoding-error format
"Unexpected UTF-16 word #x~X following #x~X."
next-word word)))
- (+ (ash (logand #b1111111111 word) 10)
- (logand #b1111111111 next-word)
+ (+ (ash* (logand* #b1111111111 word) 10)
+ (logand* #b1111111111 next-word)
#x10000)))
(t word)))))))
@@ -396,7 +406,7 @@
(setq first-octet-seen t))))
(loop for count of-type fixnum from 0 to 24 by 8
for octet of-type octet = (read-next-byte)
- sum (ash octet count)))))
+ sum (ash* octet count)))))
(define-char-decoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
(let (first-octet-seen)
@@ -412,7 +422,7 @@
(setq first-octet-seen t))))
(loop for count of-type fixnum from 24 downto 0 by 8
for octet of-type octet = (read-next-byte)
- sum (ash octet count)))))
+ sum (ash* octet count)))))
(defmethod octets-to-char-code ((format flexi-cr-mixin) reader)
(declare #.*fixnum-optimize-settings*)
Modified: branches/edi/doc/index.html
==============================================================================
--- branches/edi/doc/index.html (original)
+++ branches/edi/doc/index.html Sun May 25 19:43:22 2008
@@ -72,7 +72,6 @@
<li><a href="#external-format-condition"><code>external-format-condition</code></a>
<li><a href="#external-format-condition-external-format"><code>external-format-condition-external-format</code></a>
<li><a href="#external-format-error"><code>external-format-error</code></a>
- <li><a href="#external-format-warning"><code>external-format-warning</code></a>
<li><a href="#external-format-encoding-error"><code>external-format-encoding-error</code></a>
<li><a href="#*substitution-char*"><code>*substitution-char*</code></a>
</ol>
@@ -229,7 +228,7 @@
<p>
FLEXI-STREAMS together with this documentation can be downloaded from <a
href="http://weitz.de/files/flexi-streams.tar.gz">http://weitz.de/files/flexi-streams.tar.gz</a>. The
-current version is 0.15.3.
+current version is 1.0.0.
<p>
Before you install FLEXI-STREAMS you first need to
install the <a
@@ -548,14 +547,6 @@
</blockquote>
<p><br>[Condition]
-<br><a class=none name="external-format-warning"><b>external-format-warning</b></a>
-
-<blockquote><br>
-All warnings related to <a href="#external-formats">external formats</a> are of this type.
-This is a subtype of <a href="#external-format-condition"><code>EXTERNAL-FORMAT-CONDITION</code></a>.
-</blockquote>
-
-<p><br>[Condition]
<br><a class=none name="external-format-error"><b>external-format-error</b></a>
<blockquote><br>
@@ -1063,7 +1054,7 @@
The defaults for
<code><i>start</i></code> and <code><i>end</i></code>
are <code>0</code> and the length of the sequence. The default
-for <code><i>external-format</i></code> is <code>:LATIN1</code>.
+for <code><i>external-format</i></code> is <code>:LATIN1</code>. Note that this function doesn't check for the validity of the data in <code><i>sequence</i></code>.
<p>
This function is optimized for the case
of <code><i>sequence</i></code> being
@@ -1110,7 +1101,7 @@
his work on making FLEXI-STREAMS faster.
<p>
-$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.116 2008/05/25 19:07:55 edi Exp $
+$Header: /usr/local/cvsrep/flexi-streams/doc/index.html,v 1.119 2008/05/25 23:42:30 edi Exp $
<p><a href="http://weitz.de/index.html">BACK TO MY HOMEPAGE</a>
</body>
Modified: branches/edi/flexi-streams.asd
==============================================================================
--- branches/edi/flexi-streams.asd (original)
+++ branches/edi/flexi-streams.asd Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.71 2008/05/25 23:42:28 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -35,7 +35,7 @@
(in-package :flexi-streams-system)
(defsystem :flexi-streams
- :version "0.15.3"
+ :version "1.0.0"
:serial t
:components ((:file "packages")
(:file "mapping")
Modified: branches/edi/length.lisp
==============================================================================
--- branches/edi/length.lisp (original)
+++ branches/edi/length.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.3 2008/05/25 20:15:28 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.4 2008/05/25 22:23:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -72,51 +72,50 @@
;; the estimate unexact
(* 1.02d0 (call-next-method)))
-(defgeneric check-end (format start end i warnp)
+(defgeneric check-end (format start end i)
(declare #.*fixnum-optimize-settings*)
(:documentation "Helper function used below to determine if we tried
to read past the end of the sequence.")
- (:method (format start end i warnp)
+ (:method (format start end i)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end i))
- (when (and warnp (> i end))
- (signal-encoding-warning format "These ~A octet~:P can't be ~
+ (when (> i end)
+ (signal-encoding-error format "These ~A octet~:P can't be ~
decoded using ~A as the sequence is too short. ~A octet~:P missing ~
at then end."
- (- end start)
- (external-format-name format)
- (- i end))))
- (:method ((format flexi-utf-16-format) start end i warnp)
+ (- end start)
+ (external-format-name format)
+ (- i end))))
+ (:method ((format flexi-utf-16-format) start end i)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end i))
- (declare (ignore i warnp))
+ (declare (ignore i))
;; don't warn twice
(when (evenp (- end start))
(call-next-method))))
-(defgeneric compute-number-of-chars (format sequence start end warnp)
+(defgeneric compute-number-of-chars (format sequence start end)
(declare #.*standard-optimize-settings*)
(:documentation "Computes the exact number of characters required to
decode the sequence of octets in SEQUENCE from START to END using the
-external format FORMAT. If WARNP is NIL, warnings will be muffled."))
+external format FORMAT."))
-(defmethod compute-number-of-chars :around (format (list list) start end warnp)
+(defmethod compute-number-of-chars :around (format (list list) start end)
(declare #.*standard-optimize-settings*)
- (call-next-method format (coerce list 'vector) start end warnp))
+ (call-next-method format (coerce list 'vector) start end))
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence warnp))
+ (declare (ignore sequence))
(- end start))
-(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end)
;; this method only applies to the 8-bit formats as all other
;; formats with CRLF line endings have their own specialized methods
;; below
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
- (declare (ignore warnp))
(let ((i start)
(length (- end start)))
(declare (fixnum i length))
@@ -130,7 +129,7 @@
(decf length)))
length))
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -140,17 +139,18 @@
(when (>= i end)
(return))
(let* ((octet (aref sequence i))
+ ;; note that there are no validity checks here
(length (cond ((not (logbitp 7 octet)) 1)
- ((= #b11000000 (logand octet #b11100000)) 2)
- ((= #b11100000 (logand octet #b11110000)) 3)
+ ((= #b11000000 (logand* octet #b11100000)) 2)
+ ((= #b11100000 (logand* octet #b11110000)) 3)
(t 4))))
(declare (fixnum length) (type octet octet))
(incf sum)
(incf i length)))
- (check-end format start end i warnp)
+ (check-end format start end i)
sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -161,28 +161,29 @@
(when (>= i end)
(return))
(let* ((octet (aref sequence i))
+ ;; note that there are no validity checks here
(length (cond ((not (logbitp 7 octet)) 1)
- ((= #b11000000 (logand octet #b11100000)) 2)
- ((= #b11100000 (logand octet #b11110000)) 3)
+ ((= #b11000000 (logand* octet #b11100000)) 2)
+ ((= #b11100000 (logand* octet #b11110000)) 3)
(t 4))))
(declare (fixnum length) (type octet octet))
(unless (and (= octet +lf+) (= last-octet +cr+))
(incf sum))
(incf i length)
(setq last-octet octet)))
- (check-end format start end i warnp)
+ (check-end format start end i)
sum))
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(declare (ignore sequence))
- (when (and warnp (oddp (- end start)))
- (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+ (when (oddp (- end start))
+ (signal-encoding-error format "~A octet~:P cannot be decoded ~
using UTF-16 as ~:*~A is not even."
- (- end start))))
+ (- end start))))
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(let ((sum 0)
@@ -198,10 +199,10 @@
(declare (fixnum length) (type octet high-octet))
(incf sum)
(incf i length)))
- (check-end format start (+ end 2) i warnp)
+ (check-end format start (+ end 2) i)
sum))
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -217,10 +218,10 @@
(declare (fixnum length) (type octet high-octet))
(incf sum)
(incf i length)))
- (check-end format start (+ end 2) i warnp)
+ (check-end format start (+ end 2) i)
sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -243,10 +244,10 @@
(aref sequence i)
0))
(incf i length)))
- (check-end format start (+ end 2) i warnp)
+ (check-end format start (+ end 2) i)
sum))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
(let ((sum 0)
@@ -269,29 +270,28 @@
(aref sequence (1+ i))
0))
(incf i length)))
- (check-end format start (+ end 2) i warnp)
+ (check-end format start (+ end 2) i)
sum))
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
(declare (ignore sequence))
(let ((length (- end start)))
- (when (and warnp (plusp (mod length 4)))
- (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+ (when (plusp (mod length 4))
+ (signal-encoding-error format "~A octet~:P cannot be decoded ~
using UTF-32 as ~:*~A is not a multiple-value of four."
- length))))
+ length))))
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end))
- (declare (ignore sequence warnp))
+ (declare (ignore sequence))
(ceiling (- end start) 4))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
- (declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
(decf end 8)
@@ -306,10 +306,9 @@
(t (incf i 4))))
length))
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end)
(declare #.*fixnum-optimize-settings*)
(declare (fixnum start end) (vector sequence))
- (declare (ignore warnp))
(let ((i start)
(length (ceiling (- end start) 4)))
(decf end 8)
Modified: branches/edi/packages.lisp
==============================================================================
--- branches/edi/packages.lisp (original)
+++ branches/edi/packages.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.37 2008/05/25 03:07:59 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/packages.lisp,v 1.38 2008/05/25 22:23:58 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -46,12 +46,10 @@
:external-format-eol-style
:external-format-error
:external-format-encoding-error
- :external-format-encoding-warning
:external-format-equal
:external-format-id
:external-format-little-endian
:external-format-name
- :external-format-warning
:flexi-input-stream
:flexi-output-stream
:flexi-io-stream
Modified: branches/edi/strings.lisp
==============================================================================
--- branches/edi/strings.lisp (original)
+++ branches/edi/strings.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.30 2008/05/25 19:07:53 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/strings.lisp,v 1.32 2008/05/25 23:09:13 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -72,11 +72,12 @@
(defun char-length (sequence &key (external-format :latin1) (start 0) (end (length sequence)))
"Kind of the inverse of OCTET-LENGTH. Returns the length of the
subsequence \(of octets) of SEQUENCE from START to END in characters
-if decoded using the external format EXTERNAL-FORMAT.
+if decoded using the external format EXTERNAL-FORMAT. Note that this
+function doesn't check for the validity of the data in SEQUENCE.
This function is optimized for the case of SEQUENCE being a vector.
Don't use lists if you're in a hurry."
(declare #.*standard-optimize-settings*)
(declare (fixnum start end))
(setq external-format (maybe-convert-external-format external-format))
- (compute-number-of-chars external-format sequence start end t))
+ (compute-number-of-chars external-format sequence start end))
Modified: branches/edi/test/test.lisp
==============================================================================
--- branches/edi/test/test.lisp (original)
+++ branches/edi/test/test.lisp Sun May 25 19:43:22 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS-TEST; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.33 2008/05/25 03:08:02 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/test/test.lisp,v 1.35 2008/05/25 23:10:47 edi Exp $
;;; Copyright (c) 2006-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,6 +29,48 @@
(in-package :flexi-streams-test)
+(defmacro with-test ((test-description) &body body)
+ "Defines a test. Two utilities are available inside of the body of
+the maco: The function FAIL, and the macro CHECK. FAIL, the lowest
+level utility, marks the test defined by WITH-TEST as failed. CHECK
+checks whether its argument is true, otherwise it calls FAIL. If
+during evaluation of the specified expression any condition is
+signalled, this is also considered a failure.
+
+WITH-TEST prints reports while the tests run. It also increments
+*TEST-SUCCESS-COUNT* if a test completes successfully."
+ (flex::with-unique-names (successp)
+ `(let ((,successp t))
+ (flet ((fail (format-str &rest format-args)
+ (setf ,successp nil)
+ (apply #'format *error-output* format-str format-args)))
+ (macrolet ((check (expression)
+ `(handler-case
+ (unless ,expression
+ (fail "Expression ~S failed.~%" ',expression))
+ (error (c)
+ (fail "Expression ~S failed signalling error of type ~A: ~A.~%"
+ ',expression (type-of c) c))))
+ (with-expected-error ((condition-type) &body body)
+ `(handler-case (progn , at body)
+ (,condition-type () t)
+ (:no-error (&rest args)
+ (declare (ignore args))
+ (fail "Expected condition ~S not signalled~%"
+ ',condition-type)))))
+ (format *error-output* "Test ~S~%" ,test-description)
+ , at body
+ (if ,successp
+ (incf *test-success-counter*)
+ (format *error-output* " Test failed!!!~%"))
+ (terpri *error-output*)
+ (terpri *error-output*))
+ ,successp))))
+
+;; LW can't indent this correctly because it's in a MACROLET
+#+:lispworks
+(editor:setup-indent "with-expected-error" 1 2 4)
+
(defconstant +buffer-size+ 8192
"Size of buffers for COPY-STREAM* below.")
@@ -245,37 +287,6 @@
(setf (fill-pointer string) (read-sequence string in))
string)))
-(defmacro with-test ((test-description) &body body)
- "Defines a test. Two utilities are available inside of the body of
-the maco: The function FAIL, and the macro CHECK. FAIL, the lowest
-level utility, marks the test defined by WITH-TEST as failed. CHECK
-checks whether its argument is true, otherwise it calls FAIL. If
-during evaluation of the specified expression any condition is
-signalled, this is also considered a failure.
-
-WITH-TEST prints reports while the tests run. It also increments
-*TEST-SUCCESS-COUNT* if a test completes successfully."
- (flex::with-unique-names (successp)
- `(let ((,successp t))
- (flet ((fail (format-str &rest format-args)
- (setf ,successp nil)
- (apply #'format *error-output* format-str format-args)))
- (macrolet ((check (expression)
- `(handler-case
- (unless ,expression
- (fail "Expression ~S failed.~%" ',expression))
- (error (c)
- (fail "Expression ~S failed signalling error of type ~A: ~A.~%"
- ',expression (type-of c) c)))))
- (format *error-output* "Test ~S~%" ,test-description)
- , at body
- (if ,successp
- (incf *test-success-counter*)
- (format *error-output* " Test failed!!!~%"))
- (terpri *error-output*)
- (terpri *error-output*))
- ,successp))))
-
(defun old-string-to-octets (string &key
(external-format (make-external-format :latin1))
(start 0) end)
@@ -460,7 +471,51 @@
(defun error-handling-test ()
"Tests several possible errors and how they are handled."
- (with-test ("Handling of errors.")
+ (with-test ("Illegal values.")
+ (macrolet ((want-encoding-error (input format)
+ `(with-expected-error (external-format-encoding-error)
+ (read-flexi-line* ,input ,format))))
+ ;; "overlong"
+ (want-encoding-error #(#b11000000) :utf-8)
+ (want-encoding-error #(#b11000001) :utf-8)
+ ;; examples of invalid lead octets
+ (want-encoding-error #(#b11111000) :utf-8)
+ (want-encoding-error #(#b11111001) :utf-8)
+ (want-encoding-error #(#b11111100) :utf-8)
+ (want-encoding-error #(#b11111101) :utf-8)
+ (want-encoding-error #(#b11111110) :utf-8)
+ (want-encoding-error #(#b11111111) :utf-8)
+ ;; illegal code points
+ (want-encoding-error #(#x00 #x00 #x11 #x00) :utf-32le)
+ (want-encoding-error #(#x00 #xd8) :utf-16le)
+ (want-encoding-error #(#xff #xdf) :utf-16le)))
+ (with-test ("Illegal lengths.")
+ (macrolet ((want-encoding-error (input format)
+ `(with-expected-error (external-format-encoding-error)
+ (read-flexi-line* ,input ,format))))
+ ;; UTF-8 sequences which are too short
+ (want-encoding-error #(#xe4 #xf6 #xfc) :utf8)
+ (want-encoding-error #(#xc0) :utf8)
+ (want-encoding-error #(#xe0 #xff) :utf8)
+ (want-encoding-error #(#xf0 #xff #xff) :utf8)
+ ;; UTF-16 wants an even number of octets
+ (want-encoding-error #(#x01) :utf-16le)
+ (want-encoding-error #(#x01 #x01 #x01) :utf-16le)
+ (want-encoding-error #(#x01) :utf-16be)
+ (want-encoding-error #(#x01 #x01 #x01) :utf-16be)
+ ;; another word should follow but it doesn't
+ (want-encoding-error #(#x01 #xd8) :utf-16le)
+ (want-encoding-error #(#xd8 #x01) :utf-16be)
+ ;; UTF-32 always wants four octets
+ (want-encoding-error #(#x01) :utf-32le)
+ (want-encoding-error #(#x01 #x01) :utf-32le)
+ (want-encoding-error #(#x01 #x01 #x01) :utf-32le)
+ (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32le)
+ (want-encoding-error #(#x01) :utf-32be)
+ (want-encoding-error #(#x01 #x01) :utf-32be)
+ (want-encoding-error #(#x01 #x01 #x01) :utf-32be)
+ (want-encoding-error #(#x01 #x01 #x01 #x01 #x01) :utf-32be)))
+ (with-test ("Errors while decoding and substitution of characters.")
;; handling of EOF in the middle of CRLF
(check (string= #.(string #\Return)
(read-flexi-line `(,(char-code #\Return)) '(:ascii :eol-style :crlf))))
@@ -472,11 +527,7 @@
(check (string= "a??" (read-flexi-line `(,(char-code #\a) 170 210) :windows-1253)))
(check (string= "a??" (read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253)))
;; not a valid UTF-8 sequence
- (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8)))
- (check (string= "?" (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8)))
- ;; UTF-8 can't start neither with #b11111110 nor with #b11111111
- (check (string= "??" (read-flexi-line '(#b11111110 #b11111111) :utf8)))
- (check (string= "?" (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
+ (check (string= "??" (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))))
(let ((*substitution-char* nil))
;; :ASCII doesn't have characters with char codes > 127
(check (string= "abc" (using-values (#\b #\c)
@@ -490,16 +541,12 @@
(read-flexi-line* `#(,(char-code #\a) 170 210) :windows-1253))))
;; not a valid UTF-8 sequence
(check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#xe4 #xf6 #xfc) :utf8))))
- (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#xe4 #xf6 #xfc) :utf8))))
;; UTF-8 can't start neither with #b11111110 nor with #b11111111
(check (string= "QW" (using-values (#\Q #\W) (read-flexi-line '(#b11111110 #b11111111) :utf8))))
- (check (string= "Q" (using-values (#\Q) (read-flexi-line* #(#b11111110 #b11111111) :utf8))))
;; only one byte
(check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16le))))
- (check (string= "" (read-flexi-line* #(#x01) :utf-16le)))
;; two bytes, but value of resulting word suggests that another word follows
(check (string= "R" (using-values (#\R) (read-flexi-line '(#x01 #xd8) :utf-16le))))
- (check (string= "R" (using-values (#\R) (read-flexi-line* #(#x01 #xd8) :utf-16le))))
;; the second word must fit into the [#xdc00; #xdfff] interval, but it is #xdbff
(check (string= "T" (using-values (#\T) (read-flexi-line '(#x01 #xd8 #xff #xdb) :utf-16le))))
(check (string= "T" (using-values (#\T) (read-flexi-line* #(#x01 #xd8 #xff #xdb) :utf-16le))))
@@ -507,11 +554,10 @@
(check (string= "E" (using-values (#\E) (read-flexi-line '(#x01) :utf-16be))))
(check (string= "R" (using-values (#\R) (read-flexi-line '(#xd8 #x01) :utf-16be))))
(check (string= "T" (using-values (#\T) (read-flexi-line '(#xd8 #x01 #xdb #xff) :utf-16be))))
- (check (string= "" (read-flexi-line* #(#x01) :utf-16be)))
- (check (string= "R" (using-values (#\R) (read-flexi-line* #(#xd8 #x01) :utf-16be))))
(check (string= "T" (using-values (#\T) (read-flexi-line* #(#xd8 #x01 #xdb #xff) :utf-16be))))
- ;; the only case when error is signalled for UTF-32 is at end of file
- ;; in the middle of 4-byte sequence, both for big and little endian
+ ;; the only case when errors are signalled for UTF-32 is at end
+ ;; of file in the middle of 4-byte sequence, both for big and
+ ;; little endian
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01) :utf-32le))))
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32le))))
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32le))))
@@ -521,17 +567,7 @@
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01) :utf-32be))))
(check (string= "Y" (using-values (#\Y) (read-flexi-line '(#x01 #x01 #x01) :utf-32be))))
(check (string= "aY" (using-values (#\Y)
- (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32le))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32le))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32le))))
- (check (string= "aY" (using-values (#\Y)
- (read-flexi-line* `#(,(char-code #\a) #x00 #x00 #x00 #x01) :utf-32le))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01) :utf-32be))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01) :utf-32be))))
- (check (string= "Y" (using-values (#\Y) (read-flexi-line* #(#x01 #x01 #x01) :utf-32be))))
- (check (string= "aY" (using-values (#\Y)
- (read-flexi-line* `#(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))))))
+ (read-flexi-line `(#x00 #x00 #x00 ,(char-code #\a) #x01) :utf-32be)))))))
(defun unread-char-test ()
"Tests whether UNREAD-CHAR behaves as expected."
@@ -572,7 +608,7 @@
(incf no-tests (length read-sequence-test-args-list))
(dolist (args read-sequence-test-args-list)
(apply 'sequence-test args)))
- (incf no-tests)
+ (incf no-tests 3)
(error-handling-test)
(incf no-tests)
(unread-char-test)
More information about the Flexi-streams-cvs
mailing list