[flexi-streams-cvs] r57 - branches/edi

eweitz at common-lisp.net eweitz at common-lisp.net
Sun May 25 12:26:48 UTC 2008


Author: eweitz
Date: Sun May 25 08:26:47 2008
New Revision: 57

Added:
   branches/edi/length.lisp   (contents, props changed)
Modified:
   branches/edi/decode.lisp
   branches/edi/encode.lisp
   branches/edi/external-format.lisp
   branches/edi/flexi-streams.asd
Log:
Re-org


Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp	(original)
+++ branches/edi/decode.lisp	Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.20 2008/05/25 03:25:30 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -29,256 +29,6 @@
 
 (in-package :flexi-streams)
 
-(defgeneric compute-number-of-chars (format sequence start end warnp)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Computes the exact number of characters required to
-decode the sequence of octets in SEQUENCE from START to END using the
-external format FORMAT.  If WARNP is NIL, warnings will be muffled."))
-
-(defmethod compute-number-of-chars :around (format (list list) start end warnp)
-  (declare #.*standard-optimize-settings*)
-  (call-next-method format (coerce list 'vector) start end warnp))
-
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence warnp))
-  (- end start))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
-  ;; this method only applies to the 8-bit formats as all other
-  ;; formats with CRLF line endings have their own specialized methods
-  ;; below
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore warnp))
-  (let ((i start)
-        (length (- end start)))
-    (declare (fixnum i length))
-    (loop
-     (when (>= i end)
-       (return))
-     (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
-       (unless position
-         (return))
-       (setq i (1+ position))
-       (decf length)))
-    length))
-
-(defgeneric check-end (format start end i warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (:method (format start end i warnp)
-   (declare #.*fixnum-optimize-settings*)
-   (declare (fixnum start end i))
-   (when (and warnp (> i end))
-     (signal-encoding-warning format "These ~A octet~:P can't be ~
-decoded using ~A as the sequence is too short.  ~A octet~:P missing ~
-at then end."
-                              (- end start)
-                              (external-format-name format)
-                              (- i end))))
-  (:method ((format flexi-utf-16-format) start end i warnp)
-   (declare #.*fixnum-optimize-settings*)
-   (declare (fixnum start end i))
-   (declare (ignore i warnp))
-   ;; don't warn twice
-   (when (evenp (- end start))
-     (call-next-method))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((octet (aref sequence i))
-            (length (cond ((not (logbitp 7 octet)) 1)
-                          ((= #b11000000 (logand octet #b11100000)) 2)
-                          ((= #b11100000 (logand octet #b11110000)) 3)
-                          (t 4))))
-       (declare (fixnum length) (type octet octet))
-       (incf sum)
-       (incf i length)))
-    (check-end format start end i warnp)
-    sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start)
-        (last-octet 0))
-    (declare (fixnum i sum) (type octet last-octet))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((octet (aref sequence i))
-            (length (cond ((not (logbitp 7 octet)) 1)
-                          ((= #b11000000 (logand octet #b11100000)) 2)
-                          ((= #b11100000 (logand octet #b11110000)) 3)
-                          (t 4))))
-       (declare (fixnum length) (type octet octet))
-       (unless (and (= octet +lf+) (= last-octet +cr+))
-         (incf sum))
-       (incf i length)
-       (setq last-octet octet)))
-    (check-end format start end i warnp)
-    sum))
-
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence))
-  (when (and warnp (oddp (- end start)))
-    (signal-encoding-warning format "~A octet~:P cannot be decoded ~
-using UTF-16 as ~:*~A is not even."
-                             (- end start))))  
-  
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (decf end 2)
-    (loop
-     (when (> i end)
-       (return))
-     (let* ((high-octet (aref sequence (1+ i)))
-            (length (cond ((<= #xd8 high-octet #xdf) 4)
-                          (t 2))))
-       (declare (fixnum length) (type octet high-octet))
-       (incf sum)
-       (incf i length)))
-    (check-end format start (+ end 2) i warnp)
-    sum))
-
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (decf end 2)
-    (loop
-     (when (> i end)
-       (return))
-     (let* ((high-octet (aref sequence i))
-            (length (cond ((<= #xd8 high-octet #xdf) 4)
-                          (t 2))))
-       (declare (fixnum length) (type octet high-octet))
-       (incf sum)
-       (incf i length)))
-    (check-end format start (+ end 2) i warnp)
-    sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start)
-        (last-octet 0))
-    (declare (fixnum i sum) (type octet last-octet))
-    (decf end 2)
-    (loop
-     (when (> i end)
-       (return))
-     (let* ((high-octet (aref sequence (1+ i)))
-            (length (cond ((<= #xd8 high-octet #xdf) 4)
-                          (t 2))))
-       (declare (fixnum length) (type octet high-octet))
-       (unless (and (zerop high-octet)
-                    (= (the octet (aref sequence i)) +lf+)
-                    (= last-octet +cr+))         
-         (incf sum))
-       (setq last-octet (if (zerop high-octet)
-                          (aref sequence i)
-                          0))
-       (incf i length)))
-    (check-end format start (+ end 2) i warnp)
-    sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start)
-        (last-octet 0))
-    (declare (fixnum i sum) (type octet last-octet))
-    (decf end 2)
-    (loop
-     (when (> i end)
-       (return))
-     (let* ((high-octet (aref sequence i))
-            (length (cond ((<= #xd8 high-octet #xdf) 4)
-                          (t 2))))
-       (declare (fixnum length) (type octet high-octet))
-       (unless (and (zerop high-octet)
-                    (= (the octet (aref sequence (1+ i))) +lf+)
-                    (= last-octet +cr+))
-         (incf sum))
-       (setq last-octet (if (zerop high-octet)
-                          (aref sequence (1+ i))
-                          0))
-       (incf i length)))
-    (check-end format start (+ end 2) i warnp)
-    sum))
-
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence))
-  (let ((length (- end start)))
-    (when (and warnp (plusp (mod length 4)))
-      (signal-encoding-warning format "~A octet~:P cannot be decoded ~
-using UTF-32 as ~:*~A is not a multiple-value of four."
-                               length))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence warnp))
-  (ceiling (- end start) 4))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore warnp))
-  (let ((i start)
-        (length (ceiling (- end start) 4)))
-    (decf end 8)
-    (loop
-     (when (> i end)
-       (return))
-     (cond ((loop for j of-type fixnum from i
-                  for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
-                  always (= octet (aref sequence j)))
-            (decf length)
-            (incf i 8))
-           (t (incf i 4))))
-    length))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore warnp))
-  (let ((i start)
-        (length (ceiling (- end start) 4)))
-    (decf end 8)
-    (loop
-     (when (> i end)
-       (return))
-     (cond ((loop for j of-type fixnum from i
-                  for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
-                  always (= octet (aref sequence j)))
-            (decf length)
-            (incf i 8))
-           (t (incf i 4))))
-    length))
-
 (defun recover-from-encoding-error (external-format format-control &rest format-args)
   "Helper function used by OCTETS-TO-CHAR-CODE below to deal with
 encoding errors.  Checks if *SUBSTITUTION-CHAR* is not NIL and returns

Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp	(original)
+++ branches/edi/encode.lisp	Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.17 2008/05/25 03:25:30 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -29,125 +29,6 @@
 
 (in-package :flexi-streams)
 
-(defgeneric compute-number-of-octets (format sequence start end)
-  (declare #.*standard-optimize-settings*)
-  (:documentation "Computes the exact number of octets required to
-encode the sequence of characters in SEQUENCE from START to END using
-the external format FORMAT."))
-
-(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence))  
-  (- end start))
-
-(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((< char-code #x80) 1)
-                               ((< char-code #x800) 2)
-                               ((< char-code #x10000) 3)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
-                               ((< char-code #x80) 1)
-                               ((< char-code #x800) 2)
-                               ((< char-code #x10000) 3)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((< char-code #x10000) 2)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
-                               ((< char-code #x10000) 2)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (let ((sum 0)
-        (i start))
-    (declare (fixnum i sum))
-    (loop
-     (when (>= i end)
-       (return))
-     (let* ((char-code (char-code (aref sequence i)))
-            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
-                               ((< char-code #x10000) 2)
-                               (t 4))))
-       (declare (fixnum char-length) (type char-code-integer char-code))
-       (incf sum char-length)
-       (incf i)))
-    sum))
-
-(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (declare (ignore sequence))
-  (* 4 (- end start)))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
-  (declare #.*fixnum-optimize-settings*)
-  (declare (fixnum start end))
-  (+ (call-next-method)
-     (* (case (external-format-name format)
-          (:utf-32 4)
-          (otherwise 1))
-        (count #\Newline sequence :start start :end end :test #'char=))))
-
 (defgeneric char-to-octets (format char writer)
   (declare #.*standard-optimize-settings*)
   (:documentation "Converts the character CHAR to a sequence of octets

Modified: branches/edi/external-format.lisp
==============================================================================
--- branches/edi/external-format.lisp	(original)
+++ branches/edi/external-format.lisp	Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.22 2008/05/25 12:26:02 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -387,46 +387,3 @@
 NORMALIZE-EXTERNAL-FORMAT."
   (print-unreadable-object (object stream :type t :identity t)
     (prin1 (normalize-external-format object) stream)))
-
-(defgeneric encoding-factor (format)
-  (:documentation "Given an external format FORMAT, returns a factor
-which denotes the octets to characters ratio to expect when
-encoding/decoding.  If the returned value is an integer, the factor is
-assumed to be exact.  If it is a \(double) float, the factor is
-supposed to be based on heuristics and usually not exact.
-
-This factor is used in string.lisp.")
-  (declare #.*standard-optimize-settings*))
-
-(defmethod encoding-factor ((format flexi-8-bit-format))
-  (declare #.*standard-optimize-settings*)
-  ;; 8-bit encodings map octets to characters in an exact one-to-one
-  ;; fashion
-  1)
-
-(defmethod encoding-factor ((format flexi-utf-8-format))
-  (declare #.*standard-optimize-settings*)
-  ;; UTF-8 characters can be anything from one to six octets, but we
-  ;; assume that the "overhead" is only about 5 percent - this
-  ;; estimate is obviously very much dependant on the content
-  1.05d0)
-
-(defmethod encoding-factor ((format flexi-utf-16-format))
-  (declare #.*standard-optimize-settings*)
-  ;; usually one character maps to two octets, but characters with
-  ;; code points above #x10000 map to four octets - we assume that we
-  ;; usually don't see these characters but of course have to return a
-  ;; float
-  2.0d0)
-
-(defmethod encoding-factor ((format flexi-utf-32-format))
-  (declare #.*standard-optimize-settings*)
-  ;; UTF-32 always matches every character to four octets
-  4)
-
-(defmethod encoding-factor ((format flexi-crlf-mixin))
-  (declare #.*standard-optimize-settings*)
-  ;; if the sequence #\Return #\Linefeed is the line-end marker, this
-  ;; obviously makes encodings potentially longer and definitely makes
-  ;; the estimate unexact
-  (* 1.02d0 (call-next-method)))

Modified: branches/edi/flexi-streams.asd
==============================================================================
--- branches/edi/flexi-streams.asd	(original)
+++ branches/edi/flexi-streams.asd	Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.69 2008/05/23 14:56:46 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $
 
 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
 
@@ -47,6 +47,7 @@
                (:file "util")
                (:file "conditions")
                (:file "external-format")
+               (:file "length")
                (:file "encode")
                (:file "decode")
                (:file "in-memory")

Added: branches/edi/length.lisp
==============================================================================
--- (empty file)
+++ branches/edi/length.lisp	Sun May 25 08:26:47 2008
@@ -0,0 +1,444 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $
+
+;;; Copyright (c) 2005-2008, Dr. Edmund Weitz.  All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;;   * Redistributions of source code must retain the above copyright
+;;;     notice, this list of conditions and the following disclaimer.
+
+;;;   * Redistributions in binary form must reproduce the above
+;;;     copyright notice, this list of conditions and the following
+;;;     disclaimer in the documentation and/or other materials
+;;;     provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :flexi-streams)
+
+(defgeneric encoding-factor (format)
+  (:documentation "Given an external format FORMAT, returns a factor
+which denotes the octets to characters ratio to expect when
+encoding/decoding.  If the returned value is an integer, the factor is
+assumed to be exact.  If it is a \(double) float, the factor is
+supposed to be based on heuristics and usually not exact.
+
+This factor is used in string.lisp.")
+  (declare #.*standard-optimize-settings*))
+
+(defmethod encoding-factor ((format flexi-8-bit-format))
+  (declare #.*standard-optimize-settings*)
+  ;; 8-bit encodings map octets to characters in an exact one-to-one
+  ;; fashion
+  1)
+
+(defmethod encoding-factor ((format flexi-utf-8-format))
+  (declare #.*standard-optimize-settings*)
+  ;; UTF-8 characters can be anything from one to six octets, but we
+  ;; assume that the "overhead" is only about 5 percent - this
+  ;; estimate is obviously very much dependant on the content
+  1.05d0)
+
+(defmethod encoding-factor ((format flexi-utf-16-format))
+  (declare #.*standard-optimize-settings*)
+  ;; usually one character maps to two octets, but characters with
+  ;; code points above #x10000 map to four octets - we assume that we
+  ;; usually don't see these characters but of course have to return a
+  ;; float
+  2.0d0)
+
+(defmethod encoding-factor ((format flexi-utf-32-format))
+  (declare #.*standard-optimize-settings*)
+  ;; UTF-32 always matches every character to four octets
+  4)
+
+(defmethod encoding-factor ((format flexi-crlf-mixin))
+  (declare #.*standard-optimize-settings*)
+  ;; if the sequence #\Return #\Linefeed is the line-end marker, this
+  ;; obviously makes encodings potentially longer and definitely makes
+  ;; the estimate unexact
+  (* 1.02d0 (call-next-method)))
+
+(defgeneric check-end (format start end i warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (:documentation "Helper function used below to determine if we tried
+to read past the end of the sequence.")
+  (:method (format start end i warnp)
+   (declare #.*fixnum-optimize-settings*)
+   (declare (fixnum start end i))
+   (when (and warnp (> i end))
+     (signal-encoding-warning format "These ~A octet~:P can't be ~
+decoded using ~A as the sequence is too short.  ~A octet~:P missing ~
+at then end."
+                              (- end start)
+                              (external-format-name format)
+                              (- i end))))
+  (:method ((format flexi-utf-16-format) start end i warnp)
+   (declare #.*fixnum-optimize-settings*)
+   (declare (fixnum start end i))
+   (declare (ignore i warnp))
+   ;; don't warn twice
+   (when (evenp (- end start))
+     (call-next-method))))
+
+(defgeneric compute-number-of-chars (format sequence start end warnp)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Computes the exact number of characters required to
+decode the sequence of octets in SEQUENCE from START to END using the
+external format FORMAT.  If WARNP is NIL, warnings will be muffled."))
+
+(defmethod compute-number-of-chars :around (format (list list) start end warnp)
+  (declare #.*standard-optimize-settings*)
+  (call-next-method format (coerce list 'vector) start end warnp))
+
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence warnp))
+  (- end start))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
+  ;; this method only applies to the 8-bit formats as all other
+  ;; formats with CRLF line endings have their own specialized methods
+  ;; below
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore warnp))
+  (let ((i start)
+        (length (- end start)))
+    (declare (fixnum i length))
+    (loop
+     (when (>= i end)
+       (return))
+     (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
+       (unless position
+         (return))
+       (setq i (1+ position))
+       (decf length)))
+    length))
+
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((octet (aref sequence i))
+            (length (cond ((not (logbitp 7 octet)) 1)
+                          ((= #b11000000 (logand octet #b11100000)) 2)
+                          ((= #b11100000 (logand octet #b11110000)) 3)
+                          (t 4))))
+       (declare (fixnum length) (type octet octet))
+       (incf sum)
+       (incf i length)))
+    (check-end format start end i warnp)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start)
+        (last-octet 0))
+    (declare (fixnum i sum) (type octet last-octet))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((octet (aref sequence i))
+            (length (cond ((not (logbitp 7 octet)) 1)
+                          ((= #b11000000 (logand octet #b11100000)) 2)
+                          ((= #b11100000 (logand octet #b11110000)) 3)
+                          (t 4))))
+       (declare (fixnum length) (type octet octet))
+       (unless (and (= octet +lf+) (= last-octet +cr+))
+         (incf sum))
+       (incf i length)
+       (setq last-octet octet)))
+    (check-end format start end i warnp)
+    sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))
+  (when (and warnp (oddp (- end start)))
+    (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-16 as ~:*~A is not even."
+                             (- end start))))  
+  
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (decf end 2)
+    (loop
+     (when (> i end)
+       (return))
+     (let* ((high-octet (aref sequence (1+ i)))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (type octet high-octet))
+       (incf sum)
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (decf end 2)
+    (loop
+     (when (> i end)
+       (return))
+     (let* ((high-octet (aref sequence i))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (type octet high-octet))
+       (incf sum)
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start)
+        (last-octet 0))
+    (declare (fixnum i sum) (type octet last-octet))
+    (decf end 2)
+    (loop
+     (when (> i end)
+       (return))
+     (let* ((high-octet (aref sequence (1+ i)))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (type octet high-octet))
+       (unless (and (zerop high-octet)
+                    (= (the octet (aref sequence i)) +lf+)
+                    (= last-octet +cr+))         
+         (incf sum))
+       (setq last-octet (if (zerop high-octet)
+                          (aref sequence i)
+                          0))
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
+    sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start)
+        (last-octet 0))
+    (declare (fixnum i sum) (type octet last-octet))
+    (decf end 2)
+    (loop
+     (when (> i end)
+       (return))
+     (let* ((high-octet (aref sequence i))
+            (length (cond ((<= #xd8 high-octet #xdf) 4)
+                          (t 2))))
+       (declare (fixnum length) (type octet high-octet))
+       (unless (and (zerop high-octet)
+                    (= (the octet (aref sequence (1+ i))) +lf+)
+                    (= last-octet +cr+))
+         (incf sum))
+       (setq last-octet (if (zerop high-octet)
+                          (aref sequence (1+ i))
+                          0))
+       (incf i length)))
+    (check-end format start (+ end 2) i warnp)
+    sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))
+  (let ((length (- end start)))
+    (when (and warnp (plusp (mod length 4)))
+      (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-32 as ~:*~A is not a multiple-value of four."
+                               length))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence warnp))
+  (ceiling (- end start) 4))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore warnp))
+  (let ((i start)
+        (length (ceiling (- end start) 4)))
+    (decf end 8)
+    (loop
+     (when (> i end)
+       (return))
+     (cond ((loop for j of-type fixnum from i
+                  for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
+                  always (= octet (aref sequence j)))
+            (decf length)
+            (incf i 8))
+           (t (incf i 4))))
+    length))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore warnp))
+  (let ((i start)
+        (length (ceiling (- end start) 4)))
+    (decf end 8)
+    (loop
+     (when (> i end)
+       (return))
+     (cond ((loop for j of-type fixnum from i
+                  for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
+                  always (= octet (aref sequence j)))
+            (decf length)
+            (incf i 8))
+           (t (incf i 4))))
+    length))
+
+(defgeneric compute-number-of-octets (format sequence start end)
+  (declare #.*standard-optimize-settings*)
+  (:documentation "Computes the exact number of octets required to
+encode the sequence of characters in SEQUENCE from START to END using
+the external format FORMAT."))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))  
+  (- end start))
+
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((< char-code #x80) 1)
+                               ((< char-code #x800) 2)
+                               ((< char-code #x10000) 3)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
+                               ((< char-code #x80) 1)
+                               ((< char-code #x800) 2)
+                               ((< char-code #x10000) 3)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+                               ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (let ((sum 0)
+        (i start))
+    (declare (fixnum i sum))
+    (loop
+     (when (>= i end)
+       (return))
+     (let* ((char-code (char-code (aref sequence i)))
+            (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+                               ((< char-code #x10000) 2)
+                               (t 4))))
+       (declare (fixnum char-length) (type char-code-integer char-code))
+       (incf sum char-length)
+       (incf i)))
+    sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (declare (ignore sequence))
+  (* 4 (- end start)))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+  (declare #.*fixnum-optimize-settings*)
+  (declare (fixnum start end))
+  (+ (call-next-method)
+     (* (case (external-format-name format)
+          (:utf-32 4)
+          (otherwise 1))
+        (count #\Newline sequence :start start :end end :test #'char=))))
\ No newline at end of file



More information about the Flexi-streams-cvs mailing list