[flexi-streams-cvs] r57 - branches/edi
eweitz at common-lisp.net
eweitz at common-lisp.net
Sun May 25 12:26:48 UTC 2008
Author: eweitz
Date: Sun May 25 08:26:47 2008
New Revision: 57
Added:
branches/edi/length.lisp (contents, props changed)
Modified:
branches/edi/decode.lisp
branches/edi/encode.lisp
branches/edi/external-format.lisp
branches/edi/flexi-streams.asd
Log:
Re-org
Modified: branches/edi/decode.lisp
==============================================================================
--- branches/edi/decode.lisp (original)
+++ branches/edi/decode.lisp Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.20 2008/05/25 03:25:30 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/decode.lisp,v 1.21 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,256 +29,6 @@
(in-package :flexi-streams)
-(defgeneric compute-number-of-chars (format sequence start end warnp)
- (declare #.*standard-optimize-settings*)
- (:documentation "Computes the exact number of characters required to
-decode the sequence of octets in SEQUENCE from START to END using the
-external format FORMAT. If WARNP is NIL, warnings will be muffled."))
-
-(defmethod compute-number-of-chars :around (format (list list) start end warnp)
- (declare #.*standard-optimize-settings*)
- (call-next-method format (coerce list 'vector) start end warnp))
-
-(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence warnp))
- (- end start))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
- ;; this method only applies to the 8-bit formats as all other
- ;; formats with CRLF line endings have their own specialized methods
- ;; below
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore warnp))
- (let ((i start)
- (length (- end start)))
- (declare (fixnum i length))
- (loop
- (when (>= i end)
- (return))
- (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
- (unless position
- (return))
- (setq i (1+ position))
- (decf length)))
- length))
-
-(defgeneric check-end (format start end i warnp)
- (declare #.*fixnum-optimize-settings*)
- (:method (format start end i warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end i))
- (when (and warnp (> i end))
- (signal-encoding-warning format "These ~A octet~:P can't be ~
-decoded using ~A as the sequence is too short. ~A octet~:P missing ~
-at then end."
- (- end start)
- (external-format-name format)
- (- i end))))
- (:method ((format flexi-utf-16-format) start end i warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end i))
- (declare (ignore i warnp))
- ;; don't warn twice
- (when (evenp (- end start))
- (call-next-method))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((octet (aref sequence i))
- (length (cond ((not (logbitp 7 octet)) 1)
- ((= #b11000000 (logand octet #b11100000)) 2)
- ((= #b11100000 (logand octet #b11110000)) 3)
- (t 4))))
- (declare (fixnum length) (type octet octet))
- (incf sum)
- (incf i length)))
- (check-end format start end i warnp)
- sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start)
- (last-octet 0))
- (declare (fixnum i sum) (type octet last-octet))
- (loop
- (when (>= i end)
- (return))
- (let* ((octet (aref sequence i))
- (length (cond ((not (logbitp 7 octet)) 1)
- ((= #b11000000 (logand octet #b11100000)) 2)
- ((= #b11100000 (logand octet #b11110000)) 3)
- (t 4))))
- (declare (fixnum length) (type octet octet))
- (unless (and (= octet +lf+) (= last-octet +cr+))
- (incf sum))
- (incf i length)
- (setq last-octet octet)))
- (check-end format start end i warnp)
- sum))
-
-(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence))
- (when (and warnp (oddp (- end start)))
- (signal-encoding-warning format "~A octet~:P cannot be decoded ~
-using UTF-16 as ~:*~A is not even."
- (- end start))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (decf end 2)
- (loop
- (when (> i end)
- (return))
- (let* ((high-octet (aref sequence (1+ i)))
- (length (cond ((<= #xd8 high-octet #xdf) 4)
- (t 2))))
- (declare (fixnum length) (type octet high-octet))
- (incf sum)
- (incf i length)))
- (check-end format start (+ end 2) i warnp)
- sum))
-
-(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (decf end 2)
- (loop
- (when (> i end)
- (return))
- (let* ((high-octet (aref sequence i))
- (length (cond ((<= #xd8 high-octet #xdf) 4)
- (t 2))))
- (declare (fixnum length) (type octet high-octet))
- (incf sum)
- (incf i length)))
- (check-end format start (+ end 2) i warnp)
- sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start)
- (last-octet 0))
- (declare (fixnum i sum) (type octet last-octet))
- (decf end 2)
- (loop
- (when (> i end)
- (return))
- (let* ((high-octet (aref sequence (1+ i)))
- (length (cond ((<= #xd8 high-octet #xdf) 4)
- (t 2))))
- (declare (fixnum length) (type octet high-octet))
- (unless (and (zerop high-octet)
- (= (the octet (aref sequence i)) +lf+)
- (= last-octet +cr+))
- (incf sum))
- (setq last-octet (if (zerop high-octet)
- (aref sequence i)
- 0))
- (incf i length)))
- (check-end format start (+ end 2) i warnp)
- sum))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start)
- (last-octet 0))
- (declare (fixnum i sum) (type octet last-octet))
- (decf end 2)
- (loop
- (when (> i end)
- (return))
- (let* ((high-octet (aref sequence i))
- (length (cond ((<= #xd8 high-octet #xdf) 4)
- (t 2))))
- (declare (fixnum length) (type octet high-octet))
- (unless (and (zerop high-octet)
- (= (the octet (aref sequence (1+ i))) +lf+)
- (= last-octet +cr+))
- (incf sum))
- (setq last-octet (if (zerop high-octet)
- (aref sequence (1+ i))
- 0))
- (incf i length)))
- (check-end format start (+ end 2) i warnp)
- sum))
-
-(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence))
- (let ((length (- end start)))
- (when (and warnp (plusp (mod length 4)))
- (signal-encoding-warning format "~A octet~:P cannot be decoded ~
-using UTF-32 as ~:*~A is not a multiple-value of four."
- length))))
-
-(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence warnp))
- (ceiling (- end start) 4))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore warnp))
- (let ((i start)
- (length (ceiling (- end start) 4)))
- (decf end 8)
- (loop
- (when (> i end)
- (return))
- (cond ((loop for j of-type fixnum from i
- for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
- always (= octet (aref sequence j)))
- (decf length)
- (incf i 8))
- (t (incf i 4))))
- length))
-
-(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore warnp))
- (let ((i start)
- (length (ceiling (- end start) 4)))
- (decf end 8)
- (loop
- (when (> i end)
- (return))
- (cond ((loop for j of-type fixnum from i
- for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
- always (= octet (aref sequence j)))
- (decf length)
- (incf i 8))
- (t (incf i 4))))
- length))
-
(defun recover-from-encoding-error (external-format format-control &rest format-args)
"Helper function used by OCTETS-TO-CHAR-CODE below to deal with
encoding errors. Checks if *SUBSTITUTION-CHAR* is not NIL and returns
Modified: branches/edi/encode.lisp
==============================================================================
--- branches/edi/encode.lisp (original)
+++ branches/edi/encode.lisp Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.17 2008/05/25 03:25:30 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.18 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -29,125 +29,6 @@
(in-package :flexi-streams)
-(defgeneric compute-number-of-octets (format sequence start end)
- (declare #.*standard-optimize-settings*)
- (:documentation "Computes the exact number of octets required to
-encode the sequence of characters in SEQUENCE from START to END using
-the external format FORMAT."))
-
-(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence))
- (- end start))
-
-(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((< char-code #x80) 1)
- ((< char-code #x800) 2)
- ((< char-code #x10000) 3)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
- ((< char-code #x80) 1)
- ((< char-code #x800) 2)
- ((< char-code #x10000) 3)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((< char-code #x10000) 2)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
- ((< char-code #x10000) 2)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (let ((sum 0)
- (i start))
- (declare (fixnum i sum))
- (loop
- (when (>= i end)
- (return))
- (let* ((char-code (char-code (aref sequence i)))
- (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
- ((< char-code #x10000) 2)
- (t 4))))
- (declare (fixnum char-length) (type char-code-integer char-code))
- (incf sum char-length)
- (incf i)))
- sum))
-
-(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (declare (ignore sequence))
- (* 4 (- end start)))
-
-(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
- (declare #.*fixnum-optimize-settings*)
- (declare (fixnum start end))
- (+ (call-next-method)
- (* (case (external-format-name format)
- (:utf-32 4)
- (otherwise 1))
- (count #\Newline sequence :start start :end end :test #'char=))))
-
(defgeneric char-to-octets (format char writer)
(declare #.*standard-optimize-settings*)
(:documentation "Converts the character CHAR to a sequence of octets
Modified: branches/edi/external-format.lisp
==============================================================================
--- branches/edi/external-format.lisp (original)
+++ branches/edi/external-format.lisp Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.21 2008/05/20 23:44:45 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.22 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -387,46 +387,3 @@
NORMALIZE-EXTERNAL-FORMAT."
(print-unreadable-object (object stream :type t :identity t)
(prin1 (normalize-external-format object) stream)))
-
-(defgeneric encoding-factor (format)
- (:documentation "Given an external format FORMAT, returns a factor
-which denotes the octets to characters ratio to expect when
-encoding/decoding. If the returned value is an integer, the factor is
-assumed to be exact. If it is a \(double) float, the factor is
-supposed to be based on heuristics and usually not exact.
-
-This factor is used in string.lisp.")
- (declare #.*standard-optimize-settings*))
-
-(defmethod encoding-factor ((format flexi-8-bit-format))
- (declare #.*standard-optimize-settings*)
- ;; 8-bit encodings map octets to characters in an exact one-to-one
- ;; fashion
- 1)
-
-(defmethod encoding-factor ((format flexi-utf-8-format))
- (declare #.*standard-optimize-settings*)
- ;; UTF-8 characters can be anything from one to six octets, but we
- ;; assume that the "overhead" is only about 5 percent - this
- ;; estimate is obviously very much dependant on the content
- 1.05d0)
-
-(defmethod encoding-factor ((format flexi-utf-16-format))
- (declare #.*standard-optimize-settings*)
- ;; usually one character maps to two octets, but characters with
- ;; code points above #x10000 map to four octets - we assume that we
- ;; usually don't see these characters but of course have to return a
- ;; float
- 2.0d0)
-
-(defmethod encoding-factor ((format flexi-utf-32-format))
- (declare #.*standard-optimize-settings*)
- ;; UTF-32 always matches every character to four octets
- 4)
-
-(defmethod encoding-factor ((format flexi-crlf-mixin))
- (declare #.*standard-optimize-settings*)
- ;; if the sequence #\Return #\Linefeed is the line-end marker, this
- ;; obviously makes encodings potentially longer and definitely makes
- ;; the estimate unexact
- (* 1.02d0 (call-next-method)))
Modified: branches/edi/flexi-streams.asd
==============================================================================
--- branches/edi/flexi-streams.asd (original)
+++ branches/edi/flexi-streams.asd Sun May 25 08:26:47 2008
@@ -1,5 +1,5 @@
;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: CL-USER; Base: 10 -*-
-;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.69 2008/05/23 14:56:46 edi Exp $
+;;; $Header: /usr/local/cvsrep/flexi-streams/flexi-streams.asd,v 1.70 2008/05/25 12:26:02 edi Exp $
;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
@@ -47,6 +47,7 @@
(:file "util")
(:file "conditions")
(:file "external-format")
+ (:file "length")
(:file "encode")
(:file "decode")
(:file "in-memory")
Added: branches/edi/length.lisp
==============================================================================
--- (empty file)
+++ branches/edi/length.lisp Sun May 25 08:26:47 2008
@@ -0,0 +1,444 @@
+;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
+;;; $Header: /usr/local/cvsrep/flexi-streams/length.lisp,v 1.1 2008/05/25 12:26:02 edi Exp $
+
+;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
+
+;;; Redistribution and use in source and binary forms, with or without
+;;; modification, are permitted provided that the following conditions
+;;; are met:
+
+;;; * Redistributions of source code must retain the above copyright
+;;; notice, this list of conditions and the following disclaimer.
+
+;;; * Redistributions in binary form must reproduce the above
+;;; copyright notice, this list of conditions and the following
+;;; disclaimer in the documentation and/or other materials
+;;; provided with the distribution.
+
+;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
+;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
+;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+(in-package :flexi-streams)
+
+(defgeneric encoding-factor (format)
+ (:documentation "Given an external format FORMAT, returns a factor
+which denotes the octets to characters ratio to expect when
+encoding/decoding. If the returned value is an integer, the factor is
+assumed to be exact. If it is a \(double) float, the factor is
+supposed to be based on heuristics and usually not exact.
+
+This factor is used in string.lisp.")
+ (declare #.*standard-optimize-settings*))
+
+(defmethod encoding-factor ((format flexi-8-bit-format))
+ (declare #.*standard-optimize-settings*)
+ ;; 8-bit encodings map octets to characters in an exact one-to-one
+ ;; fashion
+ 1)
+
+(defmethod encoding-factor ((format flexi-utf-8-format))
+ (declare #.*standard-optimize-settings*)
+ ;; UTF-8 characters can be anything from one to six octets, but we
+ ;; assume that the "overhead" is only about 5 percent - this
+ ;; estimate is obviously very much dependant on the content
+ 1.05d0)
+
+(defmethod encoding-factor ((format flexi-utf-16-format))
+ (declare #.*standard-optimize-settings*)
+ ;; usually one character maps to two octets, but characters with
+ ;; code points above #x10000 map to four octets - we assume that we
+ ;; usually don't see these characters but of course have to return a
+ ;; float
+ 2.0d0)
+
+(defmethod encoding-factor ((format flexi-utf-32-format))
+ (declare #.*standard-optimize-settings*)
+ ;; UTF-32 always matches every character to four octets
+ 4)
+
+(defmethod encoding-factor ((format flexi-crlf-mixin))
+ (declare #.*standard-optimize-settings*)
+ ;; if the sequence #\Return #\Linefeed is the line-end marker, this
+ ;; obviously makes encodings potentially longer and definitely makes
+ ;; the estimate unexact
+ (* 1.02d0 (call-next-method)))
+
+(defgeneric check-end (format start end i warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (:documentation "Helper function used below to determine if we tried
+to read past the end of the sequence.")
+ (:method (format start end i warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end i))
+ (when (and warnp (> i end))
+ (signal-encoding-warning format "These ~A octet~:P can't be ~
+decoded using ~A as the sequence is too short. ~A octet~:P missing ~
+at then end."
+ (- end start)
+ (external-format-name format)
+ (- i end))))
+ (:method ((format flexi-utf-16-format) start end i warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end i))
+ (declare (ignore i warnp))
+ ;; don't warn twice
+ (when (evenp (- end start))
+ (call-next-method))))
+
+(defgeneric compute-number-of-chars (format sequence start end warnp)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "Computes the exact number of characters required to
+decode the sequence of octets in SEQUENCE from START to END using the
+external format FORMAT. If WARNP is NIL, warnings will be muffled."))
+
+(defmethod compute-number-of-chars :around (format (list list) start end warnp)
+ (declare #.*standard-optimize-settings*)
+ (call-next-method format (coerce list 'vector) start end warnp))
+
+(defmethod compute-number-of-chars ((format flexi-8-bit-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence warnp))
+ (- end start))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-mixin) sequence start end warnp)
+ ;; this method only applies to the 8-bit formats as all other
+ ;; formats with CRLF line endings have their own specialized methods
+ ;; below
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore warnp))
+ (let ((i start)
+ (length (- end start)))
+ (declare (fixnum i length))
+ (loop
+ (when (>= i end)
+ (return))
+ (let ((position (search #.(vector +cr+ +lf+) sequence :start2 i :end2 end :test #'=)))
+ (unless position
+ (return))
+ (setq i (1+ position))
+ (decf length)))
+ length))
+
+(defmethod compute-number-of-chars ((format flexi-utf-8-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((octet (aref sequence i))
+ (length (cond ((not (logbitp 7 octet)) 1)
+ ((= #b11000000 (logand octet #b11100000)) 2)
+ ((= #b11100000 (logand octet #b11110000)) 3)
+ (t 4))))
+ (declare (fixnum length) (type octet octet))
+ (incf sum)
+ (incf i length)))
+ (check-end format start end i warnp)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-8-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start)
+ (last-octet 0))
+ (declare (fixnum i sum) (type octet last-octet))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((octet (aref sequence i))
+ (length (cond ((not (logbitp 7 octet)) 1)
+ ((= #b11000000 (logand octet #b11100000)) 2)
+ ((= #b11100000 (logand octet #b11110000)) 3)
+ (t 4))))
+ (declare (fixnum length) (type octet octet))
+ (unless (and (= octet +lf+) (= last-octet +cr+))
+ (incf sum))
+ (incf i length)
+ (setq last-octet octet)))
+ (check-end format start end i warnp)
+ sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-16-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (when (and warnp (oddp (- end start)))
+ (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-16 as ~:*~A is not even."
+ (- end start))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-16-le-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (decf end 2)
+ (loop
+ (when (> i end)
+ (return))
+ (let* ((high-octet (aref sequence (1+ i)))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (type octet high-octet))
+ (incf sum)
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-utf-16-be-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (decf end 2)
+ (loop
+ (when (> i end)
+ (return))
+ (let* ((high-octet (aref sequence i))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (type octet high-octet))
+ (incf sum)
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-le-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start)
+ (last-octet 0))
+ (declare (fixnum i sum) (type octet last-octet))
+ (decf end 2)
+ (loop
+ (when (> i end)
+ (return))
+ (let* ((high-octet (aref sequence (1+ i)))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (type octet high-octet))
+ (unless (and (zerop high-octet)
+ (= (the octet (aref sequence i)) +lf+)
+ (= last-octet +cr+))
+ (incf sum))
+ (setq last-octet (if (zerop high-octet)
+ (aref sequence i)
+ 0))
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
+ sum))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-16-be-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start)
+ (last-octet 0))
+ (declare (fixnum i sum) (type octet last-octet))
+ (decf end 2)
+ (loop
+ (when (> i end)
+ (return))
+ (let* ((high-octet (aref sequence i))
+ (length (cond ((<= #xd8 high-octet #xdf) 4)
+ (t 2))))
+ (declare (fixnum length) (type octet high-octet))
+ (unless (and (zerop high-octet)
+ (= (the octet (aref sequence (1+ i))) +lf+)
+ (= last-octet +cr+))
+ (incf sum))
+ (setq last-octet (if (zerop high-octet)
+ (aref sequence (1+ i))
+ 0))
+ (incf i length)))
+ (check-end format start (+ end 2) i warnp)
+ sum))
+
+(defmethod compute-number-of-chars :before ((format flexi-utf-32-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (let ((length (- end start)))
+ (when (and warnp (plusp (mod length 4)))
+ (signal-encoding-warning format "~A octet~:P cannot be decoded ~
+using UTF-32 as ~:*~A is not a multiple-value of four."
+ length))))
+
+(defmethod compute-number-of-chars ((format flexi-utf-32-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence warnp))
+ (ceiling (- end start) 4))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-le-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore warnp))
+ (let ((i start)
+ (length (ceiling (- end start) 4)))
+ (decf end 8)
+ (loop
+ (when (> i end)
+ (return))
+ (cond ((loop for j of-type fixnum from i
+ for octet across #.(vector +cr+ 0 0 0 +lf+ 0 0 0)
+ always (= octet (aref sequence j)))
+ (decf length)
+ (incf i 8))
+ (t (incf i 4))))
+ length))
+
+(defmethod compute-number-of-chars ((format flexi-crlf-utf-32-be-format) sequence start end warnp)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore warnp))
+ (let ((i start)
+ (length (ceiling (- end start) 4)))
+ (decf end 8)
+ (loop
+ (when (> i end)
+ (return))
+ (cond ((loop for j of-type fixnum from i
+ for octet across #.(vector 0 0 0 +cr+ 0 0 0 +lf+)
+ always (= octet (aref sequence j)))
+ (decf length)
+ (incf i 8))
+ (t (incf i 4))))
+ length))
+
+(defgeneric compute-number-of-octets (format sequence start end)
+ (declare #.*standard-optimize-settings*)
+ (:documentation "Computes the exact number of octets required to
+encode the sequence of characters in SEQUENCE from START to END using
+the external format FORMAT."))
+
+(defmethod compute-number-of-octets ((format flexi-8-bit-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (- end start))
+
+(defmethod compute-number-of-octets ((format flexi-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((< char-code #x80) 1)
+ ((< char-code #x800) 2)
+ ((< char-code #x10000) 3)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-8-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 2)
+ ((< char-code #x80) 1)
+ ((< char-code #x800) 2)
+ ((< char-code #x10000) 3)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-16-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-le-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+ ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-utf-16-be-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (let ((sum 0)
+ (i start))
+ (declare (fixnum i sum))
+ (loop
+ (when (>= i end)
+ (return))
+ (let* ((char-code (char-code (aref sequence i)))
+ (char-length (cond ((= char-code #.(char-code #\Newline)) 4)
+ ((< char-code #x10000) 2)
+ (t 4))))
+ (declare (fixnum char-length) (type char-code-integer char-code))
+ (incf sum char-length)
+ (incf i)))
+ sum))
+
+(defmethod compute-number-of-octets ((format flexi-utf-32-format) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (declare (ignore sequence))
+ (* 4 (- end start)))
+
+(defmethod compute-number-of-octets ((format flexi-crlf-mixin) sequence start end)
+ (declare #.*fixnum-optimize-settings*)
+ (declare (fixnum start end))
+ (+ (call-next-method)
+ (* (case (external-format-name format)
+ (:utf-32 4)
+ (otherwise 1))
+ (count #\Newline sequence :start start :end end :test #'char=))))
\ No newline at end of file
More information about the Flexi-streams-cvs
mailing list