[cxml-cvs] CVS update: cxml/runes/encodings-data.lisp cxml/runes/encodings.lisp cxml/runes/package.lisp cxml/runes/xstream.lisp

David Lichteblau dlichteblau at common-lisp.net
Sun Nov 27 17:19:15 UTC 2005


Update of /project/cxml/cvsroot/cxml/runes
In directory common-lisp.net:/tmp/cvs-serv4026/runes

Modified Files:
	encodings-data.lisp encodings.lisp package.lisp xstream.lisp 
Log Message:
encoding-fehler resignalisieren

Date: Sun Nov 27 18:19:12 2005
Author: dlichteblau

Index: cxml/runes/encodings-data.lisp
diff -u cxml/runes/encodings-data.lisp:1.1.1.1 cxml/runes/encodings-data.lisp:1.2
--- cxml/runes/encodings-data.lisp:1.1.1.1	Sun Mar 13 19:02:27 2005
+++ cxml/runes/encodings-data.lisp	Sun Nov 27 18:19:12 2005
@@ -1,4 +1,4 @@
-(in-package :encoding)
+(in-package :runes-encoding)
 
 (progn
   (add-name :us-ascii "ANSI_X3.4-1968") 


Index: cxml/runes/encodings.lisp
diff -u cxml/runes/encodings.lisp:1.1.1.1 cxml/runes/encodings.lisp:1.2
--- cxml/runes/encodings.lisp:1.1.1.1	Sun Mar 13 19:02:27 2005
+++ cxml/runes/encodings.lisp	Sun Nov 27 18:19:12 2005
@@ -1,4 +1,9 @@
-(in-package :encoding)
+(in-package :runes-encoding)
+
+(define-condition encoding-error (simple-error) ())
+
+(defun xerror (fmt &rest args)
+  (error 'encoding-error :format-control fmt :format-arguments args))
 
 ;;;; ---------------------------------------------------------------------------
 ;;;; Encoding names
@@ -115,6 +120,9 @@
       (let ((hi (aref in rptr))
             (lo (aref in (%+ 1 rptr))))
         (setf rptr (%+ 2 rptr))
+	;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste
+	;; Zeichen abwarten und nachgucken, dass nicht etwa die andere
+	;; Haelfte fehlt!
         (setf (aref out wptr) (logior (ash hi 8) lo))
         (setf wptr (%+ 1 wptr))))
     (values wptr rptr)))
@@ -132,6 +140,9 @@
       (let ((lo (aref in (%+ 0 rptr)))
             (hi (aref in (%+ 1 rptr))))
         (setf rptr (%+ 2 rptr))
+	;; FIXME: Wenn wir hier ein Surrogate sehen, muessen wir das naechste
+	;; Zeichen abwarten und nachgucken, dass nicht etwa die andere
+	;; Haelfte fehlt!
         (setf (aref out wptr) (logior (ash hi 8) lo))
         (setf wptr (%+ 1 wptr))))
     (values wptr rptr)))
@@ -147,13 +158,9 @@
         byte0)
     (macrolet ((put (x)
                  `((lambda (x)
-                     (cond ((or (<= #xD800 x #xDBFF)
-                                (<= #xDC00 x #xDFFF))
-                            (error "Encoding UTF-16 in UTF-8? : #x~x." x)))
-                     '(unless (data-char-p x)
-                       (error "#x~x is not a data character." x))
-                     ;;(fresh-line)
-                     ;;(prin1 x) (princ "-> ")
+                     (when (or (<= #xD800 x #xDBFF)
+			       (<= #xDC00 x #xDFFF))
+		       (xerror "surrogate encoded in UTF-8: #x~x." x))
                      (cond ((%> x #xFFFF)
                             (setf (aref out (%+ 0 wptr)) (%+ #xD7C0 (ash x -10))
                                   (aref out (%+ 1 wptr)) (%ior #xDC00 (%and x #x3FF)))
@@ -196,7 +203,7 @@
                (setf rptr (%+ rptr 1)))
             
               ((%<= #|#b10000000|# byte0 #b10111111)
-               (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)
+               (xerror "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)
                (setf rptr (%+ rptr 1)))
             
               ((%<= #|#b11000000|# byte0 #b11011111)
@@ -260,7 +267,7 @@
                       (return))))
             
               (t
-               (error "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)) ) )) 
+               (xerror "Corrupted UTF-8 input (initial byte was #b~8,'0B)" byte0)) ) )) 
     (values wptr rptr))  )
 
 (defmethod encoding-p ((object (eql :utf-16-little-endian))) t)
@@ -343,5 +350,4 @@
 
 (defun find-charset (name)
   (or (gethash name *charsets*)
-      (error "There is no character set named ~S." name)))
-
+      (xerror "There is no character set named ~S." name)))


Index: cxml/runes/package.lisp
diff -u cxml/runes/package.lisp:1.2 cxml/runes/package.lisp:1.3
--- cxml/runes/package.lisp:1.2	Fri Mar 25 19:16:56 2005
+++ cxml/runes/package.lisp	Sun Nov 27 18:19:12 2005
@@ -61,8 +61,9 @@
            #:set-to-full-speed
            #:xstream-name))
 
-(defpackage :encoding
+(defpackage :runes-encoding
   (:use :cl :runes)
   (:export
+   #:encoding-error
    #:find-encoding
    #:decode-sequence))


Index: cxml/runes/xstream.lisp
diff -u cxml/runes/xstream.lisp:1.2 cxml/runes/xstream.lisp:1.3
--- cxml/runes/xstream.lisp:1.2	Fri Mar 25 19:16:56 2005
+++ cxml/runes/xstream.lisp	Sun Nov 27 18:19:12 2005
@@ -1,4 +1,4 @@
-;;; -*- Mode: Lisp; Syntax: Common-Lisp; Package: runes; readtable: runes; Encoding: utf-8; -*-
+;;; -*- Mode: Lisp; Syntax: Common-Lisp; readtable: runes; Encoding: utf-8; -*-
 ;;; ---------------------------------------------------------------------------
 ;;;     Title: Fast streams
 ;;;   Created: 1999-07-17
@@ -66,9 +66,7 @@
 ;;
 
 (eval-when (:compile-toplevel :load-toplevel :execute)
-  (defparameter *fast* '(optimize (speed 3) (safety 0)))
-  ;;(defparameter *fast* '(optimize (speed 2) (safety 3)))
-  )
+  (defparameter *fast* '(optimize (speed 3) (safety 0))))
 
 ;; Let us first define fast fixnum arithmetric get rid of type
 ;; checks. (After all we know what we do here).
@@ -277,10 +275,14 @@
                :end2 (xstream-os-left-end input))
       ;; then we take care that the buffer is large enough to carry at
       ;; least 100 bytes (a random number)
+      ;;
+      ;; david: was heisst da random?  ich nehme an, dass 100 einfach
+      ;; ausreichend sein soll, um die laengste utf-8 bytesequenz oder die
+      ;; beiden utf-16 surrogates zu halten?  dann ist 100 ja wohl dicke
+      ;; ausreichend und koennte in make-xstream ordentlich geprueft werden.
+      ;; oder was geht hier vor?
       (unless (>= (length (xstream-os-buffer input)) 100)
-        (error "You lost")
-        ;; todo: enlarge buffer
-        ))
+        (error "You lost")))
     (setf n
       (read-octets (xstream-os-buffer input) (xstream-os-stream input)
                    m (min (1- (length (xstream-os-buffer input)))
@@ -292,7 +294,7 @@
            :eof)
           (t
            (multiple-value-bind (fnw fnr) 
-               (encoding:decode-sequence
+               (runes-encoding:decode-sequence
                 (xstream-encoding input) 
                 (xstream-os-buffer input) 0 n
                 (xstream-buffer input) 0 (1- (length (xstream-buffer input)))




More information about the Cxml-cvs mailing list