[elephant-devel] UTF serializer/deserializer patch
Hiroyuki Komatsu
kom at narihara-lab.jp
Mon Aug 3 20:14:28 UTC 2009
Sorry, I'm not fluent in English.
The BDB btree stores UTF-16/UTF-32 strings in an incorrect sort order.
There are two problems in the string serializer:
The UTF serializers serialize in big-endian byte order
The UTF-32 comparator in libberkeley-db.c does not work correctly
The attached patch fixes these problems.
-------------- next part --------------
diff -rN -u old-elephant/src/db-bdb/libberkeley-db.c new-elephant/src/db-bdb/libberkeley-db.c
--- old-elephant/src/db-bdb/libberkeley-db.c 2009-08-04 04:34:01.000000000 +0900
+++ new-elephant/src/db-bdb/libberkeley-db.c 2009-08-04 04:34:01.000000000 +0900
@@ -1122,7 +1122,7 @@
/*****
printf("Doing a 32-bit compare\n");
*****/
- return wcs_cmp((wchar_t*)ad+5+offset, read_int32(ad+offset, 1), (wchar_t*)bd+5+offset, read_int32(bd+offset, 1));
+ return wcs_cmp((wchar_t*)(ad+5+offset), read_int32(ad+offset, 1), (wchar_t*)(bd+5+offset), read_int32(bd+offset, 1));
default:
/*****
printf("Doing a lex compare\n");
@@ -1313,7 +1313,7 @@
int min, sizediff, diff;
sizediff = length1 - length2;
min = sizediff > 0 ? length2 : length1;
- diff = wcsncmp(a, b, min /4);
+ diff = wcsncmp(a, b, min);
if (diff == 0) return sizediff;
return diff;
}
diff -rN -u old-elephant/src/elephant/unicode.lisp new-elephant/src/elephant/unicode.lisp
--- old-elephant/src/elephant/unicode.lisp 2009-08-04 04:34:01.000000000 +0900
+++ new-elephant/src/elephant/unicode.lisp 2009-08-04 04:34:01.000000000 +0900
@@ -145,10 +145,10 @@
(loop for i fixnum from 0 below characters do
(let ((code (char-code (funcall char string i))))
(when (> code #xFFFF) (fail))
- (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 2) size))
+ (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 2) size 1))
;; (coerce (ldb (byte 8 8) code) '(signed 8)))
(ldb (byte 8 8) code))
- (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 2) size 1))
+ (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 2) size 0))
;; (coerce (ldb (byte 8 0) code) '(signed 8))))))
(ldb (byte 8 0) code))))
(incf size (* characters 2))
@@ -174,13 +174,13 @@
(loop for i fixnum from 0 below characters do
(let ((code (char-code (funcall char string i))))
(when (> code #x10FFFF) (error "Invalid unicode code type"))
- (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 4) size 0))
+ (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 4) size 3))
(ldb (byte 8 24) code))
- (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 4) size 1))
- (ldb (byte 8 16) code))
(setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 4) size 2))
+ (ldb (byte 8 16) code))
+ (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 4) size 1))
(ldb (byte 8 8) code))
- (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 4) size 3))
+ (setf (uffi:deref-array buffer '(:array :unsigned-char) (+ (* i 4) size 0))
(ldb (byte 8 0) code)))))
(incf size (* characters 4))
t)))
@@ -274,8 +274,8 @@
(assert (subtypep (type-of string) 'simple-string))
(assert (compatible-unicode-support-p :utf16le))
(loop for i fixnum from 0 below length do
- (setf code (dpb (next-byte 0) (byte 8 8) 0))
- (setf code (dpb (next-byte 1) (byte 8 0) code))
+ (setf code (dpb (next-byte 1) (byte 8 8) 0))
+ (setf code (dpb (next-byte 0) (byte 8 0) code))
(setf (schar string i) (code-char code)))
(incf (elephant-memutil::buffer-stream-position bstream)
(* length 2)))
@@ -294,10 +294,10 @@
(assert (subtypep (type-of string) 'simple-string))
(assert (compatible-unicode-support-p :utf32le))
(loop for i fixnum from 0 below length do
- (setf code (dpb (next-byte 0) (byte 8 24) 0))
- (setf code (dpb (next-byte 1) (byte 8 16) code))
- (setf code (dpb (next-byte 2) (byte 8 8) code))
- (setf code (dpb (next-byte 3) (byte 8 0) code))
+ (setf code (dpb (next-byte 3) (byte 8 24) 0))
+ (setf code (dpb (next-byte 2) (byte 8 16) code))
+ (setf code (dpb (next-byte 1) (byte 8 8) code))
+ (setf code (dpb (next-byte 0) (byte 8 0) code))
(setf (char string i) (code-char code)))
(incf (elephant-memutil::buffer-stream-position bstream)
(* length 4))
More information about the elephant-devel
mailing list