[cffi-devel] A question about C and Unicode
nitralime
nitralime at googlemail.com
Wed Mar 23 17:53:43 UTC 2011
Hi folks!
I am playing around with a C library and came across
some issues related to Unicode in C. In the Unicode-enabled
version of this C library, which implements UCS-2 (i.e. just the BMP),
the "unicode character type" is defined in the header file
as follows:
======================================
/*
 * ZAB_CHAR
 * ZAB_UC
 *
 * Selects the character types used by the library, depending on the
 * platform and on whether the Unicode (UCS-2/UTF-16) build is enabled.
 */

/*
 * Decide whether wchar_t is 2 or 4 bytes on AIX.
 * NOTE(review): the original header contained this #ifdef block twice;
 * the first copy lacked the ZABccQ case, so with ZABccQ defined (and
 * not _AIX51 + 64-bit) BOTH ZABonAIX_wchar_is_2B and
 * ZABonAIX_wchar_is_4B ended up defined, and the WCHAR_is_2B test
 * below would then pick the wrong width. Only the complete copy is
 * kept here.
 */
#ifdef ZABonAIX
#if defined(_AIX51) && defined(ZABwith64_BIT)
#define ZABonAIX_wchar_is_4B
#elif defined(ZABccQ)
#define ZABonAIX_wchar_is_4B
#else
#define ZABonAIX_wchar_is_2B
#endif
#endif

/*
 * Platforms where wchar_t is known to be a 2-byte (UTF-16-capable)
 * type. The AIX conjunction is parenthesized explicitly: && binds
 * tighter than ||, so the meaning is unchanged but now obvious.
 */
#if defined(ZABonNT) || \
    defined(ZABonOS400) || \
    (defined(ZABonOS390) && !defined(_LP64)) || \
    (defined(ZABonAIX) && defined(ZABonAIX_wchar_is_2B))
#define WCHAR_is_2B
#else
#define WCHAR_is_4B
#endif

/* GCC >= 4.3 with the UTF-16 patch provides char16_t via <uchar.h>. */
#if defined(ZABonLIN) && defined(GCC_UTF16_PATCH)
#if __GNUC_PREREQ (4,3)
#include <uchar.h>
#define ZAB_UC_is_char16
#endif
#endif

#ifndef ZABwithUNICODE
/* Non-Unicode build: plain 1-byte characters. */
#define ZAB_UC_is_1B
typedef char ZAB_CHAR;
typedef char ZAB_UC;
#else /* ZABwithUNICODE */
#if defined(WCHAR_is_2B)
/* wchar_t already holds a 2-byte UTF-16 code unit on this platform. */
#define ZAB_UC_is_wchar
typedef wchar_t ZAB_CHAR;
typedef wchar_t ZAB_UC;
#elif defined(ZAB_UC_is_char16)
/* Patched GCC: use the dedicated 16-bit character type. */
typedef char16_t ZAB_CHAR;
typedef char16_t ZAB_UC;
#else
/* Fall back to a plain 16-bit unsigned integer type. */
#define ZAB_UC_is_UTF16_without_wchar
typedef unsigned short ZAB_CHAR;
typedef unsigned short ZAB_UC;
#endif
#endif /* ZABwithUNICODE or not */

/*
 * CFRSDKwith(out)UTF16_LITERALS
 * for CFR SDK applications: controls use of UTF-16
 * literal enabled compilers.
 *
 * NOTE(review): the (__GNUC__<3) line below had lost its trailing
 * line-continuation backslash on the "&&" line, which left the #elif
 * with unbalanced parentheses; restored.
 */
#if defined(CFRSDKwithUTF16_LITERALS)
#elif defined(CFRSDKwithoutUTF16_LITERALS)
#define ZABwithoutUTF16_LITERALS
#elif defined(WCHAR_is_2B) || \
    defined(ZABonHP_UX) || \
    (defined(ZABonLIN) && defined(__i386__) && defined(__GNUC__) && \
     (__GNUC__<3)) || \
    (defined(ZABonLIN) && defined(GCC_UTF16_PATCH)) || \
    defined(ZABonSUN) || defined(ZABonAIX)
/* we have literals for UTF-16 */
#else
#define ZABwithoutUTF16_LITERALS
#endif
======================================
All this boils down to
+---------------------------+
+------>-| typedef wchar_t ZAB_CHAR; |
| | typedef wchar_t ZAB_UC; |
| +---------------------------+
|
+---------+ | +----------------------------+
| Unicode |------+------>-| typedef char16_t ZAB_CHAR; |
+---------+ | | typedef char16_t ZAB_UC; |
| +----------------------------+
|
| +----------------------------------+
+------>-| typedef unsigned short ZAB_CHAR; |
| typedef unsigned short ZAB_UC; |
+----------------------------------+
The question is now: Is it correct (resp. safe)
to /*defctype*/ ZAB_UC just as :uint16 and interpret
it as a UTF-16 code unit (with the appropriate endianness)?
How can types like wchar_t and char16_t be defined (resp. used) in
CFFI?
Regards
Nik
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://mailman.common-lisp.net/pipermail/cffi-devel/attachments/20110323/40596fca/attachment.html>
More information about the cffi-devel
mailing list