Hi folks!
I am playing around with a C library and came across some issues related to unicode in C. In the unicode enabled version of this C library which implements UCS-2 (i.e. just BMP) the "unicode character type" is defined in the header file as follows: ====================================== /* * ZAB_CHAR * ZAB_UC */
#ifdef ZABonAIX #if defined(_AIX51) && defined(ZABwith64_BIT) #define ZABonAIX_wchar_is_4B #else #define ZABonAIX_wchar_is_2B #endif #endif
#ifdef ZABonAIX #if defined(_AIX51) && defined(ZABwith64_BIT) #define ZABonAIX_wchar_is_4B #elif defined(ZABccQ) #define ZABonAIX_wchar_is_4B #else #define ZABonAIX_wchar_is_2B #endif #endif
#if defined(ZABonNT) || \ defined(ZABonOS400) || \ (defined(ZABonOS390) && !defined(_LP64)) || \ defined(ZABonAIX) && defined(ZABonAIX_wchar_is_2B) #define WCHAR_is_2B #else #define WCHAR_is_4B #endif
#if defined(ZABonLIN) && defined(GCC_UTF16_PATCH) #if __GNUC_PREREQ (4,3) #include <uchar.h> #define ZAB_UC_is_char16 #endif #endif
#ifndef ZABwithUNICODE #define ZAB_UC_is_1B typedef char ZAB_CHAR; typedef char ZAB_UC; #else /* ZABwithUNICODE */ #if defined(WCHAR_is_2B) #define ZAB_UC_is_wchar typedef wchar_t ZAB_CHAR; typedef wchar_t ZAB_UC; #elif defined(ZAB_UC_is_char16) typedef char16_t ZAB_CHAR; typedef char16_t ZAB_UC; #else #define ZAB_UC_is_UTF16_without_wchar typedef unsigned short ZAB_CHAR; typedef unsigned short ZAB_UC; #endif #endif /* ZABwithUNICODE or not */
/* * CFRSDKwith(out)UTF16_LITERALS * for CFR SDK applications: controls use of UTF-16 * literal enabled compilers. */ #if defined(CFRSDKwithUTF16_LITERALS) #elif defined(CFRSDKwithoutUTF16_LITERALS) #define ZABwithoutUTF16_LITERALS #elif defined(WCHAR_is_2B) || \ defined(ZABonHP_UX) || \ (defined(ZABonLIN) && defined(__i386__) && defined(__GNUC__) && (__GNUC__<3)) || \ (defined(ZABonLIN) && defined(GCC_UTF16_PATCH)) || \ defined(ZABonSUN) || defined(ZABonAIX) /* we have literals for UTF-16 */ #else #define ZABwithoutUTF16_LITERALS #endif ======================================
All this boils down to
                    +---------------------------+
            +------>| typedef wchar_t ZAB_CHAR; |
            |       | typedef wchar_t ZAB_UC;   |
            |       +---------------------------+
            |
+---------+ |       +----------------------------+
| Unicode |-+------>| typedef char16_t ZAB_CHAR; |
+---------+ |       | typedef char16_t ZAB_UC;   |
            |       +----------------------------+
            |
            |       +----------------------------------+
            +------>| typedef unsigned short ZAB_CHAR; |
                    | typedef unsigned short ZAB_UC;   |
                    +----------------------------------+
The question is now: Is it correct (resp. safe) to defctype ZAB_UC just as :uint16 and interpret it as a UTF-16 code point (with an appropriate endianness)?
How can the types like wchar_t and char16_t be defined (resp. used) in CFFI?
Regards Nik
On Wed, Mar 23, 2011 at 5:53 PM, nitralime nitralime@googlemail.com wrote:
+------>-| typedef wchar_t ZAB_CHAR; | | | typedef wchar_t ZAB_UC; |
| Unicode |------+------>-| typedef char16_t ZAB_CHAR; | +---------+ | | typedef char16_t ZAB_UC; |
+------>-| typedef unsigned short ZAB_CHAR; | | typedef unsigned short ZAB_UC; |
The question is now: Is it correct (resp. safe) to defctype ZAB_UC just as :uint16 and interpret it as a UTF-16 code point (with an appropriate endianness)?
Maybe. Which typedef applies? This might be a job for cffi-grovel.
How can the types like wchar_t and char16_t be defined (resp. used) in CFFI?
I'm not sure. You'll have to figure out whether those types are equivalent to (un)signed short on your platform. Again, cffi-grovel might help.