Added remaining xlib patch required for gb18030 support (#1573).

This commit is contained in:
Stefan Dirsch 2008-11-22 19:40:54 +01:00
parent 55782a0a1f
commit 67e34d7a82
7 changed files with 13137 additions and 5 deletions

View file

@ -51,13 +51,21 @@ _XlcUtf8Loader(
return lcd;
/* The official IANA name for UTF-8 is "UTF-8" in upper case with a dash. */
if (!XLC_PUBLIC_PART(lcd)->codeset ||
(_XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8"))) {
if (!XLC_PUBLIC_PART(lcd)->codeset) {
_XlcDestroyLC(lcd);
return (XLCd) NULL;
}
else if (!_XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "UTF-8")) {
_XlcAddUtf8LocaleConverters(lcd);
}
else if (!_XlcCompareISOLatin1(XLC_PUBLIC_PART(lcd)->codeset, "GB18030")) {
_XlcAddGB18030LocaleConverters(lcd);
}
else {
_XlcDestroyLC(lcd);
return (XLCd) NULL;
}
_XlcAddUtf8LocaleConverters(lcd);
_XlcAddUtf8Converters(lcd);
return lcd;

View file

@ -912,6 +912,11 @@ extern void _XlcAddUtf8LocaleConverters(
XLCd lcd
);
/* Registers GB18030 converters for a GB18030 locale.
 * _XlcUtf8Loader calls this when the locale's codeset compares equal to
 * "GB18030". */
extern void _XlcAddGB18030LocaleConverters(
XLCd lcd
);
/* The default locale loader. Assumes an ASCII encoding. */
extern XLCd _XlcDefaultLoader(
const char* name

View file

@ -125,6 +125,8 @@ static const CTDataRec default_ct_data[] =
#endif
/* For use by utf8 -> ctext */
{ "BIG5-0:GLGR", "\033%/2"},
{ "BIG5HKSCS-0:GLGR", "\033%/2"},
{ "GBK-0:GLGR", "\033%/2"},
/* used by Emacs, but not backed by ISO-IR */
{ "BIG5-E0:GL", "\033$(0" },
{ "BIG5-E0:GR", "\033$)0" },

View file

@ -212,6 +212,8 @@ typedef struct {
#include "lcUniConv/ksc5601.h"
#include "lcUniConv/big5.h"
#include "lcUniConv/big5_emacs.h"
#include "lcUniConv/big5hkscs.h"
#include "lcUniConv/gbk.h"
static Utf8ConvRec all_charsets[] = {
/* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
@ -332,14 +334,20 @@ static Utf8ConvRec all_charsets[] = {
cp1256_mbtowc, cp1256_wctomb
},
{ "BIG5-0", NULLQUARK,
big5_mbtowc, big5_wctomb
},
big5_mbtowc, big5_wctomb
},
{ "BIG5-E0", NULLQUARK,
big5_0_mbtowc, big5_0_wctomb
},
{ "BIG5-E1", NULLQUARK,
big5_1_mbtowc, big5_1_wctomb
},
{ "GBK-0", NULLQUARK,
gbk_mbtowc, gbk_wctomb
},
{ "BIG5HKSCS-0", NULLQUARK,
big5hkscs_mbtowc, big5hkscs_wctomb
},
/* The ISO10646-1/UTF-8 entry occurs twice, once at the beginning
(for lookup speed), once at the end (as a fallback). */
@ -1807,6 +1815,585 @@ open_utf8tofcs(
return create_tofontcs_conv(from_lcd, &methods_utf8tocs);
}
/* ========================== iconv Stuff ================================ */
/* from XlcNCharSet to XlcNMultiByte */

/*
 * Converts text in a single charset to the multibyte encoding of the
 * current C locale.
 *
 * args[0] is the XlcCharSet of the input.  Its encoding_name is looked up
 * in all_charsets[]; the matching entry's cstowc method decodes each input
 * character to a wide character, which wctomb() then encodes.
 *
 * On a normal return *from / *from_left and *to / *to_left are advanced
 * past the bytes consumed and produced.
 *
 * Returns the number of characters that could not be encoded, 0 if there
 * is no input, or -1 if the charset argument is missing or unknown or the
 * input contains an illegal sequence (the in/out arguments are then left
 * untouched).
 */
static int
iconv_cstombs(conv, from, from_left, to, to_left, args, num_args)
    XlcConv conv;
    XPointer *from;
    int *from_left;
    XPointer *to;
    int *to_left;
    XPointer *args;
    int num_args;
{
    XlcCharSet charset;
    char *name;
    Utf8Conv convptr;
    int i;
    unsigned char const *src;
    unsigned char const *srcend;
    unsigned char *dst;
    unsigned char *dstend;
    int unconv_num;

    if (from == NULL || *from == NULL)
        return 0;

    if (num_args < 1)
        return -1;

    charset = (XlcCharSet) args[0];
    name = charset->encoding_name;
    /* not charset->name because the latter has a ":GL"/":GR" suffix */

    /* Scan the first all_charsets_count-1 table entries for the charset. */
    for (convptr = all_charsets, i = all_charsets_count-1; i > 0; convptr++, i--)
        if (!strcmp(convptr->name, name))
            break;
    if (i == 0)
        return -1;

    src = (unsigned char const *) *from;
    srcend = src + *from_left;
    dst = (unsigned char *) *to;
    dstend = dst + *to_left;
    unconv_num = 0;

    while (src < srcend) {
        ucs4_t wc;
        int consumed;
        int count;

        consumed = convptr->cstowc(conv, &wc, src, srcend-src);
        if (consumed == RET_ILSEQ)
            return -1;
        if (consumed == RET_TOOFEW(0))
            break;

        /* wctomb() cannot be told how much room is left and may store up
         * to MB_CUR_MAX bytes, so stop while that much space is still
         * guaranteed instead of writing past dstend. */
        if (dst > dstend || (size_t) (dstend - dst) < MB_CUR_MAX)
            break;

        /* Use the C library to convert widechar -> multibyte */
        count = wctomb((char *) dst, (wchar_t) wc);
        if (count == 0)
            break;
        if (count < 0) {
            /* Not representable in the locale: substitute BAD_WCHAR. */
            count = wctomb((char *) dst, BAD_WCHAR);
            if (count == 0)
                break;
            unconv_num++;
            /* If even the substitute cannot be encoded, emit nothing
             * rather than moving dst backwards. */
            if (count < 0)
                count = 0;
        }
        src += consumed;
        dst += count;
    }

    *from = (XPointer) src;
    *from_left = srcend - src;
    *to = (XPointer) dst;
    *to_left = dstend - dst;

    return unconv_num;
}
/* Method table for converters created by open_iconv_cstombs(). */
static XlcConvMethodsRec iconv_cstombs_methods = {
    close_converter,
    iconv_cstombs,
    NULL
};

/*
 * Opens a converter that runs iconv_cstombs().  The charset table is
 * initialised first because iconv_cstombs() searches all_charsets[].
 * Only from_lcd is used; the remaining arguments are ignored.
 *
 * Written with a prototype taking `const char *' type names, matching
 * open_iconv_mbstofcs(), which is registered through the same
 * _XlcSetConverter() interface.
 */
static XlcConv
open_iconv_cstombs(
    XLCd from_lcd,
    const char *from_type,
    XLCd to_lcd,
    const char *to_type)
{
    lazy_init_all_charsets();
    return create_conv(from_lcd, &iconv_cstombs_methods);
}
static int
iconv_mbstocs(conv, from, from_left, to, to_left, args, num_args)
XlcConv conv;
XPointer *from;
int *from_left;
XPointer *to;
int *to_left;
XPointer *args;
int num_args;
{
Utf8Conv *preferred_charsets;
XlcCharSet last_charset = NULL;
unsigned char const *src;
unsigned char const *srcend;
unsigned char *dst;
unsigned char *dstend;
int unconv_num;
if (from == NULL || *from == NULL)
return 0;
preferred_charsets = (Utf8Conv *) conv->state;
src = (unsigned char const *) *from;
srcend = src + *from_left;
dst = (unsigned char *) *to;
dstend = dst + *to_left;
unconv_num = 0;
while (src < srcend && dst < dstend) {
Utf8Conv chosen_charset = NULL;
XlcSide chosen_side = XlcNONE;
wchar_t wc;
int consumed;
int count;
/* Uses stdc iconv to convert multibyte -> widechar */
consumed = mbtowc(&wc, src, srcend-src);
if (consumed == 0)
break;
if (consumed == -1) {
src++;
unconv_num++;
continue;
}
count = charset_wctocs(preferred_charsets, &chosen_charset, &chosen_side, conv, dst, wc, dstend-dst);
if (count == RET_TOOSMALL)
break;
if (count == RET_ILSEQ) {
src += consumed;
unconv_num++;
continue;
}
if (last_charset == NULL) {
last_charset =
_XlcGetCharSetWithSide(chosen_charset->name, chosen_side);
if (last_charset == NULL) {
src += consumed;
unconv_num++;
continue;
}
} else {
if (!(last_charset->xrm_encoding_name == chosen_charset->xrm_name
&& (last_charset->side == XlcGLGR
|| last_charset->side == chosen_side)))
break;
}
src += consumed;
dst += count;
}
if (last_charset == NULL)
return -1;
*from = (XPointer) src;
*from_left = srcend - src;
*to = (XPointer) dst;
*to_left = dstend - dst;
if (num_args >= 1)
*((XlcCharSet *)args[0]) = last_charset;
return unconv_num;
}
/* Method table for converters that run iconv_mbstocs(). */
static XlcConvMethodsRec iconv_mbstocs_methods = {
    close_tocs_converter,
    iconv_mbstocs,
    NULL
};

/*
 * Opens a converter that runs iconv_mbstocs(), created through
 * create_tocs_conv().  Only from_lcd is used; the remaining arguments are
 * ignored.
 *
 * Written with a prototype taking `const char *' type names, matching
 * open_iconv_mbstofcs(), which is registered through the same
 * _XlcSetConverter() interface.
 */
static XlcConv
open_iconv_mbstocs(
    XLCd from_lcd,
    const char *from_type,
    XLCd to_lcd,
    const char *to_type)
{
    return create_tocs_conv(from_lcd, &iconv_mbstocs_methods);
}
/* from XlcNMultiByte to XlcNChar */

/*
 * Performs exactly the conversion of iconv_mbstocs(): same arguments, same
 * updates to *from, *from_left, *to, *to_left and args[0], same return
 * value.  The body used to be a line-for-line copy of that function, so it
 * now forwards to it to keep the two from drifting apart.
 */
static int
iconv_mbtocs(conv, from, from_left, to, to_left, args, num_args)
    XlcConv conv;
    XPointer *from;
    int *from_left;
    XPointer *to;
    int *to_left;
    XPointer *args;
    int num_args;
{
    return iconv_mbstocs(conv, from, from_left, to, to_left, args, num_args);
}
/* Method table for converters created by open_iconv_mbtocs(). */
static XlcConvMethodsRec iconv_mbtocs_methods = {
    close_tocs_converter,
    iconv_mbtocs,
    NULL
};

/*
 * Opens a converter that runs iconv_mbtocs(), created through
 * create_tocs_conv().  Only from_lcd is used; the remaining arguments are
 * ignored.
 *
 * Written with a prototype taking `const char *' type names, matching
 * open_iconv_mbstofcs(), which is registered through the same
 * _XlcSetConverter() interface.
 */
static XlcConv
open_iconv_mbtocs(
    XLCd from_lcd,
    const char *from_type,
    XLCd to_lcd,
    const char *to_type)
{
    return create_tocs_conv(from_lcd, &iconv_mbtocs_methods);
}
/* from XlcNMultiByte to XlcNString */
static int
iconv_mbstostr(conv, from, from_left, to, to_left, args, num_args)
XlcConv conv;
XPointer *from;
int *from_left;
XPointer *to;
int *to_left;
XPointer *args;
int num_args;
{
unsigned char const *src;
unsigned char const *srcend;
unsigned char *dst;
unsigned char *dstend;
int unconv_num;
if (from == NULL || *from == NULL)
return 0;
src = (unsigned char const *) *from;
srcend = src + *from_left;
dst = (unsigned char *) *to;
dstend = dst + *to_left;
unconv_num = 0;
while (src < srcend) {
unsigned char c;
wchar_t wc;
int consumed;
/* Uses stdc iconv to convert multibyte -> widechar */
consumed = mbtowc(&wc, src, srcend-src);
if (consumed == 0)
break;
if (dst == dstend)
break;
if (consumed == -1) {
consumed = 1;
c = BAD_CHAR;
unconv_num++;
} else {
if ((wc & ~(wchar_t)0xff) != 0) {
c = BAD_CHAR;
unconv_num++;
} else
c = (unsigned char) wc;
}
*dst++ = c;
src += consumed;
}
*from = (XPointer) src;
*from_left = srcend - src;
*to = (XPointer) dst;
*to_left = dstend - dst;
return unconv_num;
}
/* Method table for converters created by open_iconv_mbstostr(). */
static XlcConvMethodsRec iconv_mbstostr_methods = {
    close_converter,
    iconv_mbstostr,
    NULL
};

/*
 * Opens a converter that runs iconv_mbstostr().  Only from_lcd is used;
 * the remaining arguments are ignored.
 *
 * Written with a prototype taking `const char *' type names, matching
 * open_iconv_mbstofcs(), which is registered through the same
 * _XlcSetConverter() interface.
 */
static XlcConv
open_iconv_mbstostr(
    XLCd from_lcd,
    const char *from_type,
    XLCd to_lcd,
    const char *to_type)
{
    return create_conv(from_lcd, &iconv_mbstostr_methods);
}
/* from XlcNString to XlcNMultiByte */

/*
 * Converts a string of single bytes to the multibyte encoding of the
 * current C locale by passing each byte value to wctomb() as a wide
 * character.
 *
 * Stops at the end of the input, at the first byte wctomb() cannot encode,
 * or when fewer than MB_CUR_MAX bytes of output space remain.  Always
 * returns 0; the in/out arguments are advanced past what was processed, so
 * a non-zero *from_left tells the caller that input was left over.
 */
static int
iconv_strtombs(conv, from, from_left, to, to_left, args, num_args)
    XlcConv conv;
    XPointer *from;
    int *from_left;
    XPointer *to;
    int *to_left;
    XPointer *args;
    int num_args;
{
    unsigned char const *src;
    unsigned char const *srcend;
    unsigned char *dst;
    unsigned char *dstend;

    if (from == NULL || *from == NULL)
        return 0;

    src = (unsigned char const *) *from;
    srcend = src + *from_left;
    dst = (unsigned char *) *to;
    dstend = dst + *to_left;

    while (src < srcend) {
        int count;

        /* wctomb() cannot be told how much room is left and may store up
         * to MB_CUR_MAX bytes, so stop while that much space is still
         * guaranteed instead of writing past dstend. */
        if (dst > dstend || (size_t) (dstend - dst) < MB_CUR_MAX)
            break;

        count = wctomb((char *) dst, *src);
        if (count < 0)
            break;
        dst += count;
        src++;
    }

    *from = (XPointer) src;
    *from_left = srcend - src;
    *to = (XPointer) dst;
    *to_left = dstend - dst;

    return 0;
}
/* Method table for converters created by open_iconv_strtombs(). */
static XlcConvMethodsRec iconv_strtombs_methods = {
    close_converter,
    iconv_strtombs,
    NULL
};

/*
 * Opens a converter that runs iconv_strtombs().  Only from_lcd is used;
 * the remaining arguments are ignored.
 *
 * Written with a prototype taking `const char *' type names, matching
 * open_iconv_mbstofcs(), which is registered through the same
 * _XlcSetConverter() interface.
 */
static XlcConv
open_iconv_strtombs(
    XLCd from_lcd,
    const char *from_type,
    XLCd to_lcd,
    const char *to_type)
{
    return create_conv(from_lcd, &iconv_strtombs_methods);
}
/***************************************************************************/
/* Part II: An iconv locale loader.
 *
 * Here we can assume that "multi-byte" is the encoding of the current C
 * locale, as converted by mbtowc() and wctomb(), and that `wchar_t' is
 * Unicode.
 */
/* from XlcNMultiByte to XlcNWideChar */
static int
iconv_mbstowcs(conv, from, from_left, to, to_left, args, num_args)
XlcConv conv;
XPointer *from;
int *from_left;
XPointer *to;
int *to_left;
XPointer *args;
int num_args;
{
char *src = *((char **) from);
wchar_t *dst = *((wchar_t **) to);
int src_left = *from_left;
int dst_left = *to_left;
int length, unconv_num = 0;
while (src_left > 0 && dst_left > 0) {
length = mbtowc(dst, src, src_left);
if (length > 0) {
src += length;
src_left -= length;
if (dst)
dst++;
dst_left--;
} else if (length < 0) {
src++;
src_left--;
unconv_num++;
} else {
/* null ? */
src++;
src_left--;
if (dst)
*dst++ = L'\0';
dst_left--;
}
}
*from = (XPointer) src;
if (dst)
*to = (XPointer) dst;
*from_left = src_left;
*to_left = dst_left;
return unconv_num;
}
/* Method table for converters created by open_iconv_mbstowcs(). */
static XlcConvMethodsRec iconv_mbstowcs_methods = {
    close_converter,
    iconv_mbstowcs,
    NULL
};

/*
 * Opens a converter that runs iconv_mbstowcs().  Only from_lcd is used;
 * the remaining arguments are ignored.
 *
 * Written with a prototype taking `const char *' type names, matching
 * open_iconv_mbstofcs(), which is registered through the same
 * _XlcSetConverter() interface.
 */
static XlcConv
open_iconv_mbstowcs(
    XLCd from_lcd,
    const char *from_type,
    XLCd to_lcd,
    const char *to_type)
{
    return create_conv(from_lcd, &iconv_mbstowcs_methods);
}
/* from XlcNWideChar to XlcNMultiByte */

/*
 * Converts wide characters to the multibyte encoding of the current C
 * locale, one wctomb() call per character.
 *
 * Conversion continues only while at least MB_CUR_MAX bytes of output
 * space remain, because wctomb() may store that many bytes and cannot be
 * given a limit.  A character that wctomb() cannot encode, or for which it
 * reports zero bytes, is skipped and counted; skipping in the zero case
 * guarantees that the loop always makes progress.
 *
 * Returns the number of skipped characters; *from, *from_left and *to_left
 * are always updated, *to only when the output buffer is not NULL.
 */
static int
iconv_wcstombs(conv, from, from_left, to, to_left, args, num_args)
    XlcConv conv;
    XPointer *from;
    int *from_left;
    XPointer *to;
    int *to_left;
    XPointer *args;
    int num_args;
{
    wchar_t *src = *((wchar_t **) from);
    char *dst = *((char **) to);
    int src_left = *from_left;
    int dst_left = *to_left;
    int length, unconv_num = 0;

    /* Test the sign first: a negative dst_left would turn into a huge
     * value when compared against the unsigned MB_CUR_MAX. */
    while (src_left > 0 && dst_left > 0 && (size_t) dst_left >= MB_CUR_MAX) {
        length = wctomb(dst, *src);

        if (length > 0) {
            src++;
            src_left--;
            if (dst)
                dst += length;
            dst_left -= length;
        } else {
            /* length < 0: not representable.  length == 0: nothing was
             * produced.  Either way move on to the next character. */
            src++;
            src_left--;
            unconv_num++;
        }
    }

    *from = (XPointer) src;
    if (dst)
        *to = (XPointer) dst;
    *from_left = src_left;
    *to_left = dst_left;

    return unconv_num;
}
/* Method table for converters created by open_iconv_wcstombs(). */
static XlcConvMethodsRec iconv_wcstombs_methods = {
    close_converter,
    iconv_wcstombs,
    NULL
};

/*
 * Opens a converter that runs iconv_wcstombs().  Only from_lcd is used;
 * the remaining arguments are ignored.
 *
 * Written with a prototype taking `const char *' type names, matching
 * open_iconv_mbstofcs(), which is registered through the same
 * _XlcSetConverter() interface.
 */
static XlcConv
open_iconv_wcstombs(
    XLCd from_lcd,
    const char *from_type,
    XLCd to_lcd,
    const char *to_type)
{
    return create_conv(from_lcd, &iconv_wcstombs_methods);
}
/*
 * Opens a converter through create_tofontcs_conv() that reuses the
 * iconv_mbstocs method table.  Only from_lcd is used; the remaining
 * arguments are ignored.
 */
static XlcConv
open_iconv_mbstofcs(
    XLCd from_lcd,
    const char *from_type,
    XLCd to_lcd,
    const char *to_type)
{
    XlcConv fontcs_conv;

    fontcs_conv = create_tofontcs_conv(from_lcd, &iconv_mbstocs_methods);
    return fontcs_conv;
}
/* Registers UTF-8 converters for a UTF-8 locale. */
void
@ -1842,3 +2429,34 @@ _XlcAddUtf8LocaleConverters(
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_utf8tofcs);
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
}
/*
 * Registers the converters for a GB18030 locale on lcd.
 *
 * Every conversion that involves XlcNMultiByte is routed to one of the
 * open_iconv_* openers; the conversions between XlcNWideChar and the
 * string / charset / font-charset types use the open_wcs* / open_strtowcs /
 * open_cstowcs openers.  All registrations use lcd as both source and
 * destination locale.
 */
void
_XlcAddGB18030LocaleConverters(
XLCd lcd)
{
/* Register elementary converters. */
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNWideChar, open_iconv_mbstowcs);
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNMultiByte, open_iconv_wcstombs);
/* Register converters for XlcNCharSet. This implicitly provides
* converters from and to XlcNCompoundText. */
_XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNMultiByte, open_iconv_cstombs);
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNCharSet, open_iconv_mbstocs);
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNChar, open_iconv_mbtocs);
_XlcSetConverter(lcd, XlcNString, lcd, XlcNMultiByte, open_iconv_strtombs);
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNString, open_iconv_mbstostr);
/* Register the multibyte converter for XlcNFontCharSet. */
_XlcSetConverter(lcd, XlcNMultiByte, lcd, XlcNFontCharSet, open_iconv_mbstofcs);
/* Register the wide-character converters to and from XlcNString,
* XlcNCharSet and XlcNChar. */
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNString, open_wcstostr);
_XlcSetConverter(lcd, XlcNString, lcd, XlcNWideChar, open_strtowcs);
_XlcSetConverter(lcd, XlcNCharSet, lcd, XlcNWideChar, open_cstowcs);
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNCharSet, open_wcstocs);
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNChar, open_wcstocs1);
/* Register the wide-character converter for XlcNFontCharSet. */
_XlcSetConverter(lcd, XlcNWideChar, lcd, XlcNFontCharSet, open_wcstofcs);
}

File diff suppressed because it is too large Load diff

View file

@ -18,6 +18,8 @@
* ./cjk_tab_to_h BIG5 big5 > big5.h < BIG5.TXT
*
* ./cjk_tab_to_h JOHAB johab > johab.h < JOHAB.TXT
*
* ./cjk_tab_to_h BIG5HKSCS-0 big5hkscs >big5hkscs.h < BIG5HKSCS.TXT
*/
#include <stdio.h>
@ -892,6 +894,49 @@ static void do_big5 (const char* name)
invert(&enc); output_uni2charset_sparse(name,&enc);
}
/* Big5-HKSCS specifics */

/* Maps a zero-based table row to its first byte; row 0 is byte 0x81. */
static int row_byte_big5hkscs (int row) {
  const int first_row_byte = 0x81;

  return row + first_row_byte;
}
/* Maps a zero-based table column to its second byte: columns below 0x3f
   start at byte 0x40, the remaining columns continue at byte 0xa1. */
static int col_byte_big5hkscs (int col) {
  if (col < 0x3f)
    return col + 0x40;
  return col + 0x62;
}
/* Maps a first byte in 0x81..0xfe to its zero-based table row;
   returns -1 for any other value. */
static int byte_row_big5hkscs (int byte) {
  int is_row_byte = (byte >= 0x81 && byte < 0xff);

  return is_row_byte ? byte - 0x81 : -1;
}
/* Maps a second byte to its zero-based table column: 0x40..0x7e give
   columns 0..0x3e, 0xa1..0xfe give columns 0x3f onwards; returns -1 for
   any other value. */
static int byte_col_big5hkscs (int byte) {
  if (byte < 0x40)
    return -1;
  if (byte < 0x7f)
    return byte - 0x40;
  if (byte < 0xa1 || byte >= 0xff)
    return -1;
  return byte - 0x62;
}
/* Describes the Big5-HKSCS layout in an Encoding and then runs
   read_table, output_charset2uni, invert and output_uni2charset_sparse
   on it, in that order.
   The table has 126 rows (first bytes 0x81..0xfe) and 157 columns
   (second bytes 0x40..0x7e and 0xa1..0xfe). */
static void do_big5hkscs (const char* name)
{
  Encoding enc;

  /* Table dimensions. */
  enc.rows = 126;
  enc.cols = 157;

  /* Index <-> byte mapping functions. */
  enc.byte_row = byte_row_big5hkscs;
  enc.row_byte = row_byte_big5hkscs;
  enc.byte_col = byte_col_big5hkscs;
  enc.col_byte = col_byte_big5hkscs;

  /* The same checks and mappings as C expression templates. */
  enc.check_row_expr = "%1$s >= 0x81 && %1$s < 0xff";
  enc.byte_row_expr = "%1$s - 0x81";
  enc.check_col_expr = "(%1$s >= 0x40 && %1$s < 0x7f) || (%1$s >= 0xa1 && %1$s < 0xff)";
  enc.byte_col_expr = "%1$s - (%1$s >= 0xa1 ? 0x62 : 0x40)";

  read_table(&enc);
  output_charset2uni(name,&enc);

  invert(&enc);
  output_uni2charset_sparse(name,&enc);
}
/* Johab Hangul specifics */
static int row_byte_johab_hangul (int row) {
@ -1014,6 +1059,8 @@ int main (int argc, char *argv[])
do_ksc5601(name);
else if (!strcmp(name,"big5") || !strcmp(name,"cp950ext"))
do_big5(name);
else if (!strcmp(name,"big5hkscs"))
do_big5hkscs(name);
else if (!strcmp(name,"johab_hangul"))
do_johab_hangul(name);
else if (!strcmp(name,"cp932ext"))

6200
src/xlibi18n/lcUniConv/gbk.h Normal file

File diff suppressed because it is too large Load diff