123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202 |
- #ifndef _XS_UNICODE_H
- #define _XS_UNICODE_H
/* Encodes cpoint as UTF-8 (at most 4 bytes), hands the bytes and their count
   to xs_append_m() together with str, and returns whatever that yields. */
- xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
/* Decodes one UTF-8 sequence starting at *str, moves *str past the bytes it
   consumed and returns the code point; a bad continuation byte gives 0xfffd. */
- unsigned int xs_utf8_dec(char **str);
/* Binary search of xs_unicode_case_fold_table keyed on the first element of
   each two-element entry; returns a pointer to the matching entry or NULL.
   Only implemented when _XS_UNICODE_TBL_H is defined. */
- unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
/* Linear search of the second element of each entry of
   xs_unicode_case_fold_table; returns a pointer to that element or NULL.
   Only implemented when _XS_UNICODE_TBL_H is defined. */
- unsigned int *_xs_unicode_lower_search(unsigned int cpoint);
/* Non-zero when cpoint appears as the first element of a case-fold entry
   (presumably the uppercase form -- the table itself is defined elsewhere). */
- #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint))
/* Non-zero when cpoint appears as the second element of a case-fold entry
   (presumably the lowercase form -- the table itself is defined elsewhere). */
- #define xs_unicode_is_lower(cpoint) (!!_xs_unicode_lower_search(cpoint))
/* Returns the first element of the entry whose second element is cpoint,
   or cpoint unchanged when there is no such entry. */
- unsigned int xs_unicode_to_upper(unsigned int cpoint);
/* Returns the second element of the entry whose first element is cpoint,
   or cpoint unchanged when there is no such entry. */
- unsigned int xs_unicode_to_lower(unsigned int cpoint);
/* Looks cpoint up in xs_unicode_nfd_table (three-element entries); on a hit
   stores the entry's 2nd and 3rd elements in *base and *diac and returns 1,
   otherwise returns 0 and leaves both outputs untouched. */
- int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac);
/* Reverse lookup: finds the entry of xs_unicode_nfd_table whose 2nd and 3rd
   elements equal base and diac; on a hit stores its 1st element in *cpoint
   and returns 1, otherwise returns 0 and leaves *cpoint untouched. */
- int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint);
- #ifdef XS_IMPLEMENTATION
/**
 * Encodes one Unicode code point as UTF-8.
 *
 * @param buf     destination; must have room for 4 bytes. No NUL terminator
 *                is written.
 * @param cpoint  code point to encode. Values above U+10FFFF are outside the
 *                Unicode range and are encoded as U+FFFD (the replacement
 *                character) instead of producing a malformed lead byte.
 * @return pointer one past the last byte written (buf + 1 .. buf + 4).
 */
char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
{
    unsigned char *p = (unsigned char *)buf;

    /* a 4-byte sequence only has 21 payload bits, and Unicode stops at
       0x10ffff; anything bigger would spill into the lead byte's marker */
    if (cpoint > 0x10ffff)
        cpoint = 0xfffd;

    if (cpoint < 0x80) {
        /* 1 byte: plain ASCII */
        *p++ = cpoint & 0xff;
    }
    else if (cpoint < 0x800) {
        /* 2 bytes: 110xxxxx 10xxxxxx */
        *p++ = 0xc0 | (cpoint >> 6);
        *p++ = 0x80 | (cpoint & 0x3f);
    }
    else if (cpoint < 0x10000) {
        /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        *p++ = 0xe0 | (cpoint >> 12);
        *p++ = 0x80 | ((cpoint >> 6) & 0x3f);
        *p++ = 0x80 | (cpoint & 0x3f);
    }
    else {
        /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        *p++ = 0xf0 | (cpoint >> 18);
        *p++ = 0x80 | ((cpoint >> 12) & 0x3f);
        *p++ = 0x80 | ((cpoint >> 6) & 0x3f);
        *p++ = 0x80 | (cpoint & 0x3f);
    }

    return (char *)p;
}
- xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
- {
- char tmp[4], *p;
- p = _xs_utf8_enc(tmp, cpoint);
- return xs_append_m(str, tmp, p - tmp);
- }
/**
 * Decodes one UTF-8 sequence and advances the caller's cursor.
 *
 * @param str  in/out: points at the first byte of the sequence on entry and
 *             just past the consumed bytes on return. The lead byte is always
 *             consumed, even when it is a NUL.
 * @return the decoded code point, or 0xfffd (replacement character) when the
 *         lead byte is not a valid UTF-8 lead byte or when a continuation
 *         byte is missing. A malformed continuation byte is not consumed.
 */
unsigned int xs_utf8_dec(char **str)
{
    unsigned char *p = (unsigned char *)*str;
    unsigned int cpoint = 0;
    int c = *p++;
    int cb = 0; /* continuation bytes still expected */

    if ((c & 0x80) == 0) {
        /* 0xxxxxxx: 1 byte char */
        cpoint = c;
    }
    else if ((c & 0xe0) == 0xc0) {
        /* 110xxxxx: 2 byte char */
        cpoint = (c & 0x1f) << 6;
        cb = 1;
    }
    else if ((c & 0xf0) == 0xe0) {
        /* 1110xxxx: 3 byte char */
        cpoint = (c & 0x0f) << 12;
        cb = 2;
    }
    else if ((c & 0xf8) == 0xf0) {
        /* 11110xxx: 4 byte char */
        cpoint = (c & 0x07) << 18;
        cb = 3;
    }
    else {
        /* stray continuation byte (10xxxxxx) or 11111xxx: not a lead byte.
           Report it instead of returning 0, which callers would mistake
           for the end of the string */
        cpoint = 0xfffd;
    }

    while (cb--) {
        if ((*p & 0xc0) == 0x80) {
            /* cb was already decremented, so it is the number of 6-bit
               groups that still follow this one */
            cpoint |= (*p++ & 0x3f) << (cb * 6);
        }
        else {
            /* truncated sequence: leave the offending byte for the next call */
            cpoint = 0xfffd;
            break;
        }
    }

    *str = (char *)p;

    return cpoint;
}
- #ifdef _XS_UNICODE_TBL_H
/**
 * bsearch()-style comparator: compares the unsigned int each argument
 * points at. Returns -1, 0 or 1 when the first is less than, equal to or
 * greater than the second.
 */
static int int_cmp(const void *p1, const void *p2)
{
    unsigned int lhs = *(const unsigned int *)p1;
    unsigned int rhs = *(const unsigned int *)p2;

    /* each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
    return (lhs > rhs) - (lhs < rhs);
}
- unsigned int *_xs_unicode_upper_search(unsigned int cpoint)
- {
- return bsearch(&cpoint, xs_unicode_case_fold_table,
- sizeof(xs_unicode_case_fold_table) / (sizeof(unsigned int) * 2),
- sizeof(unsigned int) * 2,
- int_cmp);
- }
- unsigned int *_xs_unicode_lower_search(unsigned int cpoint)
- {
- unsigned int *p = xs_unicode_case_fold_table + 1;
- unsigned int *e = xs_unicode_case_fold_table +
- sizeof(xs_unicode_case_fold_table) / sizeof(unsigned int);
- while (p < e) {
- if (cpoint == *p)
- return p;
- p += 2;
- }
- return NULL;
- }
/**
 * Maps cpoint through the case-fold table: when cpoint is found as the
 * second element of an entry, returns that entry's first element;
 * otherwise returns cpoint unchanged.
 */
unsigned int xs_unicode_to_upper(unsigned int cpoint)
{
    unsigned int *hit = _xs_unicode_lower_search(cpoint);

    if (hit != NULL) {
        /* hit points at the second element; its partner sits just before */
        return hit[-1];
    }

    return cpoint;
}
/**
 * Maps cpoint through the case-fold table: when cpoint is found as the
 * first element of an entry, returns that entry's second element;
 * otherwise returns cpoint unchanged.
 */
unsigned int xs_unicode_to_lower(unsigned int cpoint)
{
    unsigned int *hit = _xs_unicode_upper_search(cpoint);

    if (hit != NULL) {
        /* hit points at the first element; its partner sits just after */
        return hit[1];
    }

    return cpoint;
}
- int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac)
- {
- unsigned int *r = bsearch(&cpoint, xs_unicode_nfd_table,
- sizeof(xs_unicode_nfd_table) / (sizeof(unsigned int) * 3),
- sizeof(unsigned int) * 3,
- int_cmp);
- if (r != NULL) {
- *base = r[1];
- *diac = r[2];
- }
- return !!r;
- }
- int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint)
- {
- unsigned int *p = xs_unicode_nfd_table;
- unsigned int *e = xs_unicode_nfd_table +
- sizeof(xs_unicode_nfd_table) / sizeof(unsigned int);
- while (p < e) {
- if (p[1] == base && p[2] == diac) {
- *cpoint = p[0];
- return 1;
- }
- p += 3;
- }
- return 0;
- }
- #endif
- #endif
- #endif
|