1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495 |
- #ifndef _XS_UNICODE_H
- #define _XS_UNICODE_H
/* Encodes the code point cpoint as UTF-8 and appends the resulting bytes
   to str through xs_append_m(); returns whatever xs_append_m() returns. */
- xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
/* Decodes one UTF-8 sequence starting at str, stores the code point in
   *cpoint and returns a pointer to the first byte that was not consumed.
   A sequence with a broken continuation byte is reported as 0xfffd. */
- char *xs_utf8_dec(const char *str, unsigned int *cpoint);
/* The function definitions that follow are only compiled when
   XS_IMPLEMENTATION is defined before this header is included. */
- #ifdef XS_IMPLEMENTATION
/*
 * Encodes the Unicode code point `cpoint` as UTF-8 into `buf`.
 *
 * buf    - destination with room for at least 4 bytes; no NUL terminator
 *          is written.
 * cpoint - code point to encode. Values that have no valid UTF-8 form
 *          (the surrogate range U+D800..U+DFFF and anything above
 *          U+10FFFF) are encoded as U+FFFD REPLACEMENT CHARACTER instead
 *          of producing a malformed byte sequence.
 *
 * Returns a pointer one past the last byte written, so the encoded
 * length is (return value - buf), always between 1 and 4.
 */
char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
{
    unsigned char *p = (unsigned char *)buf;

    /* never emit bytes that a conforming decoder must reject */
    if (cpoint > 0x10ffff || (cpoint >= 0xd800 && cpoint <= 0xdfff))
        cpoint = 0xfffd;

    if (cpoint < 0x80) {
        /* 1 byte: 0xxxxxxx */
        *p++ = (unsigned char)cpoint;
    }
    else if (cpoint < 0x800) {
        /* 2 bytes: 110xxxxx 10xxxxxx */
        *p++ = (unsigned char)(0xc0 | (cpoint >> 6));
        *p++ = (unsigned char)(0x80 | (cpoint & 0x3f));
    }
    else if (cpoint < 0x10000) {
        /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */
        *p++ = (unsigned char)(0xe0 | (cpoint >> 12));
        *p++ = (unsigned char)(0x80 | ((cpoint >> 6) & 0x3f));
        *p++ = (unsigned char)(0x80 | (cpoint & 0x3f));
    }
    else {
        /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        *p++ = (unsigned char)(0xf0 | (cpoint >> 18));
        *p++ = (unsigned char)(0x80 | ((cpoint >> 12) & 0x3f));
        *p++ = (unsigned char)(0x80 | ((cpoint >> 6) & 0x3f));
        *p++ = (unsigned char)(0x80 | (cpoint & 0x3f));
    }

    return (char *)p;
}
- xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
- {
- char tmp[4], *p;
- p = _xs_utf8_enc(tmp, cpoint);
- return xs_append_m(str, tmp, p - tmp);
- }
/*
 * Decodes one UTF-8 sequence starting at `str`.
 *
 * str    - NUL-terminated input; the first byte is always consumed, so a
 *          NUL byte decodes as code point 0 and the returned pointer is
 *          one past it (callers should test *str before calling).
 * cpoint - receives the decoded code point. It is set to U+FFFD when the
 *          first byte is not a valid lead byte (a stray continuation
 *          byte or 0xf8..0xff) or when an expected continuation byte is
 *          missing.
 *
 * Returns a pointer to the first byte that was not consumed. On a broken
 * sequence the offending byte is left unconsumed, so decoding never runs
 * past the string terminator.
 *
 * NOTE: overlong forms and encoded surrogates are not rejected here.
 */
char *xs_utf8_dec(const char *str, unsigned int *cpoint)
{
    const unsigned char *p = (const unsigned char *)str;
    unsigned int lead = *p++;
    unsigned int cp;
    int cb; /* continuation bytes still expected */

    if ((lead & 0x80) == 0) {
        /* 1 byte: 0xxxxxxx */
        cp = lead;
        cb = 0;
    }
    else if ((lead & 0xe0) == 0xc0) {
        /* 2 bytes: 110xxxxx */
        cp = (lead & 0x1f) << 6;
        cb = 1;
    }
    else if ((lead & 0xf0) == 0xe0) {
        /* 3 bytes: 1110xxxx */
        cp = (lead & 0x0f) << 12;
        cb = 2;
    }
    else if ((lead & 0xf8) == 0xf0) {
        /* 4 bytes: 11110xxx */
        cp = (lead & 0x07) << 18;
        cb = 3;
    }
    else {
        /* not a lead byte: report it instead of leaving *cpoint unset */
        cp = 0xfffd;
        cb = 0;
    }

    while (cb > 0) {
        if ((*p & 0xc0) != 0x80) {
            /* truncated or malformed sequence; do not consume this byte */
            cp = 0xfffd;
            break;
        }

        cb--;
        cp |= (unsigned int)(*p++ & 0x3f) << (cb * 6);
    }

    *cpoint = cp;

    return (char *)p;
}
- #endif
- #endif
|