xs_unicode.h 2.1 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495
  1. /* copyright (c) 2022 - 2023 grunfink / MIT license */
  2. #ifndef _XS_UNICODE_H
  3. #define _XS_UNICODE_H
  4. xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
  5. char *xs_utf8_dec(const char *str, unsigned int *cpoint);
  6. #ifdef XS_IMPLEMENTATION
  char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
  /* Encodes the Unicode code point cpoint as UTF-8 into buf and returns the
     position just past the last byte written. Between 1 and 4 bytes are
     stored; no NUL terminator is added. Values that have no valid UTF-8
     form (the surrogate range U+D800..U+DFFF and anything above U+10FFFF)
     are encoded as U+FFFD, the replacement character. */
  {
      unsigned char *p = (unsigned char *)buf;

      /* never emit bytes that are not well-formed utf-8: without this,
         values >= 0x200000 would also spill into the lead byte's marker bits */
      if (cpoint > 0x10ffff || (cpoint >= 0xd800 && cpoint <= 0xdfff))
          cpoint = 0xfffd;

      if (cpoint < 0x80) {
          /* 1 byte char: plain ASCII */
          *p++ = (unsigned char)cpoint;
      }
      else
      if (cpoint < 0x800) {
          /* 2 byte char: 110xxxxx 10xxxxxx */
          *p++ = (unsigned char)(0xc0 | (cpoint >> 6));
          *p++ = (unsigned char)(0x80 | (cpoint & 0x3f));
      }
      else
      if (cpoint < 0x10000) {
          /* 3 byte char: 1110xxxx 10xxxxxx 10xxxxxx */
          *p++ = (unsigned char)(0xe0 | (cpoint >> 12));
          *p++ = (unsigned char)(0x80 | ((cpoint >> 6) & 0x3f));
          *p++ = (unsigned char)(0x80 | (cpoint & 0x3f));
      }
      else {
          /* 4 byte char: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
          *p++ = (unsigned char)(0xf0 | (cpoint >> 18));
          *p++ = (unsigned char)(0x80 | ((cpoint >> 12) & 0x3f));
          *p++ = (unsigned char)(0x80 | ((cpoint >> 6) & 0x3f));
          *p++ = (unsigned char)(0x80 | (cpoint & 0x3f));
      }

      return (char *)p;
  }
  29. xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
  30. /* encodes an Unicode codepoint to utf-8 into str */
  31. {
  32. char tmp[4], *p;
  33. p = _xs_utf8_enc(tmp, cpoint);
  34. return xs_append_m(str, tmp, p - tmp);
  35. }
  char *xs_utf8_dec(const char *str, unsigned int *cpoint)
  /* Decodes the utf-8 char that starts at str, stores its code point in
     *cpoint and returns the position of the next char.
     *cpoint is always written. It is set to U+FFFD when the input is
     malformed: an invalid first byte, a missing continuation byte, an
     overlong encoding, an encoded surrogate or a value above U+10FFFF.
     When a continuation byte is missing, the returned position is that of
     the offending byte, so decoding never runs past a NUL terminator.
     A first byte of 0 decodes as code point 0 and is consumed like any
     other 1 byte char. */
  {
      const unsigned char *p = (const unsigned char *)str;
      unsigned int c = *p++;
      unsigned int cp;
      unsigned int min;   /* smallest code point that needs this length */
      int cb;             /* continuation bytes still expected */

      if ((c & 0x80) == 0) { /* 1 byte char */
          cp  = c;
          min = 0;
          cb  = 0;
      }
      else
      if ((c & 0xe0) == 0xc0) { /* 2 byte char */
          cp  = (c & 0x1f) << 6;
          min = 0x80;
          cb  = 1;
      }
      else
      if ((c & 0xf0) == 0xe0) { /* 3 byte char */
          cp  = (c & 0x0f) << 12;
          min = 0x800;
          cb  = 2;
      }
      else
      if ((c & 0xf8) == 0xf0) { /* 4 byte char */
          cp  = (c & 0x07) << 18;
          min = 0x10000;
          cb  = 3;
      }
      else {
          /* stray continuation byte or invalid lead byte: report it
             instead of leaving *cpoint untouched */
          *cpoint = 0xfffd;
          return (char *)p;
      }

      /* process the continuation bytes */
      while (cb--) {
          if ((*p & 0xc0) != 0x80) {
              /* truncated sequence: do not consume the offending byte */
              *cpoint = 0xfffd;
              return (char *)p;
          }

          cp |= (unsigned int)(*p++ & 0x3f) << (cb * 6);
      }

      /* reject overlong forms, surrogates and out-of-range values */
      if (cp < min || cp > 0x10ffff || (cp >= 0xd800 && cp <= 0xdfff))
          cp = 0xfffd;

      *cpoint = cp;

      return (char *)p;
  }
  71. #endif /* XS_IMPLEMENTATION */
  72. #endif /* _XS_UNICODE_H */