Browse Source

Backport from xs.

default 1 year ago
parent
commit
ca2e0fcd89
2 changed files with 69 additions and 20 deletions
  1. 68 19
      xs_unicode.h
  2. 1 1
      xs_version.h

+ 68 - 19
xs_unicode.h

@@ -5,42 +5,91 @@
 #define _XS_UNICODE_H
 
  xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
+ char *xs_utf8_dec(const char *str, unsigned int *cpoint);
 
 
 #ifdef XS_IMPLEMENTATION
 
-/** utf-8 **/
+
+char *_xs_utf8_enc(char buf[4], unsigned int cpoint)
+/* encodes a Unicode codepoint to utf-8 into buf and returns the new position */
+{
+    unsigned char *p = (unsigned char *)buf;
+
+    if (cpoint < 0x80) /* 1 byte char */
+        *p++ = cpoint & 0xff;
+    else {
+        if (cpoint < 0x800) /* 2 byte char */
+            *p++ = 0xc0 | (cpoint >> 6);
+        else {
+            if (cpoint < 0x10000) /* 3 byte char */
+                *p++ = 0xe0 | (cpoint >> 12);
+            else { /* 4 byte char */
+                *p++ = 0xf0 | (cpoint >> 18);
+                *p++ = 0x80 | ((cpoint >> 12) & 0x3f);
+            }
+
+            *p++ = 0x80 | ((cpoint >> 6) & 0x3f);
+        }
+
+        *p++ = 0x80 | (cpoint & 0x3f);
+    }
+
+    return (char *)p;
+}
+
 
 xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
-/* encodes an Unicode codepoint to utf8 */
+/* encodes a Unicode codepoint to utf-8 into str */
 {
-    unsigned char tmp[4];
-    int n = 0;
+    char tmp[4], *p;
+
+    p = _xs_utf8_enc(tmp, cpoint);
 
-    if (cpoint < 0x80)
-        tmp[n++] = cpoint & 0xff;
+    return xs_append_m(str, tmp, p - tmp);
+}
+
+
+char *xs_utf8_dec(const char *str, unsigned int *cpoint)
+/* decodes a utf-8 char inside str into cpoint and returns the next position */
+{
+    unsigned char *p = (unsigned char *)str;
+    int c = *p++;
+    int cb = 0;
+
+    if ((c & 0x80) == 0) { /* 1 byte char */
+        *cpoint = c;
+    }
     else
-    if (cpoint < 0x800) {
-        tmp[n++] = 0xc0 | (cpoint >> 6);
-        tmp[n++] = 0x80 | (cpoint & 0x3f);
+    if ((c & 0xe0) == 0xc0) { /* 2 byte char */
+        *cpoint = (c & 0x1f) << 6;
+        cb = 1;
     }
     else
-    if (cpoint < 0x10000) {
-        tmp[n++] = 0xe0 | (cpoint >> 12);
-        tmp[n++] = 0x80 | ((cpoint >> 6) & 0x3f);
-        tmp[n++] = 0x80 | (cpoint & 0x3f);
+    if ((c & 0xf0) == 0xe0) { /* 3 byte char */
+        *cpoint = (c & 0x0f) << 12;
+        cb = 2;
     }
     else
-    if (cpoint < 0x200000) {
-        tmp[n++] = 0xf0 | (cpoint >> 18);
-        tmp[n++] = 0x80 | ((cpoint >> 12) & 0x3f);
-        tmp[n++] = 0x80 | ((cpoint >> 6) & 0x3f);
-        tmp[n++] = 0x80 | (cpoint & 0x3f);
+    if ((c & 0xf8) == 0xf0) { /* 4 byte char */
+        *cpoint = (c & 0x07) << 18;
+        cb = 3;
+    }
+
+    /* process the continuation bytes */
+    while (cb--) {
+        if ((*p & 0xc0) == 0x80)
+            *cpoint |= (*p++ & 0x3f) << (cb * 6);
+        else {
+            *cpoint = 0xfffd;
+            break;
+        }
     }
 
-    return xs_append_m(str, (char *)tmp, n);
+    return (char *)p;
 }
 
+
 #endif /* XS_IMPLEMENTATION */
 
 #endif /* _XS_UNICODE_H */

+ 1 - 1
xs_version.h

@@ -1 +1 @@
-/* 1948fa3c5f0df994170cd38b9144b99734b071e6 */
+/* 3588cbb7859917f1c5965254f8a53c3349c773ea */