Quellcode durchsuchen

Backport from xs.

default vor 1 Jahr
Ursprung
Commit
acf3cdcf80
4 geänderte Dateien mit 39 neuen und 11 gelöschten Zeilen
  1. 5 2
      xs.h
  2. 4 8
      xs_json.h
  3. 29 0
      xs_unicode.h
  4. 1 1
      xs_version.h

+ 5 - 2
xs.h

@@ -1180,6 +1180,8 @@ void *xs_memmem(const char *haystack, int h_size, const char *needle, int n_size
 
 /** hex **/
 
+static const char xs_hex_digits[] = "0123456789abcdef";
+
 xs_str *xs_hex_enc(const xs_val *data, int size)
 /* returns an hexdump of data */
 {
@@ -1190,8 +1192,9 @@ xs_str *xs_hex_enc(const xs_val *data, int size)
     p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1));
 
     for (n = 0; n < size; n++) {
-        snprintf(p, 3, "%02x", (unsigned char)data[n]);
-        p += 2;
+        *p++ = xs_hex_digits[*data >> 4 & 0xf];
+        *p++ = xs_hex_digits[*data      & 0xf];
+        data++;
     }
 
     *p = '\0';

+ 4 - 8
xs_json.h

@@ -248,24 +248,20 @@ static xs_val *_xs_json_load_lexer(FILE *f, js_type *t)
                         break;
                     }
 
-                    if (cp >= 0xd800 && cp <= 0xdfff) {
-                        /* it's a surrogate pair */
-                        cp = (cp & 0x3ff) << 10;
-
+                    if (xs_is_surrogate(cp)) {
                         /* \u must follow */
                         if (fgetc(f) != '\\' || fgetc(f) != 'u') {
                             *t = JS_ERROR;
                             break;
                         }
 
-                        unsigned int i;
-                        if (fscanf(f, "%04x", &i) != 1) {
+                        unsigned int p2;
+                        if (fscanf(f, "%04x", &p2) != 1) {
                             *t = JS_ERROR;
                             break;
                         }
 
-                        cp |= (i & 0x3ff);
-                        cp += 0x10000;
+                        cp = xs_surrogate_dec(cp, p2);
                     }
 
                     /* replace dangerous control codes with their visual representations */

+ 29 - 0
xs_unicode.h

@@ -8,6 +8,9 @@
  xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
  unsigned int xs_utf8_dec(char **str);
  int xs_unicode_width(unsigned int cpoint);
+ int xs_is_surrogate(unsigned int cpoint);
+ unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2);
+ unsigned int xs_surrogate_enc(unsigned int cpoint);
  unsigned int *_xs_unicode_upper_search(unsigned int cpoint);
  unsigned int *_xs_unicode_lower_search(unsigned int cpoint);
  #define xs_unicode_is_upper(cpoint) (!!_xs_unicode_upper_search(cpoint))
@@ -138,6 +141,32 @@ int xs_unicode_width(unsigned int cpoint)
 }
 
 
+/** surrogate pairs **/
+
+int xs_is_surrogate(unsigned int cpoint)
+/* checks if cpoint lies in the surrogate range 0xd800-0xdfff (either half of a pair, not only the first) */
+{
+    return cpoint >= 0xd800 && cpoint <= 0xdfff;
+}
+
+
+unsigned int xs_surrogate_dec(unsigned int p1, unsigned int p2)
+/* "decodes" a surrogate pair (p1 first, p2 second) into a codepoint; 0x10000 is added, not OR-ed, as p1's 10 bits can reach bit 16 */
+{
+    return 0x10000 + (((p1 & 0x3ff) << 10) | (p2 & 0x3ff));
+}
+
+
+unsigned int xs_surrogate_enc(unsigned int cpoint)
+/* "encodes" a Unicode codepoint into a surrogate pair (p1 in the upper 16 bits, p2 in the lower 16); cpoint is not range-checked here */
+{
+    unsigned int p1 = 0xd7c0 + (cpoint >> 10);    /* 0xd7c0 == 0xd800 - (0x10000 >> 10) */
+    unsigned int p2 = 0xdc00 + (cpoint & 0x3ff);  /* low 10 bits of cpoint on top of 0xdc00 */
+
+    return (p1 << 16) | p2;
+}
+
+
 #ifdef _XS_UNICODE_TBL_H
 
 /* include xs_unicode_tbl.h before this one to use these functions */

+ 1 - 1
xs_version.h

@@ -1 +1 @@
-/* 40d63c59610c642d1c8b2e2b94bbf5cdde69ad6a */
+/* 0932615dfe85e5d8544c4b2052eb66f3a430eb8c */