xs_json.h 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530
  1. /* copyright (c) 2022 - 2023 grunfink / MIT license */
  2. #ifndef _XS_JSON_H
  3. #define _XS_JSON_H
  /* Serializes data as JSON text. Only lists and dicts are accepted at the
     top level; any other type yields NULL. indent is the number of spaces
     added per nesting level (each element then goes on its own line);
     0 produces compact, single-line output.
     NOTE(review): the result is created with xs_str_new(); presumably the
     caller releases it with xs_free() -- confirm against xs.h */
  4. xs_str *xs_json_dumps_pp(const xs_val *data, int indent);
  /* compact (non-indented) variant of xs_json_dumps_pp() */
  5. #define xs_json_dumps(data) xs_json_dumps_pp(data, 0)
  /* Parses JSON text whose top-level value is an array or an object and
     returns it as a list or dict. Returns NULL if the text starts with
     anything else or if the array / object is malformed. */
  6. xs_val *xs_json_loads(const xs_str *json);
  7. #ifdef XS_IMPLEMENTATION
  8. /** IMPLEMENTATION **/
  9. /** JSON dumps **/
  10. static xs_str *_xs_json_dumps_str(xs_str *s, const char *data)
  11. /* dumps a string in JSON format */
  12. {
  13. unsigned char c;
  14. s = xs_str_cat(s, "\"");
  15. while ((c = *data)) {
  16. if (c == '\n')
  17. s = xs_str_cat(s, "\\n");
  18. else
  19. if (c == '\r')
  20. s = xs_str_cat(s, "\\r");
  21. else
  22. if (c == '\t')
  23. s = xs_str_cat(s, "\\t");
  24. else
  25. if (c == '\\')
  26. s = xs_str_cat(s, "\\\\");
  27. else
  28. if (c == '"')
  29. s = xs_str_cat(s, "\\\"");
  30. else
  31. if (c < 32) {
  32. char tmp[10];
  33. snprintf(tmp, sizeof(tmp), "\\u%04x", (unsigned int) c);
  34. s = xs_str_cat(s, tmp);
  35. }
  36. else
  37. s = xs_append_m(s, data, 1);
  38. data++;
  39. }
  40. s = xs_str_cat(s, "\"");
  41. return s;
  42. }
  43. static xs_str *_xs_json_indent(xs_str *s, int level, int indent)
  44. /* adds indentation */
  45. {
  46. if (indent) {
  47. int n;
  48. s = xs_str_cat(s, "\n");
  49. for (n = 0; n < level * indent; n++)
  50. s = xs_str_cat(s, " ");
  51. }
  52. return s;
  53. }
  54. static xs_str *_xs_json_dumps(xs_str *s, const xs_val *s_data, int level, int indent)
  55. /* dumps partial data as JSON */
  56. {
  57. int c = 0;
  58. xs_val *v;
  59. xs_val *data = (xs_val *)s_data;
  60. switch (xs_type(data)) {
  61. case XSTYPE_NULL:
  62. s = xs_str_cat(s, "null");
  63. break;
  64. case XSTYPE_TRUE:
  65. s = xs_str_cat(s, "true");
  66. break;
  67. case XSTYPE_FALSE:
  68. s = xs_str_cat(s, "false");
  69. break;
  70. case XSTYPE_NUMBER:
  71. s = xs_str_cat(s, xs_number_str(data));
  72. break;
  73. case XSTYPE_LIST:
  74. s = xs_str_cat(s, "[");
  75. while (xs_list_iter(&data, &v)) {
  76. if (c != 0)
  77. s = xs_str_cat(s, ",");
  78. s = _xs_json_indent(s, level + 1, indent);
  79. s = _xs_json_dumps(s, v, level + 1, indent);
  80. c++;
  81. }
  82. s = _xs_json_indent(s, level, indent);
  83. s = xs_str_cat(s, "]");
  84. break;
  85. case XSTYPE_DICT:
  86. s = xs_str_cat(s, "{");
  87. xs_str *k;
  88. while (xs_dict_iter(&data, &k, &v)) {
  89. if (c != 0)
  90. s = xs_str_cat(s, ",");
  91. s = _xs_json_indent(s, level + 1, indent);
  92. s = _xs_json_dumps_str(s, k);
  93. s = xs_str_cat(s, ":");
  94. if (indent)
  95. s = xs_str_cat(s, " ");
  96. s = _xs_json_dumps(s, v, level + 1, indent);
  97. c++;
  98. }
  99. s = _xs_json_indent(s, level, indent);
  100. s = xs_str_cat(s, "}");
  101. break;
  102. case XSTYPE_STRING:
  103. s = _xs_json_dumps_str(s, data);
  104. break;
  105. default:
  106. break;
  107. }
  108. return s;
  109. }
  110. xs_str *xs_json_dumps_pp(const xs_val *data, int indent)
  111. /* dumps a piece of data as JSON */
  112. {
  113. xstype t = xs_type(data);
  114. xs_str *s = NULL;
  115. if (t == XSTYPE_LIST || t == XSTYPE_DICT) {
  116. s = xs_str_new(NULL);
  117. s = _xs_json_dumps(s, data, 0, indent);
  118. }
  119. return s;
  120. }
  121. /** JSON loads **/
  122. /* this code comes mostly from the Minimum Profit Text Editor (MPDM) */
  /* token / parse-state codes shared by the lexer and the parsers.
     The order matters: everything from JS_VALUE upwards counts as a value. */
  typedef enum {
      JS_ERROR      = -1,   /* malformed input */
      JS_INCOMPLETE = 0,    /* container still being parsed */
      JS_OCURLY     = 1,    /* { */
      JS_OBRACK     = 2,    /* [ */
      JS_CCURLY     = 3,    /* } */
      JS_CBRACK     = 4,    /* ] */
      JS_COMMA      = 5,    /* , */
      JS_COLON      = 6,    /* : */
      JS_VALUE      = 7,    /* generic "a value was read" marker */
      JS_STRING     = 8,    /* quoted string */
      JS_INTEGER    = 9,    /* number without a decimal point */
      JS_REAL       = 10,   /* number with a decimal point */
      JS_TRUE       = 11,   /* true */
      JS_FALSE      = 12,   /* false */
      JS_NULL       = 13,   /* null */
      JS_ARRAY      = 14,   /* completed array */
      JS_OBJECT     = 15    /* completed object */
  } js_type;
  142. static xs_val *_xs_json_loads_lexer(const char **json, js_type *t)
  143. {
  144. char c;
  145. const char *s = *json;
  146. xs_val *v = NULL;
  147. /* skip blanks */
  148. while (*s == L' ' || *s == L'\t' || *s == L'\n' || *s == L'\r')
  149. s++;
  150. c = *s++;
  151. if (c == '{')
  152. *t = JS_OCURLY;
  153. else
  154. if (c == '}')
  155. *t = JS_CCURLY;
  156. else
  157. if (c == '[')
  158. *t = JS_OBRACK;
  159. else
  160. if (c == ']')
  161. *t = JS_CBRACK;
  162. else
  163. if (c == ',')
  164. *t = JS_COMMA;
  165. else
  166. if (c == ':')
  167. *t = JS_COLON;
  168. else
  169. if (c == '"') {
  170. *t = JS_STRING;
  171. v = xs_str_new(NULL);
  172. while ((c = *s) != '"' && c != '\0') {
  173. char tmp[5];
  174. int cp, i;
  175. if (c == '\\') {
  176. s++;
  177. c = *s;
  178. switch (c) {
  179. case 'n': c = '\n'; break;
  180. case 'r': c = '\r'; break;
  181. case 't': c = '\t'; break;
  182. case 'u': /* Unicode codepoint as an hex char */
  183. s++;
  184. strncpy(tmp, s, 4);
  185. tmp[4] = '\0';
  186. if (strlen(tmp) != 4) {
  187. *t = JS_ERROR;
  188. break;
  189. }
  190. s += 3; /* skip as it was one byte */
  191. sscanf(tmp, "%04x", &i);
  192. if (i >= 0xd800 && i <= 0xdfff) {
  193. /* it's a surrogate pair */
  194. cp = (i & 0x3ff) << 10;
  195. /* skip to the next value (last char + \ + u) */
  196. s++;
  197. if (memcmp(s, "\\u", 2) != 0) {
  198. *t = JS_ERROR;
  199. break;
  200. }
  201. s += 2;
  202. strncpy(tmp, s, 4);
  203. tmp[4] = '\0';
  204. if (strlen(tmp) != 4) {
  205. *t = JS_ERROR;
  206. break;
  207. }
  208. s += 3; /* skip as it was one byte */
  209. sscanf(tmp, "%04x", &i);
  210. cp |= (i & 0x3ff);
  211. cp += 0x10000;
  212. }
  213. else
  214. cp = i;
  215. /* replace dangerous control codes with their visual representations */
  216. if (cp >= '\0' && cp < ' ' && !strchr("\r\n\t", cp))
  217. cp += 0x2400;
  218. v = xs_utf8_enc(v, cp);
  219. c = '\0';
  220. break;
  221. }
  222. }
  223. if (c)
  224. v = xs_append_m(v, &c, 1);
  225. s++;
  226. }
  227. if (c != '\0')
  228. s++;
  229. }
  230. else
  231. if (c == '-' || (c >= '0' && c <= '9') || c == '.') {
  232. xs *vn = NULL;
  233. *t = JS_INTEGER;
  234. vn = xs_str_new(NULL);
  235. vn = xs_append_m(vn, &c, 1);
  236. while (((c = *s) >= '0' && c <= '9') || c == '.') {
  237. if (c == '.')
  238. *t = JS_REAL;
  239. vn = xs_append_m(vn, &c, 1);
  240. s++;
  241. }
  242. /* convert to XSTYPE_NUMBER */
  243. v = xs_number_new(atof(vn));
  244. }
  245. else
  246. if (c == 't' && strncmp(s, "rue", 3) == 0) {
  247. s += 3;
  248. *t = JS_TRUE;
  249. v = xs_val_new(XSTYPE_TRUE);
  250. }
  251. else
  252. if (c == 'f' && strncmp(s, "alse", 4) == 0) {
  253. s += 4;
  254. *t = JS_FALSE;
  255. v = xs_val_new(XSTYPE_FALSE);
  256. }
  257. else
  258. if (c == 'n' && strncmp(s, "ull", 3) == 0) {
  259. s += 3;
  260. *t = JS_NULL;
  261. v = xs_val_new(XSTYPE_NULL);
  262. }
  263. else
  264. *t = JS_ERROR;
  265. *json = s;
  266. return v;
  267. }
  268. static xs_list *_xs_json_loads_array(const char **json, js_type *t);
  269. static xs_dict *_xs_json_loads_object(const char **json, js_type *t);
  270. static xs_val *_xs_json_loads_value(const char **json, js_type *t, xs_val *v)
  271. /* parses a JSON value */
  272. {
  273. if (*t == JS_OBRACK)
  274. v = _xs_json_loads_array(json, t);
  275. else
  276. if (*t == JS_OCURLY)
  277. v = _xs_json_loads_object(json, t);
  278. if (*t >= JS_VALUE)
  279. *t = JS_VALUE;
  280. else
  281. *t = JS_ERROR;
  282. return v;
  283. }
  284. static xs_list *_xs_json_loads_array(const char **json, js_type *t)
  285. /* parses a JSON array */
  286. {
  287. const char *s = *json;
  288. xs *v;
  289. xs_list *l;
  290. js_type tt;
  291. l = xs_list_new();
  292. *t = JS_INCOMPLETE;
  293. v = _xs_json_loads_lexer(&s, &tt);
  294. if (tt == JS_CBRACK)
  295. *t = JS_ARRAY;
  296. else {
  297. v = _xs_json_loads_value(&s, &tt, v);
  298. if (tt == JS_VALUE) {
  299. l = xs_list_append(l, v);
  300. while (*t == JS_INCOMPLETE) {
  301. xs_free(_xs_json_loads_lexer(&s, &tt));
  302. if (tt == JS_CBRACK)
  303. *t = JS_ARRAY;
  304. else
  305. if (tt == JS_COMMA) {
  306. xs *v2;
  307. v2 = _xs_json_loads_lexer(&s, &tt);
  308. v2 = _xs_json_loads_value(&s, &tt, v2);
  309. if (tt == JS_VALUE)
  310. l = xs_list_append(l, v2);
  311. else
  312. *t = JS_ERROR;
  313. }
  314. else
  315. *t = JS_ERROR;
  316. }
  317. }
  318. else
  319. *t = JS_ERROR;
  320. }
  321. if (*t == JS_ERROR)
  322. l = xs_free(l);
  323. *json = s;
  324. return l;
  325. }
  326. static xs_dict *_xs_json_loads_object(const char **json, js_type *t)
  327. /* parses a JSON object */
  328. {
  329. const char *s = *json;
  330. xs *k1;
  331. xs_dict *d;
  332. js_type tt;
  333. d = xs_dict_new();
  334. *t = JS_INCOMPLETE;
  335. k1 = _xs_json_loads_lexer(&s, &tt);
  336. if (tt == JS_CCURLY)
  337. *t = JS_OBJECT;
  338. else
  339. if (tt == JS_STRING) {
  340. xs_free(_xs_json_loads_lexer(&s, &tt));
  341. if (tt == JS_COLON) {
  342. xs *v1;
  343. v1 = _xs_json_loads_lexer(&s, &tt);
  344. v1 = _xs_json_loads_value(&s, &tt, v1);
  345. if (tt == JS_VALUE) {
  346. d = xs_dict_append(d, k1, v1);
  347. while (*t == JS_INCOMPLETE) {
  348. xs_free(_xs_json_loads_lexer(&s, &tt));
  349. if (tt == JS_CCURLY)
  350. *t = JS_OBJECT;
  351. else
  352. if (tt == JS_COMMA) {
  353. xs *k = _xs_json_loads_lexer(&s, &tt);
  354. if (tt == JS_STRING) {
  355. xs_free(_xs_json_loads_lexer(&s, &tt));
  356. if (tt == JS_COLON) {
  357. xs *v;
  358. v = _xs_json_loads_lexer(&s, &tt);
  359. v = _xs_json_loads_value(&s, &tt, v);
  360. if (tt == JS_VALUE)
  361. d = xs_dict_append(d, k, v);
  362. else
  363. *t = JS_ERROR;
  364. }
  365. else
  366. *t = JS_ERROR;
  367. }
  368. else
  369. *t = JS_ERROR;
  370. }
  371. else
  372. *t = JS_ERROR;
  373. }
  374. }
  375. else
  376. *t = JS_ERROR;
  377. }
  378. else
  379. *t = JS_ERROR;
  380. }
  381. else
  382. *t = JS_ERROR;
  383. if (*t == JS_ERROR)
  384. d = xs_free(d);
  385. *json = s;
  386. return d;
  387. }
  388. xs_val *xs_json_loads(const xs_str *json)
  389. /* loads a string in JSON format and converts to a multiple data */
  390. {
  391. xs_val *v = NULL;
  392. js_type t;
  393. xs_free(_xs_json_loads_lexer(&json, &t));
  394. if (t == JS_OBRACK)
  395. v = _xs_json_loads_array(&json, &t);
  396. else
  397. if (t == JS_OCURLY)
  398. v = _xs_json_loads_object(&json, &t);
  399. else
  400. t = JS_ERROR;
  401. return v;
  402. }
  403. #endif /* XS_IMPLEMENTATION */
  404. #endif /* _XS_JSON_H */