xs_url.h 7.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300
  1. /* copyright (c) 2022 - 2025 grunfink et al. / MIT license */
  2. #ifndef _XS_URL_H
  3. #define _XS_URL_H
  4. xs_str *xs_url_dec(const char *str);
  5. xs_str *xs_url_enc(const char *str);
  6. xs_dict *xs_url_vars(const char *str);
  7. xs_dict *xs_multipart_form_data(const char *payload, int p_size, const char *header);
  8. #ifdef XS_IMPLEMENTATION
  char *xs_url_dec_in(char *str, int qs)
  /* URL-decodes str in place (the result is never longer than the input).
     Every '%XX' escape must be followed by exactly two hexadecimal digits;
     if qs is non-zero, '+' is also decoded as a space (query-string rules).
     Returns str, or NULL on a malformed or truncated escape; in that case
     the buffer may already have been partially rewritten */
  {
      char *w = str;
      char *r;

      for (r = str; *r != '\0'; r++) {
          if (*r == '%') {
              unsigned int byte = 0;
              int n;

              /* strict parsing: sscanf("%2x") would also accept leading
                 whitespace, a sign, a "0x" prefix or a single digit */
              for (n = 1; n <= 2; n++) {
                  unsigned char c = (unsigned char)r[n];

                  /* the NUL terminator is not a hex digit, so a truncated
                     escape is rejected here without reading past it */
                  if (!isxdigit(c))
                      return NULL;

                  byte = byte * 16 +
                      (unsigned int)(isdigit(c) ? c - '0' : tolower(c) - 'a' + 10);
              }

              *w++ = (char)byte;

              /* skip the two digits; the loop increment skips the '%' */
              r += 2;
          }
          else
          if (*r == '+' && qs)
              *w++ = ' ';
          else
              *w++ = *r;
      }

      *w = '\0';

      return str;
  }
  38. xs_str *xs_url_dec(const char *str)
  39. /* decodes an URL */
  40. {
  41. xs_str *s = xs_str_new(NULL);
  42. while (*str) {
  43. if (!xs_is_string(str))
  44. break;
  45. if (*str == '%') {
  46. unsigned int i;
  47. if (sscanf(str + 1, "%02x", &i) == 1) {
  48. unsigned char uc = i;
  49. if (!xs_is_string((char *)&uc))
  50. break;
  51. s = xs_append_m(s, (char *)&uc, 1);
  52. str += 2;
  53. }
  54. }
  55. else
  56. if (*str == '+')
  57. s = xs_append_m(s, " ", 1);
  58. else
  59. s = xs_append_m(s, str, 1);
  60. str++;
  61. }
  62. return s;
  63. }
  64. xs_str *xs_url_enc(const char *str)
  65. /* URL-encodes a string (RFC 3986) */
  66. {
  67. xs_str *s = xs_str_new(NULL);
  68. while (*str) {
  69. if (isalnum(*str) || strchr("-._~", *str)) {
  70. s = xs_append_m(s, str, 1);
  71. }
  72. else {
  73. char tmp[8];
  74. snprintf(tmp, sizeof(tmp), "%%%02X", (unsigned char)*str);
  75. s = xs_append_m(s, tmp, 3);
  76. }
  77. str++;
  78. }
  79. return s;
  80. }
  81. xs_dict *xs_url_vars(const char *str)
  82. /* parse url variables */
  83. {
  84. xs_dict *vars;
  85. vars = xs_dict_new();
  86. if (xs_is_string(str)) {
  87. xs *dup = xs_dup(str);
  88. char *k;
  89. char *saveptr;
  90. for (k = strtok_r(dup, "&", &saveptr);
  91. k;
  92. k = strtok_r(NULL, "&", &saveptr)) {
  93. char *v = strchr(k, '=');
  94. if (!v)
  95. continue;
  96. *v++ = '\0';
  97. k = xs_url_dec_in(k, 1);
  98. v = xs_url_dec_in(v, 1);
  99. if (!xs_is_string(k) || !xs_is_string(v))
  100. continue;
  101. const char *pv = xs_dict_get(vars, k);
  102. if (!xs_is_null(pv)) {
  103. /* there is a previous value: convert to a list and append */
  104. xs *vlist = NULL;
  105. if (xs_type(pv) == XSTYPE_LIST)
  106. vlist = xs_dup(pv);
  107. else {
  108. vlist = xs_list_new();
  109. vlist = xs_list_append(vlist, pv);
  110. }
  111. vlist = xs_list_append(vlist, v);
  112. vars = xs_dict_set(vars, k, vlist);
  113. }
  114. else {
  115. /* ends with []? force to always be a list */
  116. if (xs_endswith(k, "[]")) {
  117. xs *vlist = xs_list_new();
  118. vlist = xs_list_append(vlist, v);
  119. vars = xs_dict_append(vars, k, vlist);
  120. }
  121. else
  122. vars = xs_dict_append(vars, k, v);
  123. }
  124. }
  125. }
  126. return vars;
  127. }
  128. xs_dict *xs_multipart_form_data(const char *payload, int p_size, const char *header)
  129. /* parses a multipart/form-data payload */
  130. {
  131. xs *boundary = NULL;
  132. int offset = 0;
  133. int bsz;
  134. char *p;
  135. /* build the boundary string */
  136. {
  137. xs *l1 = xs_split(header, "=");
  138. if (xs_list_len(l1) != 2)
  139. return NULL;
  140. xs *t_boundary = xs_dup(xs_list_get(l1, 1));
  141. /* Tokodon sends the boundary header with double quotes surrounded */
  142. if (xs_between("\"", t_boundary, "\"") != 0)
  143. t_boundary = xs_strip_chars_i(t_boundary, "\"");
  144. boundary = xs_fmt("--%s", t_boundary);
  145. }
  146. bsz = strlen(boundary);
  147. xs_dict *p_vars = xs_dict_new();
  148. /* iterate searching the boundaries */
  149. while ((p = xs_memmem(payload + offset, p_size - offset, boundary, bsz)) != NULL) {
  150. xs *vn = NULL;
  151. xs *fn = NULL;
  152. xs *ct = NULL;
  153. char *q;
  154. int po, ps;
  155. /* final boundary? */
  156. p += bsz;
  157. if ((p - payload) + 2 > p_size || (p[0] == '-' && p[1] == '-'))
  158. break;
  159. /* skip the \r\n */
  160. p += 2;
  161. /* Tokodon sends also a Content-Type headers,
  162. let's use it to determine the file type */
  163. do {
  164. xs *s1 = NULL;
  165. xs *l1 = NULL;
  166. if (p[0] == '\r' && p[1] == '\n')
  167. break;
  168. q = memchr(p, '\r', p_size - (p - payload));
  169. /* unexpected formatting, fail immediately */
  170. if (q == NULL)
  171. return p_vars;
  172. s1 = xs_realloc(NULL, q - p + 1);
  173. memcpy(s1, p, q - p);
  174. s1[q - p] = '\0';
  175. if (xs_startswith(s1, "Content-Disposition") || xs_startswith(s1, "content-disposition")) {
  176. /* split by " like a primitive man */
  177. l1 = xs_split(s1, "\"");
  178. /* get the variable name */
  179. vn = xs_dup(xs_list_get(l1, 1));
  180. /* is it an attached file? */
  181. if (xs_list_len(l1) >= 4 && strcmp(xs_list_get(l1, 2), "; filename=") == 0) {
  182. /* get the file name */
  183. fn = xs_dup(xs_list_get(l1, 3));
  184. }
  185. }
  186. else
  187. if (xs_startswith(s1, "Content-Type") || xs_startswith(s1, "content-type")) {
  188. l1 = xs_split(s1, ":");
  189. if (xs_list_len(l1) >= 2) {
  190. ct = xs_lstrip_chars_i(xs_dup(xs_list_get(l1, 1)), " ");
  191. }
  192. }
  193. p += (q - p);
  194. p += 2; // Skip /r/n
  195. } while (1);
  196. /* find the start of the part content */
  197. if ((p = xs_memmem(p, p_size - (p - payload), "\r\n", 2)) == NULL)
  198. break;
  199. p += 2; // Skip empty line
  200. /* find the next boundary */
  201. if ((q = xs_memmem(p, p_size - (p - payload), boundary, bsz)) == NULL)
  202. break;
  203. po = p - payload;
  204. ps = q - p - 2; /* - 2 because the final \r\n */
  205. /* is it a filename? */
  206. if (fn != NULL) {
  207. /* p_var value is a list */
  208. /* if filename has no extension and content-type is image, attach extension to the filename */
  209. if (strchr(fn, '.') == NULL && ct && xs_startswith(ct, "image/")) {
  210. char *ext = strchr(ct, '/');
  211. ext++;
  212. fn = xs_str_cat(xs_str_new(""), fn, ".", ext);
  213. }
  214. xs *l1 = xs_list_new();
  215. xs *vpo = xs_number_new(po);
  216. xs *vps = xs_number_new(ps);
  217. l1 = xs_list_append(l1, fn);
  218. l1 = xs_list_append(l1, vpo);
  219. l1 = xs_list_append(l1, vps);
  220. if (xs_is_string(vn))
  221. p_vars = xs_dict_append(p_vars, vn, l1);
  222. }
  223. else {
  224. /* regular variable; just copy */
  225. xs *vc = xs_realloc(NULL, ps + 1);
  226. memcpy(vc, payload + po, ps);
  227. vc[ps] = '\0';
  228. if (xs_is_string(vn) && xs_is_string(vc))
  229. p_vars = xs_dict_append(p_vars, vn, vc);
  230. }
  231. /* move on */
  232. offset = q - payload;
  233. }
  234. return p_vars;
  235. }
  236. #endif /* XS_IMPLEMENTATION */
  237. #endif /* XS_URL_H */