format.c 7.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308
  1. /* snac - A simple, minimalistic ActivityPub instance */
  2. /* copyright (c) 2022 - 2024 grunfink et al. / MIT license */
  3. #include "xs.h"
  4. #include "xs_regex.h"
  5. #include "xs_mime.h"
  6. #include "xs_html.h"
  7. #include "xs_json.h"
  8. #include "snac.h"
  /*
   * Default emoticon table: a flat list of (shortcut, replacement) pairs,
   * closed by a NULL, NULL pair. Replacements are either literal emoji or
   * HTML numeric character references.
   */
  const char *smileys[] = {
      /* classic ASCII faces */
      ":-)",        "🙂",
      ":-D",        "😀",
      "X-D",        "😆",
      ";-)",        "😉",
      "B-)",        "😎",
      ">:-(",       "😡",
      ":-(",        "😞",
      ":-*",        "😘",
      ":-/",        "😕",
      "8-o",        "😲",
      "%-)",        "🤪",
      ":_(",        "😢",
      ":-|",        "😐",

      /* heart */
      "<3",         "&#10084;&#65039;",

      /* named shortcuts */
      ":facepalm:", "&#129318;",
      ":shrug:",    "&#129335;",
      ":shrug2:",   "&#175;\\_(&#12484;)_/&#175;",
      ":eyeroll:",  "&#128580;",
      ":beer:",     "&#127866;",
      ":beers:",    "&#127867;",
      ":munch:",    "&#128561;",
      ":thumb:",    "&#128077;",

      /* end of table */
      NULL,         NULL
  };
  35. xs_dict *emojis(void)
  36. /* returns a dict with the emojis */
  37. {
  38. xs *fn = xs_fmt("%s/emojis.json", srv_basedir);
  39. FILE *f;
  40. if (mtime(fn) == 0) {
  41. /* file does not exist; create it with the defaults */
  42. xs *d = xs_dict_new();
  43. const char **emo = smileys;
  44. while (*emo) {
  45. d = xs_dict_append(d, emo[0], emo[1]);
  46. emo += 2;
  47. }
  48. if ((f = fopen(fn, "w")) != NULL) {
  49. xs_json_dump(d, 4, f);
  50. fclose(f);
  51. }
  52. }
  53. xs_dict *d = NULL;
  54. if ((f = fopen(fn, "r")) != NULL) {
  55. d = xs_json_load(f);
  56. fclose(f);
  57. }
  58. return d;
  59. }
  60. static xs_str *format_line(const char *line, xs_list **attach)
  61. /* formats a line */
  62. {
  63. xs_str *s = xs_str_new(NULL);
  64. char *p, *v;
  65. /* split by markup */
  66. xs *sm = xs_regex_split(line,
  67. "(`[^`]+`|\\*\\*?[^\\*]+\\*?\\*|https?:/" "/[^[:space:]]+)");
  68. int n = 0;
  69. p = sm;
  70. while (xs_list_iter(&p, &v)) {
  71. if ((n & 0x1)) {
  72. /* markup */
  73. if (xs_startswith(v, "`")) {
  74. xs *s1 = xs_crop_i(xs_dup(v), 1, -1);
  75. xs *e1 = encode_html(s1);
  76. xs *s2 = xs_fmt("<code>%s</code>", e1);
  77. s = xs_str_cat(s, s2);
  78. }
  79. else
  80. if (xs_startswith(v, "**")) {
  81. xs *s1 = xs_crop_i(xs_dup(v), 2, -2);
  82. xs *s2 = xs_fmt("<b>%s</b>", s1);
  83. s = xs_str_cat(s, s2);
  84. }
  85. else
  86. if (xs_startswith(v, "*")) {
  87. xs *s1 = xs_crop_i(xs_dup(v), 1, -1);
  88. xs *s2 = xs_fmt("<i>%s</i>", s1);
  89. s = xs_str_cat(s, s2);
  90. }
  91. else
  92. if (xs_startswith(v, "http")) {
  93. xs *u = xs_replace(v, "#", "&#35;");
  94. xs *v2 = xs_strip_chars_i(xs_dup(u), ".,)");
  95. const char *mime = xs_mime_by_ext(v2);
  96. if (attach != NULL && xs_startswith(mime, "image/")) {
  97. /* if it's a link to an image, insert it as an attachment */
  98. xs *d = xs_dict_new();
  99. d = xs_dict_append(d, "mediaType", mime);
  100. d = xs_dict_append(d, "url", v2);
  101. d = xs_dict_append(d, "name", "");
  102. d = xs_dict_append(d, "type", "Image");
  103. *attach = xs_list_append(*attach, d);
  104. }
  105. else {
  106. xs *s1 = xs_fmt("<a href=\"%s\" target=\"_blank\">%s</a>", v2, u);
  107. s = xs_str_cat(s, s1);
  108. }
  109. }
  110. else
  111. s = xs_str_cat(s, v);
  112. }
  113. else
  114. /* surrounded text, copy directly */
  115. s = xs_str_cat(s, v);
  116. n++;
  117. }
  118. return s;
  119. }
  120. xs_str *not_really_markdown(const char *content, xs_list **attach)
  121. /* formats a content using some Markdown rules */
  122. {
  123. xs_str *s = xs_str_new(NULL);
  124. int in_pre = 0;
  125. int in_blq = 0;
  126. xs *list;
  127. char *p, *v;
  128. /* work by lines */
  129. list = xs_split(content, "\n");
  130. p = list;
  131. while (xs_list_iter(&p, &v)) {
  132. xs *ss = NULL;
  133. if (strcmp(v, "```") == 0) {
  134. if (!in_pre)
  135. s = xs_str_cat(s, "<pre>");
  136. else
  137. s = xs_str_cat(s, "</pre>");
  138. in_pre = !in_pre;
  139. continue;
  140. }
  141. if (in_pre) {
  142. // Encode all HTML characters when we're in pre element until we are out.
  143. ss = encode_html(v);
  144. s = xs_str_cat(s, ss);
  145. s = xs_str_cat(s, "<br>");
  146. continue;
  147. }
  148. else
  149. ss = xs_strip_i(format_line(v, attach));
  150. if (xs_startswith(ss, "---")) {
  151. /* delete the --- */
  152. ss = xs_strip_i(xs_crop_i(ss, 3, 0));
  153. s = xs_str_cat(s, "<hr>");
  154. s = xs_str_cat(s, ss);
  155. continue;
  156. }
  157. if (xs_startswith(ss, ">")) {
  158. /* delete the > and subsequent spaces */
  159. ss = xs_strip_i(xs_crop_i(ss, 1, 0));
  160. if (!in_blq) {
  161. s = xs_str_cat(s, "<blockquote>");
  162. in_blq = 1;
  163. }
  164. s = xs_str_cat(s, ss);
  165. s = xs_str_cat(s, "<br>");
  166. continue;
  167. }
  168. if (in_blq) {
  169. s = xs_str_cat(s, "</blockquote>");
  170. in_blq = 0;
  171. }
  172. s = xs_str_cat(s, ss);
  173. s = xs_str_cat(s, "<br>");
  174. }
  175. if (in_blq)
  176. s = xs_str_cat(s, "</blockquote>");
  177. if (in_pre)
  178. s = xs_str_cat(s, "</pre>");
  179. /* some beauty fixes */
  180. s = xs_replace_i(s, "<br><br><blockquote>", "<br><blockquote>");
  181. s = xs_replace_i(s, "</blockquote><br>", "</blockquote>");
  182. s = xs_replace_i(s, "</pre><br>", "</pre>");
  183. {
  184. /* traditional emoticons */
  185. xs *d = emojis();
  186. int c = 0;
  187. char *k, *v;
  188. while (xs_dict_next(d, &k, &v, &c)) {
  189. s = xs_replace_i(s, k, v);
  190. }
  191. }
  192. return s;
  193. }
  /* HTML tag names that sanitize() lets through; NULL-terminated */
  const char *valid_tags[] = {
      /* links, paragraphs and line breaks */
      "a", "p", "br", "br/",
      /* block level */
      "blockquote", "ul", "ol", "li",
      /* inline */
      "cite", "small", "span", "i", "b", "u", "s",
      /* code and emphasis */
      "pre", "code", "em", "strong",
      /* the rest */
      "hr", "img", "del", "bdi",
      NULL
  };
  198. xs_str *sanitize(const char *content)
  199. /* cleans dangerous HTML output */
  200. {
  201. xs_str *s = xs_str_new(NULL);
  202. xs *sl;
  203. int n = 0;
  204. char *p, *v;
  205. sl = xs_regex_split(content, "</?[^>]+>");
  206. p = sl;
  207. n = 0;
  208. while (xs_list_iter(&p, &v)) {
  209. if (n & 0x1) {
  210. xs *s1 = xs_strip_i(xs_crop_i(xs_dup(v), v[1] == '/' ? 2 : 1, -1));
  211. xs *l1 = xs_split_n(s1, " ", 1);
  212. xs *tag = xs_tolower_i(xs_dup(xs_list_get(l1, 0)));
  213. xs *s2 = NULL;
  214. int i;
  215. /* check if it's one of the valid tags */
  216. for (i = 0; valid_tags[i]; i++) {
  217. if (strcmp(tag, valid_tags[i]) == 0)
  218. break;
  219. }
  220. if (valid_tags[i]) {
  221. /* accepted tag: rebuild it with only the accepted elements */
  222. xs *el = xs_regex_select(v, "(src|href|rel|class|target)=\"[^\"]*\"");
  223. xs *s3 = xs_join(el, " ");
  224. s2 = xs_fmt("<%s%s%s%s>",
  225. v[1] == '/' ? "/" : "", tag, xs_list_len(el) ? " " : "", s3);
  226. s = xs_str_cat(s, s2);
  227. } else {
  228. /* else? just show it with encoded code.. that's it. */
  229. xs *el = encode_html(v);
  230. s = xs_str_cat(s, el);
  231. }
  232. }
  233. else {
  234. /* non-tag */
  235. s = xs_str_cat(s, v);
  236. }
  237. n++;
  238. }
  239. return s;
  240. }
  241. xs_str *encode_html(const char *str)
  242. /* escapes html characters */
  243. {
  244. xs_str *encoded = xs_html_encode((char *)str);
  245. /* Restore only <br>. Probably safe. Let's hope nothing goes wrong with this. */
  246. encoded = xs_replace_i(encoded, "&lt;br&gt;", "<br>");
  247. return encoded;
  248. }