format.c 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. /* snac - A simple, minimalistic ActivityPub instance */
  2. /* copyright (c) 2022 grunfink - MIT license */
  3. #include "xs.h"
  4. #include "xs_regex.h"
  5. #include "snac.h"
  /* Textual emoticons and :shortcodes: paired with the replacement text
     (a literal emoji or an HTML numeric entity). The table is scanned
     front to back and ends at the entry whose key is NULL. */
  struct {
      const char *key;    /* text to look for */
      const char *value;  /* text that replaces it */
  } smileys[] = {
      { .key = ":-)",         .value = "🙂" },
      { .key = ":-D",         .value = "😀" },
      { .key = "X-D",         .value = "😆" },
      { .key = ";-)",         .value = "😉" },
      { .key = "B-)",         .value = "😎" },
      { .key = ":-(",         .value = "😞" },
      { .key = ":-*",         .value = "😘" },
      { .key = ":-/",         .value = "😕" },
      { .key = "8-o",         .value = "😲" },
      { .key = "%-)",         .value = "🤪" },
      { .key = ":_(",         .value = "😢" },
      { .key = ":-|",         .value = "😐" },
      { .key = "<3",          .value = "&#128147;" },
      { .key = ":facepalm:",  .value = "&#129318;" },
      { .key = ":shrug:",     .value = "&#129335;" },
      { .key = ":shrug2:",    .value = "&#175;\\_(&#12484;)_/&#175;" },
      { .key = ":eyeroll:",   .value = "&#128580;" },
      { .key = ":beer:",      .value = "&#127866;" },
      { .key = ":beers:",     .value = "&#127867;" },
      { .key = ":munch:",     .value = "&#128561;" },
      { .key = ":thumb:",     .value = "&#128077;" },
      { .key = NULL,          .value = NULL }   /* terminator */
  };
  34. static d_char *format_line(const char *line)
  35. /* formats a line */
  36. {
  37. d_char *s = xs_str_new(NULL);
  38. char *p, *v;
  39. /* split by markup */
  40. xs *sm = xs_regex_split(line,
  41. "(`[^`]+`|\\*\\*?[^\\*]+\\*?\\*|https?:/" "/[^[:space:]]+)");
  42. int n = 0;
  43. p = sm;
  44. while (xs_list_iter(&p, &v)) {
  45. if ((n & 0x1)) {
  46. /* markup */
  47. if (xs_startswith(v, "`")) {
  48. xs *s1 = xs_crop(xs_dup(v), 1, -1);
  49. xs *s2 = xs_fmt("<code>%s</code>", s1);
  50. s = xs_str_cat(s, s2);
  51. }
  52. else
  53. if (xs_startswith(v, "**")) {
  54. xs *s1 = xs_crop(xs_dup(v), 2, -2);
  55. xs *s2 = xs_fmt("<b>%s</b>", s1);
  56. s = xs_str_cat(s, s2);
  57. }
  58. else
  59. if (xs_startswith(v, "*")) {
  60. xs *s1 = xs_crop(xs_dup(v), 1, -1);
  61. xs *s2 = xs_fmt("<i>%s</i>", s1);
  62. s = xs_str_cat(s, s2);
  63. }
  64. else
  65. if (xs_startswith(v, "http")) {
  66. xs *s1 = xs_fmt("<a href=\"%s\" target=\"_blank\">%s</a>", v, v);
  67. s = xs_str_cat(s, s1);
  68. }
  69. else
  70. s = xs_str_cat(s, v);
  71. }
  72. else
  73. /* surrounded text, copy directly */
  74. s = xs_str_cat(s, v);
  75. n++;
  76. }
  77. return s;
  78. }
  79. d_char *not_really_markdown(const char *content)
  80. /* formats a content using some Markdown rules */
  81. {
  82. d_char *s = xs_str_new(NULL);
  83. int in_pre = 0;
  84. int in_blq = 0;
  85. xs *list;
  86. char *p, *v;
  87. /* work by lines */
  88. p = list = xs_split(content, "\n");
  89. while (xs_list_iter(&p, &v)) {
  90. xs *ss = NULL;
  91. if (strcmp(v, "```") == 0) {
  92. if (!in_pre)
  93. s = xs_str_cat(s, "<pre>");
  94. else
  95. s = xs_str_cat(s, "</pre>");
  96. in_pre = !in_pre;
  97. continue;
  98. }
  99. if (in_pre)
  100. ss = xs_dup(v);
  101. else
  102. ss = xs_strip(format_line(v));
  103. if (xs_startswith(ss, ">")) {
  104. /* delete the > and subsequent spaces */
  105. ss = xs_strip(xs_crop(ss, 1, 0));
  106. if (!in_blq) {
  107. s = xs_str_cat(s, "<blockquote>");
  108. in_blq = 1;
  109. }
  110. s = xs_str_cat(s, ss);
  111. s = xs_str_cat(s, "<br>");
  112. continue;
  113. }
  114. if (in_blq) {
  115. s = xs_str_cat(s, "</blockquote>");
  116. in_blq = 0;
  117. }
  118. s = xs_str_cat(s, ss);
  119. s = xs_str_cat(s, "<br>");
  120. }
  121. if (in_blq)
  122. s = xs_str_cat(s, "</blockquote>");
  123. if (in_pre)
  124. s = xs_str_cat(s, "</pre>");
  125. /* some beauty fixes */
  126. s = xs_replace_i(s, "<br><br><blockquote>", "<br><blockquote>");
  127. s = xs_replace_i(s, "</blockquote><br>", "</blockquote>");
  128. s = xs_replace_i(s, "</pre><br>", "</pre>");
  129. {
  130. /* traditional emoticons */
  131. int n;
  132. for (n = 0; smileys[n].key; n++)
  133. s = xs_replace_i(s, smileys[n].key, smileys[n].value);
  134. }
  135. return s;
  136. }
  /* Tag names that sanitize() accepts; it compares the lowercased name
     of every tag against this NULL-terminated list. */
  const char *valid_tags[] = {
      "a",
      "p",
      "br",
      "br/",
      "blockquote",
      "ul",
      "li",
      "span",
      "i",
      "b",
      "pre",
      "code",
      "em",
      "strong",
      NULL
  };
  141. d_char *sanitize(const char *content)
  142. /* cleans dangerous HTML output */
  143. {
  144. d_char *s = xs_str_new(NULL);
  145. xs *sl;
  146. int n = 0;
  147. char *p, *v;
  148. sl = xs_regex_split(content, "</?[^>]+>");
  149. p = sl;
  150. while (xs_list_iter(&p, &v)) {
  151. if (n & 0x1) {
  152. xs *s1 = xs_strip(xs_crop(xs_dup(v), v[1] == '/' ? 2 : 1, -1));
  153. xs *l1 = xs_split_n(s1, " ", 1);
  154. xs *tag = xs_tolower(xs_dup(xs_list_get(l1, 0)));
  155. xs *s2 = NULL;
  156. int i;
  157. /* check if it's one of the valid tags */
  158. for (i = 0; valid_tags[i]; i++) {
  159. if (strcmp(tag, valid_tags[i]) == 0)
  160. break;
  161. }
  162. if (valid_tags[i]) {
  163. /* accepted tag: rebuild it with only the accepted elements */
  164. xs *el = xs_regex_match(v, "(href|rel|class|target)=\"[^\"]*\"");
  165. xs *s3 = xs_join(el, " ");
  166. s2 = xs_fmt("<%s%s%s%s>",
  167. v[1] == '/' ? "/" : "", tag, xs_list_len(el) ? " " : "", s3);
  168. }
  169. else {
  170. /* bad tag: escape it */
  171. s2 = xs_replace(v, "<", "&lt;");
  172. }
  173. s = xs_str_cat(s, s2);
  174. }
  175. else {
  176. /* non-tag */
  177. s = xs_str_cat(s, v);
  178. }
  179. n++;
  180. }
  181. return s;
  182. }