Browse Source

Replaced encode_html_strict() with xs_html_encode().

default 1 year ago
parent
commit
bc5d0d4ed0
8 changed files with 273 additions and 26 deletions
  1. 4 3
      Makefile
  2. 2 14
      format.c
  3. 6 5
      html.c
  4. 1 0
      snac.c
  5. 1 2
      snac.h
  6. 18 1
      xs.h
  7. 240 0
      xs_html.h
  8. 1 1
      xs_version.h

+ 4 - 3
Makefile

@@ -37,9 +37,9 @@ activitypub.o: activitypub.c xs.h xs_json.h xs_curl.h xs_mime.h \
  xs_openssl.h xs_regex.h xs_time.h xs_set.h xs_match.h snac.h
 data.o: data.c xs.h xs_hex.h xs_io.h xs_json.h xs_openssl.h xs_glob.h \
  xs_set.h xs_time.h snac.h
-format.o: format.c xs.h xs_regex.h xs_mime.h snac.h
+format.o: format.c xs.h xs_regex.h xs_mime.h xs_html.h snac.h
 html.o: html.c xs.h xs_io.h xs_json.h xs_regex.h xs_set.h xs_openssl.h \
- xs_time.h xs_mime.h xs_match.h snac.h
+ xs_time.h xs_mime.h xs_match.h xs_html.h snac.h
 http.o: http.c xs.h xs_io.h xs_openssl.h xs_curl.h xs_time.h xs_json.h \
  snac.h
 httpd.o: httpd.c xs.h xs_io.h xs_json.h xs_socket.h xs_httpd.h xs_mime.h \
@@ -50,7 +50,8 @@ mastoapi.o: mastoapi.c xs.h xs_hex.h xs_openssl.h xs_json.h xs_io.h \
  snac.h
 snac.o: snac.c xs.h xs_hex.h xs_io.h xs_unicode.h xs_json.h xs_curl.h \
  xs_openssl.h xs_socket.h xs_url.h xs_httpd.h xs_mime.h xs_regex.h \
- xs_set.h xs_time.h xs_glob.h xs_random.h xs_match.h xs_fcgi.h snac.h
+ xs_set.h xs_time.h xs_glob.h xs_random.h xs_match.h xs_fcgi.h xs_html.h \
+ snac.h
 upgrade.o: upgrade.c xs.h xs_io.h xs_json.h xs_glob.h snac.h
 utils.o: utils.c xs.h xs_io.h xs_json.h xs_time.h xs_openssl.h \
  xs_random.h xs_glob.h snac.h

+ 2 - 14
format.c

@@ -4,6 +4,7 @@
 #include "xs.h"
 #include "xs_regex.h"
 #include "xs_mime.h"
+#include "xs_html.h"
 
 #include "snac.h"
 
@@ -260,23 +261,10 @@ xs_str *sanitize(const char *content)
 }
 
 
-xs_str *encode_html_strict(const char *str)
-/* escapes html characters */
-{
-    xs_str *encoded = xs_replace(str, "&", "&amp;");
-    encoded = xs_replace_i(encoded, "<", "&lt;");
-    encoded = xs_replace_i(encoded, ">", "&gt;");
-    encoded = xs_replace_i(encoded, "\"", "&#34;");
-    encoded = xs_replace_i(encoded, "'", "&#39;");
-
-    return encoded;
-}
-
-
 xs_str *encode_html(const char *str)
 /* escapes html characters */
 {
-    xs_str *encoded = encode_html_strict(str);
+    xs_str *encoded = xs_html_encode((char *)str);
 
     /* Restore only <br>. Probably safe. Let's hope nothing goes wrong with this. */
     encoded = xs_replace_i(encoded, "&lt;br&gt;", "<br>");

+ 6 - 5
html.c

@@ -10,6 +10,7 @@
 #include "xs_time.h"
 #include "xs_mime.h"
 #include "xs_match.h"
+#include "xs_html.h"
 
 #include "snac.h"
 
@@ -2137,10 +2138,10 @@ int html_get_handler(const xs_dict *req, const char *q_path,
         xs *bio   = not_really_markdown(xs_dict_get(snac.config, "bio"), NULL);
         char *p, *v;
 
-        xs *es1 = encode_html_strict(xs_dict_get(snac.config, "name"));
-        xs *es2 = encode_html_strict(snac.uid);
-        xs *es3 = encode_html_strict(xs_dict_get(srv_config, "host"));
-        xs *es4 = encode_html_strict(bio);
+        xs *es1 = xs_html_encode(xs_dict_get(snac.config, "name"));
+        xs *es2 = xs_html_encode(snac.uid);
+        xs *es3 = xs_html_encode(xs_dict_get(srv_config, "host"));
+        xs *es4 = xs_html_encode(bio);
         rss = xs_fmt(
             "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
             "<rss version=\"0.91\">\n"
@@ -2168,7 +2169,7 @@ int html_get_handler(const xs_dict *req, const char *q_path,
             if (!xs_startswith(id, snac.actor))
                 continue;
 
-            xs *content = encode_html_strict(xs_dict_get(msg, "content"));
+            xs *content = xs_html_encode(xs_dict_get(msg, "content"));
 
             // We SHOULD only use sanitized one for description.
             // So, only encode for feed title, while the description just keep it sanitized as is.

+ 1 - 0
snac.c

@@ -21,6 +21,7 @@
 #include "xs_random.h"
 #include "xs_match.h"
 #include "xs_fcgi.h"
+#include "xs_html.h"
 
 #include "snac.h"
 

+ 1 - 2
snac.h

@@ -1,7 +1,7 @@
 /* snac - A simple, minimalistic ActivityPub instance */
 /* copyright (c) 2022 - 2023 grunfink et al. / MIT license */
 
-#define VERSION "2.43"
+#define VERSION "2.44-dev"
 
 #define USER_AGENT "snac/" VERSION
 
@@ -266,7 +266,6 @@ int activitypub_post_handler(const xs_dict *req, const char *q_path,
 
 xs_str *not_really_markdown(const char *content, xs_list **attach);
 xs_str *sanitize(const char *content);
-xs_str *encode_html_strict(const char *str);
 xs_str *encode_html(const char *str);
 
 xs_str *html_timeline(snac *user, const xs_list *list, int local,

+ 18 - 1
xs.h

@@ -62,7 +62,8 @@ xs_str *xs_str_new(const char *str);
 xs_str *xs_str_new_sz(const char *mem, int sz);
 xs_str *xs_str_wrap_i(const char *prefix, xs_str *str, const char *suffix);
 #define xs_str_prepend_i(str, prefix) xs_str_wrap_i(prefix, str, NULL)
-#define xs_str_cat(str, suffix) xs_str_wrap_i(NULL, str, suffix)
+xs_str *_xs_str_cat(xs_str *str, const char *strs[]);
+#define xs_str_cat(str, ...) _xs_str_cat(str, (const char *[]){ __VA_ARGS__, NULL })
 xs_str *xs_replace_in(xs_str *str, const char *sfrom, const char *sto, int times);
 #define xs_replace_i(str, sfrom, sto) xs_replace_in(str, sfrom, sto, XS_ALL)
 #define xs_replace(str, sfrom, sto) xs_replace_in(xs_dup(str), sfrom, sto, XS_ALL)
@@ -451,6 +452,22 @@ xs_str *xs_str_wrap_i(const char *prefix, xs_str *str, const char *suffix)
 }
 
 
+xs_str *_xs_str_cat(xs_str *str, const char *strs[])
+/* appends, in order, every string of the NULL-terminated array strs to str; returns the resulting string */
+{
+    int o = strlen(str);   /* insertion offset: starts at the current end of str */
+
+    while (*strs) {   /* a NULL entry ends the array */
+        int sz = strlen(*strs);
+        str = xs_insert_m(str, o, *strs, sz);   /* keep the pointer xs_insert_m hands back */
+        o += sz;   /* the next string goes right after this one */
+        strs++;
+    }
+
+    return str;
+}
+
+
 xs_str *xs_replace_in(xs_str *str, const char *sfrom, const char *sto, int times)
 /* replaces inline all sfrom with sto */
 {

+ 240 - 0
xs_html.h

@@ -0,0 +1,240 @@
+/* copyright (c) 2022 - 2023 grunfink et al. / MIT license */
+
+#ifndef _XS_HTML_H
+
+#define _XS_HTML_H
+
+typedef struct xs_html xs_html;   /* node of an HTML tree; defined under XS_IMPLEMENTATION */
+
+xs_str *xs_html_encode(char *str);   /* returns str with <, >, ", ' and & turned into numeric entities */
+
+xs_html *xs_html_attr(char *key, char *value);   /* attribute node; a NULL value yields a bare key */
+xs_html *xs_html_text(char *content);   /* text node, content is escaped */
+xs_html *xs_html_raw(char *content);   /* text node, content is copied without escaping */
+
+xs_html *xs_html_add(xs_html *tag, xs_html *data);   /* appends data to tag and returns tag */
+
+xs_html *_xs_html_tag(char *tag, xs_html *var[]);   /* var must end with a NULL entry; the macro below adds it */
+#define xs_html_tag(tag, ...) _xs_html_tag(tag, (xs_html *[]) { __VA_ARGS__, NULL })
+xs_html *_xs_html_sctag(char *tag, xs_html *var[]);   /* same as _xs_html_tag, for self-closing tags */
+#define xs_html_sctag(tag, ...) _xs_html_sctag(tag, (xs_html *[]) { __VA_ARGS__, NULL })
+xs_str *_xs_html_render(xs_html *h, xs_str *s);   /* appends the markup of h to s; h is freed */
+#define xs_html_render(h) _xs_html_render(h, xs_str_new(NULL))
+
+#ifdef XS_IMPLEMENTATION
+
+typedef enum {   /* kind of an xs_html node; selects how _xs_html_render() emits it */
+    XS_HTML_TAG,   /* element rendered with an opening and a closing tag */
+    XS_HTML_SCTAG,   /* self-closing element, rendered as <name ... /> */
+    XS_HTML_ATTR,   /* attribute, rendered inside the opening tag after a space */
+    XS_HTML_TEXT   /* content emitted as stored (used for both escaped text and raw blocks) */
+} xs_html_type;
+
+struct xs_html {   /* one node of the tree: a tag, an attribute or a piece of text */
+    xs_html_type type;   /* node kind */
+    xs_str *content;   /* tag name, rendered attribute or text, depending on type */
+    xs_html *f_attr;   /* first attribute of this tag (NULL if none) */
+    xs_html *l_attr;   /* last attribute, where xs_html_add() appends */
+    xs_html *f_tag;   /* first child (tag or text) of this tag (NULL if none) */
+    xs_html *l_tag;   /* last child, where xs_html_add() appends */
+    xs_html *next;   /* next node in the list this node was added to */
+};
+
+xs_str *xs_html_encode(char *str)
+/* returns a new string: a copy of str with <, >, ", ' and & replaced by decimal &#NN; entities */
+{
+    xs_str *s = xs_str_new(NULL);   /* output, built piece by piece */
+    int o = 0;   /* current length of s, used as the insertion offset */
+    char *e = str + strlen(str);   /* end of input; NOTE(review): a NULL str is not handled here */
+
+    for (;;) {   /* each pass copies one clean run plus the special char that ends it */
+        char *ec = "<>\"'&";   /* characters to escape */
+        char *q = e;   /* nearest special char found so far; e means none */
+        int z;
+
+        /* find the nearest occurrence of any char in ec */
+        while (*ec) {
+            char *m = memchr(str, *ec++, q - str);   /* search only up to the best match so far */
+            if (m)
+                q = m;
+        }
+
+        /* copy the unescaped run from str up to (not including) q */
+        z = q - str;
+        s = xs_insert_m(s, o, str, z);
+        o += z;
+
+        /* if q points to the end, nothing more to do */
+        if (q == e)
+            break;
+
+        /* insert the escaped char as a decimal numeric entity */
+        char tmp[8];   /* large enough for the two-digit codes of the chars in ec */
+        snprintf(tmp, sizeof(tmp), "&#%d;", *q);
+
+        z = strlen(tmp);
+        s = xs_insert_m(s, o, tmp, z);
+        o += z;
+
+        str = q + 1;   /* resume scanning right after the escaped char */
+    }
+
+    return s;
+}
+
+
+#define XS_HTML_NEW() memset(xs_realloc(NULL, sizeof(xs_html)), '\0', sizeof(xs_html))   /* new zero-filled node; assumes xs_realloc() never returns NULL -- TODO confirm */
+
+xs_html *xs_html_attr(char *key, char *value)
+/* creates an attribute node: key="escaped value", or just key if value is NULL */
+{
+    xs_html *a = XS_HTML_NEW();
+
+    a->type = XS_HTML_ATTR;
+
+    if (value) {
+        xs *ev = xs_html_encode(value);   /* only the value is escaped; key is used as given */
+        a->content = xs_fmt("%s=\"%s\"", key, ev);
+    }
+    else
+        a->content = xs_dup(key);   /* valueless attribute */
+
+    return a;
+}
+
+
+xs_html *xs_html_text(char *content)
+/* creates a text node holding content after passing it through xs_html_encode() */
+{
+    xs_html *a = XS_HTML_NEW();
+
+    a->type    = XS_HTML_TEXT;
+    a->content = xs_html_encode(content);   /* stored already escaped, so rendering emits it as is */
+
+    return a;
+}
+
+
+xs_html *xs_html_raw(char *content)
+/* creates a node holding an unescaped copy of content (for pre-formatted HTML, comments, etc) */
+{
+    xs_html *a = XS_HTML_NEW();
+
+    a->type    = XS_HTML_TEXT;   /* same type as escaped text: only the stored content differs */
+    a->content = xs_dup(content);
+
+    return a;
+}
+
+
+xs_html *xs_html_add(xs_html *tag, xs_html *data)
+/* appends data to tag's attribute list (if it is an attr) or child list (tags, text); returns tag */
+{
+    xs_html **first;   /* head pointer of the list data belongs to */
+    xs_html **last;   /* tail pointer of that same list */
+
+    if (data->type == XS_HTML_ATTR) {
+        first = &tag->f_attr;
+        last  = &tag->l_attr;
+    }
+    else {
+        first = &tag->f_tag;
+        last  = &tag->l_tag;
+    }
+
+    if (*first == NULL)   /* empty list: data becomes the head */
+        *first = data;
+
+    if (*last != NULL)   /* otherwise link it after the current tail */
+        (*last)->next = data;
+
+    *last = data;   /* data is the new tail in both cases */
+
+    return tag;
+}
+
+
+static xs_html *_xs_html_tag_t(xs_html_type type, char *tag, xs_html *var[])
+/* creates a node of the given type named tag and adds every entry of the NULL-terminated array var */
+{
+    xs_html *a = XS_HTML_NEW();
+
+    a->type    = type;
+    a->content = xs_dup(tag);   /* the node keeps its own copy of the tag name */
+
+    while (*var)   /* xs_html_add() sorts each entry into the attr or child list */
+        xs_html_add(a, *var++);
+
+    return a;
+}
+
+
+xs_html *_xs_html_tag(char *tag, xs_html *var[])   /* function behind the xs_html_tag() macro */
+{
+    return _xs_html_tag_t(XS_HTML_TAG, tag, var);   /* regular tag: rendered with a closing tag */
+}
+
+
+xs_html *_xs_html_sctag(char *tag, xs_html *var[])   /* function behind the xs_html_sctag() macro */
+{
+    return _xs_html_tag_t(XS_HTML_SCTAG, tag, var);   /* self-closing tag: rendered as <name ... /> */
+}
+
+
+xs_str *_xs_html_render(xs_html *h, xs_str *s)
+/* appends the markup of h (and, recursively, its attributes and subtags) to s, frees h and returns s */
+{
+    xs_html *st;
+
+    switch (h->type) {
+    case XS_HTML_TAG:
+    case XS_HTML_SCTAG:
+        s = xs_str_cat(s, "<", h->content);   /* for tags, content is the tag name */
+
+        /* render the attributes */
+        st = h->f_attr;
+        while (st) {
+            xs_html *nst = st->next;   /* read next first: the recursive call frees st */
+            s = _xs_html_render(st, s);
+            st = nst;
+        }
+
+        if (h->type == XS_HTML_SCTAG) {
+            /* self-closing tags should not have subtags; NOTE(review): any present are neither rendered nor freed */
+            s = xs_str_cat(s, "/>");
+        }
+        else {
+            s = xs_str_cat(s, ">");
+
+            /* render the subtags */
+            st = h->f_tag;
+            while (st) {
+                xs_html *nst = st->next;   /* same as above: st is gone after the call */
+                s = _xs_html_render(st, s);
+                st = nst;
+            }
+
+            s = xs_str_cat(s, "</", h->content, ">");
+        }
+
+        break;
+
+    case XS_HTML_ATTR:
+        s = xs_str_cat(s, " ", h->content);   /* content already holds key="value" or the bare key */
+        break;
+
+    case XS_HTML_TEXT:
+        s = xs_str_cat(s, h->content);   /* emitted as stored; any escaping happened at creation */
+        break;
+    }
+
+    xs_free(h->content);   /* rendering consumes the node */
+    xs_free(h);
+
+    return s;
+}
+
+
+#endif /* XS_IMPLEMENTATION */
+
+#endif /* _XS_HTML_H */

+ 1 - 1
xs_version.h

@@ -1 +1 @@
-/* 416f5ffa99ecd4a3ec25d273b986d3d99dc92d22 */
+/* 63beb583926bb5dfec89e1d694172cc887614460 2023-11-19T19:51:05+01:00 */