Browse Source

Backport from xs.

default 1 year ago
parent
commit
bf435af788
4 changed files with 247 additions and 128 deletions
  1. 103 35
      xs_hex.h
  2. 30 13
      xs_socket.h
  3. 113 79
      xs_unicode.h
  4. 1 1
      xs_version.h

+ 103 - 35
xs_hex.h

@@ -4,65 +4,129 @@
 
 #define _XS_HEX_H
 
-xs_str *xs_hex_enc(const xs_val *data, int size);
-xs_val *xs_hex_dec(const xs_str *hex, int *size);
-int xs_is_hex(const char *str);
+ int xs_is_hex_digit(char str);
+ void xs_hex_enc_1(char **dst, const char **src);
+ int xs_hex_dec_1(char **dst, const char **src);
+ char *_xs_hex_enc(char *dst, const char *src, int src_size);
+ char *_xs_hex_dec(char *dst, const char *src, int src_size);
+
+#ifdef _XS_H
+ xs_str *xs_hex_enc(const xs_val *data, int size);
+ xs_val *xs_hex_dec(const xs_str *hex, int *size);
+ int xs_is_hex(const char *str);
+#endif /* _XS_H */
+
 
 #ifdef XS_IMPLEMENTATION
 
+#include <string.h>
+
 /** hex **/
 
 static char rev_hex_digits[] = "fedcba9876543210FEDCBA";
 
-xs_str *xs_hex_enc(const xs_val *data, int size)
-/* returns an hexdump of data */
+int xs_is_hex_digit(char str)
+/* checks if the char is an hex digit (0-9, a-f or A-F); the NUL
+   terminator is explicitly rejected, as strchr() would match it */
 {
-    xs_str *s;
-    char *p;
-    int n;
+    return str != '\0' && strchr(rev_hex_digits, str) != NULL;
+}
 
-    p = s = xs_realloc(NULL, _xs_blk_size(size * 2 + 1));
 
-    for (n = 0; n < size; n++) {
-        *p++ = rev_hex_digits[0xf - (*data >> 4 & 0xf)];
-        *p++ = rev_hex_digits[0xf - (*data      & 0xf)];
-        data++;
+void xs_hex_enc_1(char **dst, const char **src)
+/* encodes one byte from *src as two lowercase hex digits into *dst,
+   advancing *src by one and *dst by two (no NUL terminator is written) */
+{
+    /* read the byte as unsigned, so the right shift never operates
+       on a negative value when plain char is signed */
+    unsigned char c = (unsigned char) **src;
+    char *o = *dst;
+
+    /* rev_hex_digits is stored in reverse order: index 0xf - n
+       holds the lowercase digit for the nibble n */
+    *o++ = rev_hex_digits[0xf - (c >> 4)];
+    *o++ = rev_hex_digits[0xf - (c & 0xf)];
+
+    *src = *src + 1;
+    *dst = o;
+}
+
+
+int xs_hex_dec_1(char **dst, const char **src)
+/* decodes the two hex digits at *src into one byte stored at *dst,
+   advancing *src by two and *dst by one. returns 1 on success or
+   0 on error, in which case *src and *dst are left untouched */
+{
+    const char *i = *src;
+    char *o = *dst;
+
+    /* strchr() also matches the terminating NUL of the digit table,
+       so NUL bytes are rejected explicitly; the second char is only
+       looked at if the first one is a valid digit */
+    const char *d1 = i[0] ? strchr(rev_hex_digits, i[0]) : NULL;
+    const char *d2 = d1 && i[1] ? strchr(rev_hex_digits, i[1]) : NULL;
+
+    if (!d1 || !d2) {
+        /* decoding error */
+        return 0;
     }
 
-    *p = '\0';
+    *o++ = (0xf - ((d1 - rev_hex_digits) & 0xf)) << 4 |
+           (0xf - ((d2 - rev_hex_digits) & 0xf));
 
-    return s;
+    *src = i + 2;
+    *dst = o;
+    return 1;
 }
 
 
-xs_val *xs_hex_dec(const xs_str *hex, int *size)
-/* decodes an hexdump into data */
+char *_xs_hex_enc(char *dst, const char *src, int src_size)
+/* hex-encodes the src_size bytes of the src buffer into dst, which
+   must have room for at least src_size * 2 chars. no NUL terminator
+   is written; returns the position in dst after the last digit */
 {
-    int sz = strlen(hex);
-    xs_val *s = NULL;
-    char *p;
-    int n;
+    const char *e = src + src_size;   /* one past the last byte to encode */
 
-    if (sz % 2)
-        return NULL;
+    while (src < e)
+        xs_hex_enc_1(&dst, &src);     /* advances both dst and src */
 
-    p = s = xs_realloc(NULL, _xs_blk_size(sz / 2 + 1));
+    return dst;
+}
 
-    for (n = 0; n < sz; n += 2) {
-        char *d1 = strchr(rev_hex_digits, *hex++);
-        char *d2 = strchr(rev_hex_digits, *hex++);
 
-        if (!d1 || !d2) {
-            /* decoding error */
-            return xs_free(s);
-        }
+char *_xs_hex_dec(char *dst, const char *src, int src_size)
+/* hex-decodes the src_size chars of the src string into dst, which
+   must have room for at least src_size / 2 bytes. no NUL terminator
+   is written. returns the final position of dst, or NULL if src_size
+   is odd or a decoding error happens */
+{
+    if (src_size % 2)
+        return NULL;
 
-        *p++ = (0xf - ((d1 - rev_hex_digits) & 0xf)) << 4 |
-               (0xf - ((d2 - rev_hex_digits) & 0xf));
+    const char *e = src + src_size;   /* one past the last char to decode */
+
+    while (src < e) {
+        if (!xs_hex_dec_1(&dst, &src))
+            return NULL;
     }
 
-    *p = '\0';
+    return dst;
+}
+
+
+#ifdef _XS_H
+
+xs_str *xs_hex_enc(const xs_val *data, int size)
+/* builds a NUL-terminated hex dump string from size bytes of data */
+{
+    /* two digits per input byte plus the terminator */
+    xs_str *hex = xs_realloc(NULL, _xs_blk_size(size * 2 + 1));
+
+    /* the raw encoder returns the position past the last digit,
+       which is where the terminator goes */
+    *_xs_hex_enc(hex, data, size) = '\0';
+
+    return hex;
+}
+
+
+xs_val *xs_hex_dec(const xs_str *hex, int *size)
+/* decodes an hexdump into a buffer obtained from xs_realloc(), with
+   an extra NUL stored after the decoded data. *size is always set to
+   half the length of hex, even on error. if hex has an odd length or
+   contains non-hex chars, the buffer is released and the value of
+   xs_free() is returned (presumably NULL -- confirm) */
+{
+    int sz = strlen(hex);
+    xs_val *s = NULL;
+
     *size = sz / 2;
+    s = xs_realloc(NULL, _xs_blk_size(*size + 1));   /* +1 for the trailing NUL */
+
+    if (!_xs_hex_dec(s, hex, sz))   /* NULL on odd size or invalid digits */
+        return xs_free(s);
+
+    s[*size] = '\0';
 
     return s;
 }
@@ -71,14 +135,18 @@ xs_val *xs_hex_dec(const xs_str *hex, int *size)
 int xs_is_hex(const char *str)
 /* returns 1 if str is an hex string */
 {
+    if (strlen(str) % 2)   /* an odd number of chars is never accepted */
+        return 0;
+
     while (*str) {
-        if (strchr(rev_hex_digits, *str++) == NULL)
+        if (!xs_is_hex_digit(*str++))   /* any non-hex char rejects the whole string */
             return 0;
     }
 
     return 1;
 }
 
+#endif /* _XS_H */
 
 #endif /* XS_IMPLEMENTATION */
 

+ 30 - 13
xs_socket.h

@@ -7,9 +7,13 @@
 int xs_socket_timeout(int s, double rto, double sto);
 int xs_socket_server(const char *addr, const char *serv);
 FILE *xs_socket_accept(int rs);
-xs_str *xs_socket_peername(int s);
+int _xs_socket_peername(int s, char *buf, int buf_size);
 int xs_socket_connect(const char *addr, const char *serv);
 
+#ifdef _XS_H
+xs_str *xs_socket_peername(int s);
+#endif
+
 
 #ifdef XS_IMPLEMENTATION
 
@@ -17,6 +21,9 @@ int xs_socket_connect(const char *addr, const char *serv);
 #include <netdb.h>
 #include <netinet/in.h>
 #include <arpa/inet.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
 
 
 int xs_socket_timeout(int s, double rto, double sto)
@@ -100,34 +107,28 @@ FILE *xs_socket_accept(int rs)
 }
 
 
-xs_str *xs_socket_peername(int s)
-/* returns the remote address as a string */
+int _xs_socket_peername(int s, char *buf, int buf_size)
+/* fills buf (of buf_size bytes) with the textual address of the peer
+   of socket s. only the AF_INET and AF_INET6 families are handled.
+   returns 1 on success, or 0 if getpeername() or inet_ntop() failed
+   or the address belongs to another family */
 {
-    xs_str *ip = NULL;
     struct sockaddr_storage addr;
     socklen_t slen = sizeof(addr);
+    const char *p = NULL;   /* stays NULL unless inet_ntop() succeeds */
 
     if (getpeername(s, (struct sockaddr *)&addr, &slen) != -1) {
-        char buf[1024];
-        const char *p = NULL;
-
         if (addr.ss_family == AF_INET) {
             struct sockaddr_in *sa = (struct sockaddr_in *)&addr;
 
-            p = inet_ntop(AF_INET, &sa->sin_addr, buf, sizeof(buf));
+            p = inet_ntop(AF_INET, &sa->sin_addr, buf, buf_size);
         }
         else
         if (addr.ss_family == AF_INET6) {
             struct sockaddr_in6 *sa = (struct sockaddr_in6 *)&addr;
 
-            p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, sizeof(buf));
+            p = inet_ntop(AF_INET6, &sa->sin6_addr, buf, buf_size);
         }
-
-        if (p != NULL)
-            ip = xs_str_new(p);
     }
 
-    return ip;
+    return p != NULL;
 }
 
 
@@ -195,6 +196,22 @@ int xs_socket_connect(const char *addr, const char *serv)
 }
 
 
+#ifdef _XS_H
+
+xs_str *xs_socket_peername(int s)
+/* returns the remote address of socket s as a string built with
+   xs_str_new(), or NULL if it cannot be obtained */
+{
+    /* INET6_ADDRSTRLEN is enough for the longest address (IPv4 or
+       IPv6) that inet_ntop() can generate, terminator included */
+    char buf[INET6_ADDRSTRLEN];
+    xs_str *p = NULL;
+
+    if (_xs_socket_peername(s, buf, sizeof(buf)))
+        p = xs_str_new(buf);
+
+    return p;
+}
+
+#endif /* _XS_H */
+
 #endif /* XS_IMPLEMENTATION */
 
 #endif /* _XS_SOCKET_H */

+ 113 - 79
xs_unicode.h

@@ -5,7 +5,6 @@
 #define _XS_UNICODE_H
 
  int _xs_utf8_enc(char buf[4], unsigned int cpoint);
- xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
  unsigned int xs_utf8_dec(char **str);
  int xs_unicode_width(unsigned int cpoint);
  int xs_is_surrogate(unsigned int cpoint);
@@ -21,13 +20,20 @@
  int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint);
  int xs_unicode_is_alpha(unsigned int cpoint);
 
+#ifdef _XS_H
+ xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint);
+#endif
+
 #ifdef XS_IMPLEMENTATION
 
+#ifndef countof
+/* number of elements of a true array (not valid for pointers) */
+#define countof(a) (sizeof(a) / sizeof((a)[0]))
+#endif
 
 int _xs_utf8_enc(char buf[4], unsigned int cpoint)
 /* encodes an Unicode codepoint to utf-8 into buf and returns the size in bytes */
 {
-    unsigned char *p = (unsigned char *)buf;
+    char *p = buf;
 
     if (cpoint < 0x80) /* 1 byte char */
         *p++ = cpoint & 0xff;
@@ -48,27 +54,16 @@ int _xs_utf8_enc(char buf[4], unsigned int cpoint)
         *p++ = 0x80 | (cpoint & 0x3f);
     }
 
-    return p - (unsigned char *)buf;
-}
-
-
-xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
-/* encodes an Unicode codepoint to utf-8 into str */
-{
-    char tmp[4];
-
-    int c = _xs_utf8_enc(tmp, cpoint);
-
-    return xs_append_m(str, tmp, c);
+    return p - buf;
 }
 
 
 unsigned int xs_utf8_dec(char **str)
 /* decodes an utf-8 char inside str and updates the pointer */
 {
-    unsigned char *p = (unsigned char *)*str;
+    char *p = *str;
     unsigned int cpoint = 0;
-    int c = *p++;
+    unsigned char c = *p++;
     int cb = 0;
 
     if ((c & 0x80) == 0) { /* 1 byte char */
@@ -91,30 +86,19 @@ unsigned int xs_utf8_dec(char **str)
     }
 
     /* process the continuation bytes */
-    while (cb--) {
-        if ((*p & 0xc0) == 0x80)
-            cpoint |= (*p++ & 0x3f) << (cb * 6);
-        else {
-            cpoint = 0xfffd;
-            break;
-        }
-    }
+    while (cb > 0 && *p && (*p & 0xc0) == 0x80)
+        cpoint |= (*p++ & 0x3f) << (--cb * 6);
 
-    *str = (char *)p;
-    return cpoint;
-}
-
-
-static int int_range_cmp(const void *p1, const void *p2)
-{
-    const unsigned int *a = p1;
-    const unsigned int *b = p2;
+    /* incomplete or broken? */
+    if (cb)
+        cpoint = 0xfffd;
 
-    return *a < b[0] ? -1 : *a > b[1] ? 1 : 0;
+    *str = p;
+    return cpoint;
 }
 
 
-/* intentionally dead simple */
+/** Unicode character width: intentionally dead simple **/
 
 static unsigned int xs_unicode_width_table[] = {
     0x300,      0x36f,      0,      /* diacritics */
@@ -132,12 +116,23 @@ static unsigned int xs_unicode_width_table[] = {
 int xs_unicode_width(unsigned int cpoint)
 /* returns the width in columns of a Unicode codepoint (somewhat simplified) */
 {
-    unsigned int *r = bsearch(&cpoint, xs_unicode_width_table,
-                        sizeof(xs_unicode_width_table) / (sizeof(unsigned int) * 3),
-                        sizeof(unsigned int) * 3,
-                        int_range_cmp);
+    int b = 0;   /* binary search over rows of 3 values: first, last, width */
+    int t = countof(xs_unicode_width_table) / 3 - 1;   /* index of the last row */
+
+    while (t >= b) {
+        int n = (b + t) / 2;
+        unsigned int *p = &xs_unicode_width_table[n * 3];
+
+        if (cpoint < p[0])   /* below this range: keep the lower half */
+            t = n - 1;
+        else
+        if (cpoint > p[1])   /* above this range: keep the upper half */
+            b = n + 1;
+        else
+            return p[2];     /* inside [p[0], p[1]]: the width of the range */
+    }
 
-    return r ? r[2] : 1;
+    return 1;   /* not in the table: one column */
 }
 
 
@@ -167,38 +162,56 @@ unsigned int xs_surrogate_enc(unsigned int cpoint)
 }
 
 
-#ifdef _XS_UNICODE_TBL_H
-
-/* include xs_unicode_tbl.h before this one to use these functions */
+#ifdef _XS_H
 
-static int int_cmp(const void *p1, const void *p2)
+xs_str *xs_utf8_enc(xs_str *str, unsigned int cpoint)
+/* encodes an Unicode codepoint to utf-8 and hands the bytes to
+   xs_append_m() (presumably appending them to str -- confirm),
+   returning its result */
 {
-    const unsigned int *a = p1;
-    const unsigned int *b = p2;
+    char tmp[4];
+
+    int c = _xs_utf8_enc(tmp, cpoint);   /* c is the size in bytes of the encoding */
 
-    return *a < *b ? -1 : *a > *b ? 1 : 0;
+    return xs_append_m(str, tmp, c);
 }
 
+#endif /* _XS_H */
+
+
+#ifdef _XS_UNICODE_TBL_H
+
+/* include xs_unicode_tbl.h before this one to use these functions */
 
 unsigned int *_xs_unicode_upper_search(unsigned int cpoint)
 /* searches for an uppercase codepoint in the case fold table */
 {
-    return bsearch(&cpoint, xs_unicode_case_fold_table,
-        sizeof(xs_unicode_case_fold_table) / (sizeof(unsigned int) * 2),
-        sizeof(unsigned int) * 2,
-        int_cmp);
+    /* binary search over the pairs of the table, keyed by the first
+       value of each pair (presumably sorted in ascending order).
+       returns a pointer to the matching pair, or NULL if not found */
+    int b = 0;
+    int t = countof(xs_unicode_case_fold_table) / 2 - 1;   /* index of the last pair */
+
+    while (t >= b) {
+        int n = (b + t) / 2;
+        unsigned int *p = &xs_unicode_case_fold_table[n * 2];
+
+        if (cpoint < p[0])
+            t = n - 1;
+        else
+        if (cpoint > p[0])
+            b = n + 1;
+        else
+            return p;
+    }
+
+    return NULL;
 }
 
 
 unsigned int *_xs_unicode_lower_search(unsigned int cpoint)
 /* searches for a lowercase codepoint in the case fold table */
 {
-    unsigned int *p = xs_unicode_case_fold_table + 1;
-    unsigned int *e = xs_unicode_case_fold_table +
-            sizeof(xs_unicode_case_fold_table) / sizeof(unsigned int);
+    unsigned int *p = xs_unicode_case_fold_table;
+    unsigned int *e = p + countof(xs_unicode_case_fold_table);
 
     while (p < e) {
-        if (cpoint == *p)
+        if (cpoint == p[1])
             return p;
 
         p += 2;
@@ -208,38 +221,49 @@ unsigned int *_xs_unicode_lower_search(unsigned int cpoint)
 }
 
 
-unsigned int xs_unicode_to_upper(unsigned int cpoint)
-/* returns the cpoint to uppercase */
+unsigned int xs_unicode_to_lower(unsigned int cpoint)
+/* returns the cpoint to lowercase, or the cpoint itself if it's
+   not found by _xs_unicode_upper_search() */
 {
-    unsigned int *p = _xs_unicode_lower_search(cpoint);
+    unsigned int *p = _xs_unicode_upper_search(cpoint);
 
-    return p == NULL ? cpoint : p[-1];
+    return p == NULL ? cpoint : p[1];   /* p[1]: second value of the matching pair */
 }
 
 
-unsigned int xs_unicode_to_lower(unsigned int cpoint)
-/* returns the cpoint to lowercase */
+unsigned int xs_unicode_to_upper(unsigned int cpoint)
+/* returns the cpoint to uppercase, or the cpoint itself if it's
+   not found by _xs_unicode_lower_search() */
 {
-    unsigned int *p = _xs_unicode_upper_search(cpoint);
+    unsigned int *p = _xs_unicode_lower_search(cpoint);
 
-    return p == NULL ? cpoint : p[1];
+    return p == NULL ? cpoint : p[0];   /* p[0]: first value of the matching pair */
 }
 
 
 int xs_unicode_nfd(unsigned int cpoint, unsigned int *base, unsigned int *diac)
 /* applies unicode Normalization Form D */
 {
-    unsigned int *r = bsearch(&cpoint, xs_unicode_nfd_table,
-                        sizeof(xs_unicode_nfd_table) / (sizeof(unsigned int) * 3),
-                        sizeof(unsigned int) * 3,
-                        int_cmp);
-
-    if (r != NULL) {
-        *base = r[1];
-        *diac = r[2];
+    /* binary search over rows of 3 values, keyed by the first one;
+       on a match, the other two are stored into *base and *diac and
+       1 is returned. otherwise, both are left untouched */
+    int b = 0;
+    int t = countof(xs_unicode_nfd_table) / 3 - 1;
+
+    while (t >= b) {
+        int n = (b + t) / 2;
+        unsigned int *p = &xs_unicode_nfd_table[n * 3];
+
+        /* compare as unsigned: the difference of two unsigned
+           values stored in an int can get the wrong sign */
+        if (cpoint < p[0])
+            t = n - 1;
+        else
+        if (cpoint > p[0])
+            b = n + 1;
+        else {
+            *base = p[1];
+            *diac = p[2];
+            return 1;
+        }
     }
 
-    return !!r;
+    return 0;
 }
 
 
@@ -247,8 +271,7 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint)
 /* applies unicode Normalization Form C */
 {
     unsigned int *p = xs_unicode_nfd_table;
-    unsigned int *e = xs_unicode_nfd_table +
-        sizeof(xs_unicode_nfd_table) / sizeof(unsigned int);
+    unsigned int *e = p + countof(xs_unicode_nfd_table);
 
     while (p < e) {
         if (p[1] == base && p[2] == diac) {
@@ -266,12 +289,23 @@ int xs_unicode_nfc(unsigned int base, unsigned int diac, unsigned int *cpoint)
 int xs_unicode_is_alpha(unsigned int cpoint)
 /* checks if a codepoint is an alpha (i.e. a letter) */
 {
-    unsigned int *r = bsearch(&cpoint, xs_unicode_alpha_table,
-                        sizeof(xs_unicode_alpha_table) / (sizeof(unsigned int) * 2),
-                        sizeof(unsigned int) * 2,
-                        int_range_cmp);
+    int b = 0;   /* binary search over rows of 2 values: first, last */
+    int t = countof(xs_unicode_alpha_table) / 2 - 1;   /* index of the last row */
+
+    while (t >= b) {
+        int n = (b + t) / 2;
+        unsigned int *p = &xs_unicode_alpha_table[n * 2];
+
+        if (cpoint < p[0])   /* below this range: keep the lower half */
+            t = n - 1;
+        else
+        if (cpoint > p[1])   /* above this range: keep the upper half */
+            b = n + 1;
+        else
+            return 1;        /* inside [p[0], p[1]] */
+    }
 
-    return !!r;
+    return 0;   /* not inside any range of the table */
 }
 
 

+ 1 - 1
xs_version.h

@@ -1 +1 @@
-/* 3582ff265e19407df1d532eb1d90c372fe22ca62 2023-12-08T06:10:40+01:00 */
+/* fd50c72456b717bb235eec8fe5f712da5f695f2b 2023-12-27T12:51:14+01:00 */