Browse Source

Backport from xs (faster dicts).

default 9 months ago
parent
commit
aca6b2cff7
2 changed files with 168 additions and 84 deletions
  1. 167 83
      xs.h
  2. 1 1
      xs_version.h

+ 167 - 83
xs.h

@@ -1040,12 +1040,29 @@ xs_keyval *xs_keyval_make(xs_keyval *keyval, const xs_str *key, const xs_val *va
 
 /** dicts **/
 
+typedef struct {
+    int value_offset;   /* offset to value (from dict start) */
+    int next;           /* next node in sequential search */
+    int child[4];       /* child nodes in hashed search */
+    char key[];         /* C string key */
+} ditem_hdr;
+
+typedef struct {
+    int size;           /* size of full dict (_XS_TYPE_SIZE) */
+    int first;          /* first node for sequential search */
+    int root;           /* root node for hashed search */
+    ditem_hdr ditems[]; /* the ditems */
+} dict_hdr;
+
+
 xs_dict *xs_dict_new(void)
 /* creates a new dict */
 {
-    int sz = 1 + _XS_TYPE_SIZE + 1;
+    /* size of dict */
+    int sz = 1 + sizeof(dict_hdr);
+
     xs_dict *d = xs_realloc(NULL, sz);
-    memset(d, XSTYPE_EOM, sz);
+    memset(d, '\0', sz);
 
     d[0] = XSTYPE_DICT;
     _xs_put_size(d, sz);
@@ -1053,140 +1070,207 @@ xs_dict *xs_dict_new(void)
     return d;
 }
 
-
-xs_dict *_xs_dict_write_keyval(xs_dict *dict, int offset, const xs_str *key, const xs_val *value)
-/* adds a new keyval to the dict */
+static int *_xs_dict_locate(const xs_dict *dict, const char *key)
+/* locates a ditem */
 {
-    XS_ASSERT_TYPE(dict, XSTYPE_DICT);
-    XS_ASSERT_TYPE(key, XSTYPE_STRING);
+    unsigned int h = xs_hash_func(key, strlen(key));
 
-    if (value == NULL)
-        value = xs_stock(XSTYPE_NULL);
+    /* start from the root */
+    dict_hdr *dh = (dict_hdr *)(dict + 1);
+    int *off = &dh->root;
 
-    dict = xs_expand(dict, offset, xs_keyval_size(key, value));
+    while (*off) {
+        /* pointer to ditem */
+        ditem_hdr *di = (ditem_hdr *)(dict + *off);
 
-    xs_keyval_make(&dict[offset], key, value);
+        /* pointer to the key */
+        const char *d_key = di->key;
 
-    return dict;
-}
+        if (strcmp(key, d_key) == 0)
+            break;
 
+        off = &di->child[h >> 30];
+        h <<= 2;
+    }
 
-xs_dict *xs_dict_append(xs_dict *dict, const xs_str *key, const xs_val *value)
-/* appends a memory block to the dict */
-{
-    return _xs_dict_write_keyval(dict, xs_size(dict) - 1, key, value);
+    return off;
 }
 
 
-xs_dict *xs_dict_prepend(xs_dict *dict, const xs_str *key, const xs_val *value)
-/* prepends a memory block to the dict */
+xs_dict *xs_dict_set(xs_dict *dict, const xs_str *key, const xs_val *value)
+/* sets a key/value pair */
 {
-    return _xs_dict_write_keyval(dict, 1 + _XS_TYPE_SIZE, key, value);
-}
+    if (value == NULL)
+        value = xs_stock(XSTYPE_NULL);
 
+    if (xs_type(dict) == XSTYPE_DICT) {
+        int *o = _xs_dict_locate(dict, key);
+        int end = xs_size(dict);
 
-int xs_dict_next(const xs_dict *dict, const xs_str **key, const xs_val **value, int *ctxt)
-/* iterates a dict, with context */
-{
-    if (xs_type(dict) != XSTYPE_DICT)
-        return 0;
+        if (!*o) {
+            /* ditem does not exist yet: append to the end */
+            *o = end;
 
-    int goon = 1;
+            int ksz = xs_size(key);
+            int vsz = xs_size(value);
+            int dsz = sizeof(ditem_hdr) + ksz + vsz;
 
-    char *p = (char *)dict;
+            /* open room in the dict for the full ditem */
+            dict = xs_expand(dict, end, dsz);
 
-    /* skip the start of the list */
-    if (*ctxt == 0)
-        *ctxt = 1 + _XS_TYPE_SIZE;
+            dict_hdr *dh = (dict_hdr *)(dict + 1);
 
-    p += *ctxt;
+            /* build the ditem */
+            ditem_hdr *di = (ditem_hdr *)(dict + end);
+            memset(di, '\0', dsz);
 
-    /* an element? */
-    if (xs_type(p) == XSTYPE_KEYVAL) {
-        p++;
+            /* set the offset to the value */
+            di->value_offset = end + sizeof(ditem_hdr) + ksz;
 
-        *key = p;
-        p += xs_size(*key);
+            /* copy the key */
+            memcpy(di->key, key, ksz);
 
-        *value = p;
-        p += xs_size(*value);
-    }
-    else {
-        /* end of list */
-        goon = 0;
+            /* copy the value */
+            memcpy(dict + di->value_offset, value, vsz);
+
+            /* chain to the sequential list */
+            di->next = dh->first;
+            dh->first = end;
+        }
+        else {
+            /* ditem already exists */
+            ditem_hdr *di = (ditem_hdr *)(dict + *o);
+
+            /* get pointer to the value offset */
+            int *i = &di->value_offset;
+
+            /* deleted? recover offset */
+            if (*i < 0)
+                *i *= -1;
+
+            /* get old value */
+            xs_val *o_value = dict + *i;
+
+            /* will new value fit over the old one? */
+            if (xs_size(value) <= xs_size(o_value)) {
+                /* just overwrite */
+                /* (difference is leaked inside the dict) */
+                memcpy(o_value, value, xs_size(value));
+            }
+            else {
+                /* not enough room: new value will live at the end of the dict */
+                /* (old value is leaked inside the dict) */
+                *i = end;
+
+                dict = xs_insert(dict, end, value);
+            }
+        }
     }
 
-    /* store back the pointer */
-    *ctxt = p - dict;
+    return dict;
+}
 
-    return goon;
+
+xs_dict *xs_dict_append(xs_dict *dict, const xs_str *key, const xs_val *value)
+/* just an alias (for this implementation it's the same) */
+{
+    return xs_dict_set(dict, key, value);
 }
 
 
-const xs_val *xs_dict_get_def(const xs_dict *dict, const xs_str *key, const xs_val *def)
-/* returns the value directed by key, or the default value */
+xs_dict *xs_dict_prepend(xs_dict *dict, const xs_str *key, const xs_val *value)
+/* just an alias (for this implementation it's the same) */
 {
-    XS_ASSERT_TYPE(dict, XSTYPE_DICT);
-    XS_ASSERT_TYPE(key, XSTYPE_STRING);
+    return xs_dict_set(dict, key, value);
+}
 
-    const xs_str *k;
-    const xs_val *v;
-    int c = 0;
 
-    while (xs_dict_next(dict, &k, &v, &c)) {
-        if (strcmp(k, key) == 0)
-            return v;
+xs_dict *xs_dict_del(xs_dict *dict, const xs_str *key)
+/* deletes a key/value pair */
+{
+    if (xs_type(dict) == XSTYPE_DICT) {
+        int *o = _xs_dict_locate(dict, key);
+
+        if (*o) {
+            /* found ditem */
+            ditem_hdr *di = (ditem_hdr *)(dict + *o);
+
+            /* deleted ditems have a negative value offset */
+            di->value_offset *= -1;
+        }
     }
 
-    return (xs_val *)def;
+    return dict;
 }
 
 
-xs_dict *xs_dict_del(xs_dict *dict, const xs_str *key)
-/* deletes a key */
+const xs_val *xs_dict_get_def(const xs_dict *dict, const xs_str *key, const xs_str *def)
+/* gets a value by key, or returns def */
 {
-    XS_ASSERT_TYPE(dict, XSTYPE_DICT);
-    XS_ASSERT_TYPE(key, XSTYPE_STRING);
-
-    const xs_str *k;
-    const xs_val *v;
-    int c = 0;
+    if (xs_type(dict) == XSTYPE_DICT) {
+        int *o = _xs_dict_locate(dict, key);
 
-    while (xs_dict_next(dict, &k, &v, &c)) {
-        if (strcmp(k, key) == 0) {
-            /* the address of the item is just behind the key */
-            char *i = (char *)k - 1;
+        if (*o) {
+            /* found ditem */
+            ditem_hdr *di = (ditem_hdr *)(dict + *o);
 
-            dict = xs_collapse(dict, i - dict, xs_size(i));
-            break;
+            if (di->value_offset > 0)
+                return dict + di->value_offset;
         }
     }
 
-    return dict;
+    return def;
 }
 
 
-xs_dict *xs_dict_set(xs_dict *dict, const xs_str *key, const xs_val *data)
-/* sets (replaces) a key */
+int xs_dict_next(const xs_dict *dict, const xs_str **key, const xs_val **value, int *ctxt)
+/* dict iterator, with context */
 {
-    XS_ASSERT_TYPE(dict, XSTYPE_DICT);
-    XS_ASSERT_TYPE(key, XSTYPE_STRING);
+    if (xs_type(dict) != XSTYPE_DICT)
+        return 0;
 
-    /* delete the possibly existing key */
-    dict = xs_dict_del(dict, key);
+    if (*ctxt == 0) {
+        /* at the beginning: get the first sequential item */
+        const dict_hdr *dh = (dict_hdr *)(dict + 1);
+        *ctxt = dh->first;
+    }
 
-    /* add the data */
-    dict = xs_dict_append(dict, key, data);
+    *value = NULL;
 
-    return dict;
+    while (*value == NULL && *ctxt > 0) {
+        const ditem_hdr *di = (ditem_hdr *)(dict + *ctxt);
+
+        /* get value */
+        if (di->value_offset > 0) {
+            *value = (xs_val *)(dict + di->value_offset);
+
+            /* get key */
+            *key = (xs_str *)&di->key;
+        }
+
+        /* get offset to next ditem */
+        *ctxt = di->next ? di->next : -1;
+    }
+
+    return *value != NULL;
 }
 
 
 xs_dict *xs_dict_gc(xs_dict *dict)
 /* collects garbage (leaked values) inside a dict */
 {
-    /* this kind of dicts does not get garbage */
-    return dict;
+    xs_dict *nd = xs_dict_new();
+    const xs_str *k;
+    const xs_val *v;
+    int c = 0;
+
+    /* shamelessly create a new dict with the same content */
+    while (xs_dict_next(dict, &k, &v, &c))
+        nd = xs_dict_set(nd, k, v);
+
+    xs_free(dict);
+
+    return nd;
 }
 
 

+ 1 - 1
xs_version.h

@@ -1 +1 @@
-/* efb85fa3768a86f1609c520f0c86e8f87239b695 2024-05-27T08:24:40+02:00 */
+/* 4595e864ae31bc59cca0fff38bd2bac798c2b038 2024-06-08T19:50:32+02:00 */