Browse Source

New function search_by_content().

default 11 months ago
parent
commit
3ab733cdf5
3 changed files with 79 additions and 0 deletions
  1. 58 0
      data.c
  2. 18 0
      main.c
  3. 3 0
      snac.h

+ 58 - 0
data.c

@@ -2488,6 +2488,64 @@ void notify_clear(snac *snac)
 }
 
 
+/** searches **/
+
+xs_list *search_by_content(snac *user, const xs_list *timeline,
+                            const char *regex, int timeout)
+/* returns a new list with the entries of timeline whose posts match regex.
+   Posts are loaded with timeline_get_by_md5() if user is set, or with
+   object_get_by_md5() otherwise; only "Note" objects with content are
+   tested, after stripping HTML tags and applying xs_tolower_i(). timeout
+   is in seconds (0 means 3); when it expires, matches so far are returned */
+{
+    xs_list *r = xs_list_new();
+    int c = 0;
+    char *v;
+
+    if (timeout == 0)
+        timeout = 3;
+
+    time_t t = time(NULL) + timeout;
+
+    while (xs_list_next(timeline, &v, &c)) {
+        xs *post = NULL;
+        int status;
+
+        /* timeout? */
+        if (time(NULL) > t)
+            break;
+
+        if (user)
+            status = timeline_get_by_md5(user, v, &post);
+        else
+            status = object_get_by_md5(v, &post);
+
+        if (!valid_status(status))
+            continue;
+
+        /* must be a Note */
+        if (strcmp(xs_dict_get_def(post, "type", ""), "Note"))
+            continue;
+
+        char *content = xs_dict_get(post, "content");
+        if (xs_is_null(content))
+            continue;
+
+        /* strip HTML (named 'text' so it does not shadow the cursor 'c') */
+        xs *text = xs_regex_replace(content, "<[^>]+>", " ");
+        text = xs_regex_replace_i(text, " {2,}", " ");
+        text = xs_tolower_i(text);
+
+        /* apply regex */
+        xs *l = xs_regex_select_n(text, regex, 1);
+        if (xs_list_len(l))
+            r = xs_list_append(r, v);
+    }
+
+    return r;
+}
+
+
 /** the queue **/
 
 static xs_dict *_enqueue_put(const char *fn, xs_dict *msg)

+ 18 - 0
main.c

@@ -44,6 +44,7 @@ int usage(void)
     printf("limit {basedir} {uid} {actor}        Limits an actor (drops their announces)\n");
     printf("unlimit {basedir} {uid} {actor}      Unlimits an actor\n");
     printf("verify_links {basedir} {uid}         Verifies a user's links (in the metadata)\n");
+    printf("search {basedir} {uid} {regex}       Searches posts by content\n");
 
     return 1;
 }
@@ -374,6 +375,23 @@ int main(int argc, char *argv[])
         return 0;
     }
 
+    if (strcmp(cmd, "search") == 0) { /** **/
+        xs *tl = timeline_simple_list(&snac, "private", 0, XS_ALL);
+
+        /* 'url' contains the regex */
+        xs *r = search_by_content(&snac, tl, url, 10);
+
+        int c = 0;
+        char *v;
+
+        /* print results as standalone links */
+        while (xs_list_next(r, &v, &c)) {
+            printf("%s/admin/p/%s\n", snac.actor, v);
+        }
+
+        return 0;
+    }
+
     if (strcmp(cmd, "ping") == 0) { /** **/
         xs *actor_o = NULL;
 

+ 3 - 0
snac.h

@@ -179,6 +179,9 @@ xs_list *list_timeline(snac *user, const char *list, int skip, int show);
 xs_val *list_content(snac *user, const char *list_id, const char *actor_md5, int op);
 void list_distribute(snac *user, const char *who, const xs_dict *post);
 
+xs_list *search_by_content(snac *user, const xs_list *timeline,
+                            const char *regex, int timeout); /* timeout: seconds, 0 = default of 3 */
+
 int actor_add(const char *actor, xs_dict *msg);
 int actor_get(const char *actor, xs_dict **data);
 int actor_get_refresh(snac *user, const char *actor, xs_dict **data);