瀏覽代碼

Improve the check logic

poesty 1 年之前
父節點
當前提交
d219ce4c6d
共有 2 個文件被更改,包括 29 次插入17 次删除
  1. 13 7
      update/update_fbadb_1.sh
  2. 16 10
      update/update_fbadb_2.sh

+ 13 - 7
update/update_fbadb_1.sh

@@ -6,20 +6,26 @@ feed_url="https://fediverse.observer/new-servers.xml"
 # Set the interval for checking for updates (in seconds)
 interval=600
 
+# Initialize last_update variable
+last_update=""
+
 while true; do
     # Get the published dates of the feed entries listed before the last seen one
-    last_update=$(curl -Ls "$feed_url" | grep -oP -m 1 "(?<=<updated>)[^<]+")
+    feed=$(curl -Ls "$feed_url")
+    update_dates=$(echo "$feed" | xmlstarlet sel -N atom=http://www.w3.org/2005/Atom -t -v "//atom:entry/atom:published" | awk -v date="$last_update" '{ if ($0 == date) exit; print }')
 
-    # Compare the last update date with the date of the last check
-    if [ "$last_update" != "$last_check" ]; then
+    if [ -n "$update_dates" ]; then
         # Update the date of the last check
-        last_check="$last_update"
+        last_update=$(echo "$update_dates" | head -1)
+
+        # Get total count of last updated sites 
+        update_count=$(echo "$update_dates" | wc -l)
 
         # Parse the feed and get the links of the new entries
-        urls=$(curl -Ls "$feed_url" | xmlstarlet sel -N atom=http://www.w3.org/2005/Atom -t -v "//atom:entry/atom:link/@href" | awk -F'/' '{print $NF}' | sort | uniq -c | sort -nr | awk '{print $2}')
+        urls=$(echo "$feed" | xmlstarlet sel -N atom=http://www.w3.org/2005/Atom -t -v "//atom:entry/atom:link/@href" | head -n "$update_count" | awk -F'/' '{print $NF}' | sort | uniq -c | sort -nr | awk '{print $2}')
 
-        for url in $urls; do
-                cd /opt/fedi-block-api && sudo -Hu fba python3 fetch_instances.py $url
+        for url in "$urls"; do
+                cd /opt/fedi-block-api && sudo -Hu fba python3 fetch_instances.py "$url"
         done
 
     fi

+ 16 - 10
update/update_fbadb_2.sh

@@ -1,25 +1,31 @@
 #!/bin/bash
 
-# Set the URL of the Atom feed
+# Set the URL of the RSS feed
 feed_url="http://demo.fedilist.com/instance/newest/rss"
 
 # Set the interval for checking for updates (in seconds)
 interval=600
 
+# Initialize last_update variable
+last_update=""
+
 while true; do
-    # Get the date of the last update from the feed
-    last_update=$(curl -Ls "$feed_url" | xmlstarlet sel -t -v "/rss/channel/item[1]/pubDate")
+    # Get dates of last updated sites from the feed
+    feed=$(curl -Ls "$feed_url")
+    update_dates=$(echo "$feed" | xmlstarlet sel -t -v "/rss/channel/item/pubDate" | awk -v date="$last_update" '{ if ($0 == date) exit; print }')
 
-    # Compare the last update date with the date of the last check
-    if [ "$last_update" != "$last_check" ]; then
+    if [ -n "$update_dates" ]; then
         # Update the date of the last check
-        last_check="$last_update"
+        last_update=$(echo "$update_dates" | head -1)
+
+        # Get total count of last updated sites 
+        update_count=$(echo "$update_dates" | wc -l)
 
-        # Parse the feed and get the link of the first entry
-        urls=$(curl -Ls "$feed_url" | xmlstarlet sel -t -v "/rss/channel/item/title" | awk '{print $NF}' | sort | uniq -c | sort -nr | awk '{print $2}')
+        # Parse the feed and get the last word of each new item's title
+        urls=$(echo "$feed" | xmlstarlet sel -t -v "/rss/channel/item/title" | head -n "$update_count" | awk '{print $NF}' | sort | uniq -c | sort -nr | awk '{print $2}')
 
-        for url in $urls; do
-                cd /opt/fedi-block-api && sudo -Hu fba python3 fetch_instances.py $url
+        for url in "$urls"; do
+                cd /opt/fedi-block-api && sudo -Hu fba python3 fetch_instances.py "$url"
         done
 
     fi