# fetch_instances.py: record the peers of a seed instance in blocks.db

import json
import sqlite3
import sys
from hashlib import sha256
from typing import Optional

from reqto import get
  6. with open("config.json") as f:
  7. config = json.loads(f.read())
  8. domain = sys.argv[1]
  9. blacklist = [
  10. "activitypub-troll.cf",
  11. "gab.best",
  12. "4chan.icu",
  13. "social.shrimpcam.pw",
  14. "mastotroll.netz.org",
  15. "github.dev",
  16. "ngrok.io"
  17. ]
  18. headers = {
  19. "user-agent": config["useragent"]
  20. }
  21. def get_hash(domain: str) -> str:
  22. return sha256(domain.encode("utf-8")).hexdigest()
  23. def get_peers(domain: str) -> str:
  24. try:
  25. res = get(f"https://{domain}/api/v1/instance/peers", headers=headers, timeout=5, allow_redirects=False)
  26. return res.json()
  27. except:
  28. return None
  29. peerlist = get_peers(domain)
  30. def get_type(instdomain: str) -> str:
  31. try:
  32. res = get(f"https://{instdomain}/nodeinfo/2.1.json", headers=headers, timeout=5, allow_redirects=False)
  33. if res.status_code == 404:
  34. res = get(f"https://{instdomain}/nodeinfo/2.0", headers=headers, timeout=5, allow_redirects=False)
  35. if res.status_code == 404:
  36. res = get(f"https://{instdomain}/nodeinfo/2.0.json", headers=headers, timeout=5, allow_redirects=False)
  37. if res.ok and "text/html" in res.headers["content-type"]:
  38. res = get(f"https://{instdomain}/nodeinfo/2.1", headers=headers, timeout=5, allow_redirects=False)
  39. if res.ok:
  40. if res.json()["software"]["name"] in ["akkoma", "rebased", "incestoma"]:
  41. return "pleroma"
  42. elif res.json()["software"]["name"] in ["hometown", "ecko"]:
  43. return "mastodon"
  44. elif res.json()["software"]["name"] in ["calckey", "groundpolis", "foundkey", "cherrypick", "firefish", "iceshrimp", "sharkey", "catodon"]:
  45. return "misskey"
  46. else:
  47. return res.json()["software"]["name"]
  48. elif res.status_code == 404:
  49. res = get(f"https://{instdomain}/api/v1/instance", headers=headers, timeout=5, allow_redirects=False)
  50. if res.ok:
  51. return "mastodon"
  52. except:
  53. return None
  54. conn = sqlite3.connect("blocks.db")
  55. c = conn.cursor()
  56. c.execute(
  57. "select domain from instances where 1"
  58. )
  59. for instance in peerlist:
  60. instance = instance.lower()
  61. blacklisted = False
  62. for domain in blacklist:
  63. if domain in instance:
  64. blacklisted = True
  65. if blacklisted:
  66. continue
  67. print(instance)
  68. try:
  69. c.execute(
  70. "select domain from instances where domain = ?", (instance,)
  71. )
  72. if c.fetchone() == None:
  73. c.execute(
  74. "insert into instances select ?, ?, ?",
  75. (instance, get_hash(instance), get_type(instance)),
  76. )
  77. conn.commit()
  78. except Exception as e:
  79. print("error:", e, instance)
  80. conn.close()