Browse Source

refactor: properly check for UD redirect

Previously, the request URL was constructed with multiple slashes after
the domain and without the "www" subdomain, which led to a redirect on
every request, so redirects could not be properly checked.
Zubarev Grigoriy 7 months ago
parent
commit
f3c74b29f1
1 changed file with 6 additions and 7 deletions
  1. 6 7
      src/rural_dict/__main__.py

+ 6 - 7
src/rural_dict/__main__.py

@@ -1,5 +1,6 @@
+import logging
 import re
-from urllib.parse import quote, unquote
+import sys
 
 import requests
 from bs4 import BeautifulSoup
@@ -9,12 +10,8 @@ from flask import Flask, redirect, render_template, request
 def scrape(url):
     data = requests.get(url, timeout=10)
 
-    our_path = re.sub(r".*://.*/", "/", request.url)
     path = re.sub(r".*://.*/", "/", data.url)
-    if our_path != path and quote(unquote(re.sub("[?&=]", "", our_path))) != re.sub(
-        "[?&=]", "", path
-    ):
-        # this is bad ^
+    if data.history:
         return f"REDIRECT {path}"
     ret = []
     soup = BeautifulSoup(data.text, "html.parser")
@@ -54,7 +51,8 @@ app = Flask(__name__, template_folder="templates", static_folder="static")
 @app.route("/", defaults={"path": ""})
 @app.route("/<path:path>")
 def catch_all(path):
-    scraped = scrape(f"https://urbandictionary.com/{re.sub(r'.*://.*/', '/', request.url)}")
+    path_without_host = re.sub(r"https?://[^/]+/", "", request.url)
+    scraped = scrape(f"https://www.urbandictionary.com/{path_without_host}")
     if isinstance(scraped, str) and scraped.startswith("REDIRECT"):
         return redirect(scraped.replace("REDIRECT ", ""), 302)
     return render_template("index.html", data=scraped, term=request.args.get("term"))
@@ -63,4 +61,5 @@ def catch_all(path):
 if __name__ == "__main__":
     from waitress import serve
 
+    logging.basicConfig(level=logging.INFO, stream=sys.stdout)
     serve(app, host="0.0.0.0", port=8080)  # noqa: S104