Selaa lähdekoodia

refactor: properly check for UD redirect

Previously, the request URL was constructed with multiple slashes after
the domain and without the "www" subdomain, which led to a redirect on
every request, so genuine redirects could not be properly detected.
Zubarev Grigoriy 7 kuukautta sitten
vanhempi
commit
f3c74b29f1
1 muutettua tiedostoa jossa 6 lisäystä ja 7 poistoa
  1. 6 7
      src/rural_dict/__main__.py

+ 6 - 7
src/rural_dict/__main__.py

@@ -1,5 +1,6 @@
+import logging
 import re
-from urllib.parse import quote, unquote
+import sys
 
 import requests
 from bs4 import BeautifulSoup
@@ -9,12 +10,8 @@ from flask import Flask, redirect, render_template, request
 def scrape(url):
     data = requests.get(url, timeout=10)
 
-    our_path = re.sub(r".*://.*/", "/", request.url)
     path = re.sub(r".*://.*/", "/", data.url)
-    if our_path != path and quote(unquote(re.sub("[?&=]", "", our_path))) != re.sub(
-        "[?&=]", "", path
-    ):
-        # this is bad ^
+    if data.history:
         return f"REDIRECT {path}"
     ret = []
     soup = BeautifulSoup(data.text, "html.parser")
@@ -54,7 +51,8 @@ app = Flask(__name__, template_folder="templates", static_folder="static")
 @app.route("/", defaults={"path": ""})
 @app.route("/<path:path>")
 def catch_all(path):
-    scraped = scrape(f"https://urbandictionary.com/{re.sub(r'.*://.*/', '/', request.url)}")
+    path_without_host = re.sub(r"https?://[^/]+/", "", request.url)
+    scraped = scrape(f"https://www.urbandictionary.com/{path_without_host}")
     if isinstance(scraped, str) and scraped.startswith("REDIRECT"):
         return redirect(scraped.replace("REDIRECT ", ""), 302)
     return render_template("index.html", data=scraped, term=request.args.get("term"))
@@ -63,4 +61,5 @@ def catch_all(path):
 if __name__ == "__main__":
     from waitress import serve
 
+    logging.basicConfig(level=logging.INFO, stream=sys.stdout)
     serve(app, host="0.0.0.0", port=8080)  # noqa: S104