|
@@ -6,16 +6,22 @@ import requests
|
|
|
from bs4 import BeautifulSoup
|
|
|
from flask import Flask, redirect, render_template, request
|
|
|
|
|
|
+app = Flask(__name__, template_folder="templates", static_folder="static")
|
|
|
+
|
|
|
+
|
|
|
+@app.route("/", defaults={"path": ""})
|
|
|
+@app.route("/<path:path>")
|
|
|
+def catch_all(path):
|
|
|
+ path_without_host = re.sub(r"https?://[^/]+/", "", request.url)
|
|
|
+ url = f"https://www.urbandictionary.com/{path_without_host}"
|
|
|
|
|
|
-def scrape(url):
|
|
|
data = requests.get(url, timeout=10)
|
|
|
|
|
|
- path = re.sub(r".*://.*/", "/", data.url)
|
|
|
if data.history:
|
|
|
- return f"REDIRECT {path}"
|
|
|
- ret = []
|
|
|
- soup = BeautifulSoup(data.text, "html.parser")
|
|
|
+ return redirect(re.sub(r"https?://[^/]+", "", data.url), 302)
|
|
|
|
|
|
+ res = []
|
|
|
+ soup = BeautifulSoup(data.text, "html.parser")
|
|
|
defs = [(div, div.get("data-defid")) for div in soup.find_all("div") if div.get("data-defid")]
|
|
|
try:
|
|
|
thumbs_data = {
|
|
@@ -38,24 +44,10 @@ def scrape(url):
|
|
|
contributor = definition.find(attrs={"class": ["contributor font-bold"]})
|
|
|
thumbs_up = thumbs_data.get(defid, {}).get("up")
|
|
|
thumbs_down = thumbs_data.get(defid, {}).get("down")
|
|
|
- ret.append([defid, word, meaning, example, contributor, thumbs_up, thumbs_down])
|
|
|
- pages = soup.find(attrs={"class": ["pagination text-xl text-center"]})
|
|
|
- if pages is None:
|
|
|
- pages = ""
|
|
|
- return ret, pages
|
|
|
+ res.append([defid, word, meaning, example, contributor, thumbs_up, thumbs_down])
|
|
|
+ pagination = soup.find(attrs={"class": ["pagination text-xl text-center"]}) or ""
|
|
|
|
|
|
-
|
|
|
-app = Flask(__name__, template_folder="templates", static_folder="static")
|
|
|
-
|
|
|
-
|
|
|
-@app.route("/", defaults={"path": ""})
|
|
|
-@app.route("/<path:path>")
|
|
|
-def catch_all(path):
|
|
|
- path_without_host = re.sub(r"https?://[^/]+/", "", request.url)
|
|
|
- scraped = scrape(f"https://www.urbandictionary.com/{path_without_host}")
|
|
|
- if isinstance(scraped, str) and scraped.startswith("REDIRECT"):
|
|
|
- return redirect(scraped.replace("REDIRECT ", ""), 302)
|
|
|
- return render_template("index.html", data=scraped, term=request.args.get("term"))
|
|
|
+ return render_template("index.html", data=(res, pagination), term=request.args.get("term"))
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|