12345678910111213141516171819202122232425262728293031323334353637383940414243444546 |
- from flask import Flask, render_template, request, redirect
- import requests
- import html
- import re
- from bs4 import BeautifulSoup
- from urllib.parse import quote, unquote
def scrape(url: str):
    """Download an Urban Dictionary page and extract its definitions.

    Must be called while a Flask request is active, because the URL of the
    incoming request is compared with the final upstream URL to detect
    redirects.

    Args:
        url: Absolute URL of the upstream page to fetch.

    Returns:
        Either the string ``"REDIRECT <path>"`` when the upstream response
        ended on a different location than the one requested from this app,
        or a ``(definitions, pagination)`` tuple. ``definitions`` is a list
        of ``[defid, word, meaning_html, example_html, contributor]`` lists,
        where ``contributor`` is the matching tag or ``None``. ``pagination``
        is the pagination tag, or ``""`` when the page has none.

    Raises:
        requests.RequestException: If the upstream request fails or exceeds
            the timeout.
    """
    # Without an explicit timeout requests waits indefinitely, which would
    # tie up the worker whenever the upstream site stalls.
    data = requests.get(url, timeout=10)

    # The greedy pattern drops everything up to and including the last "/",
    # leaving "/<last segment + query string>" for both URLs.
    our_path = re.sub(r".*://.*/", "/", request.url)
    path = re.sub(r".*://.*/", "/", data.url)

    # Treat the two as equal when they differ only in percent-encoding or in
    # the query separators; otherwise hand the new location to the caller.
    if our_path != path and \
            quote(unquote(re.sub("[?&=]", "", our_path))) != re.sub("[?&=]", "", path):
        return f"REDIRECT {path}"

    ret = []
    soup = BeautifulSoup(data.text, "html.parser")
    for div in soup.find_all("div"):
        defid = div.get('data-defid')
        if defid is None:
            continue

        # The div carrying the attribute is the definition container, so
        # there is no need to search the whole document for it again.
        headings = div.select("div div h1 a, div div h2 a")
        if not headings:
            # An entry without a headword cannot be rendered; skip it
            # instead of failing the whole page with an IndexError.
            continue
        word = headings[0].text

        meaning_tag = div.find(attrs={"class": ["break-words meaning mb-4"]})
        example_tag = div.find(attrs={"class": ["break-words example italic mb-4"]})
        # A missing section becomes an empty string rather than an
        # AttributeError on None.
        meaning = meaning_tag.decode_contents() if meaning_tag is not None else ""
        example = example_tag.decode_contents() if example_tag is not None else ""

        contributor = div.find(attrs={"class": ["contributor font-bold"]})
        ret.append([defid, word, meaning, example, contributor])

    pages = soup.find(attrs={"class": ["pagination text-xl text-center"]})
    if pages is None:
        pages = ""
    return (ret, pages)
# Flask application object; templates are looked up in "templates" and
# static assets in "static".
app = Flask(
    __name__,
    static_folder="static",
    template_folder="templates",
)
@app.route('/', defaults={'path': ''})
@app.route('/<path:path>')
def catch_all(path):
    """Serve any requested path by scraping the matching upstream page.

    Args:
        path: Path captured by the route. It is not used directly; the
            upstream location is derived from ``request.url`` so that the
            query string is included.

    Returns:
        A 302 redirect when ``scrape`` reports that the upstream page moved,
        otherwise the rendered ``index.html`` template with the scraped data.
    """
    redirect_marker = "REDIRECT "

    # NOTE: the greedy pattern keeps only the part after the last "/" of the
    # request URL (plus its query string), prefixed with a single "/".
    local = re.sub(r'.*://.*/', '/', request.url)

    # The substitution result already starts with "/"; strip it so the
    # upstream URL does not contain a double slash after the host name.
    scraped = scrape("https://urbandictionary.com/" + local.lstrip("/"))

    if isinstance(scraped, str) and scraped.startswith(redirect_marker):
        # Remove only the leading marker, never text inside the target path.
        return redirect(scraped[len(redirect_marker):], 302)
    return render_template('index.html', data=scraped)
if __name__ == "__main__":
    # Only start the server when this file is executed directly, not when
    # it is imported.
    app.run(port=8000)
|