main.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859
  1. #!/usr/bin/env python
  2. from flask import Flask, render_template, request, redirect
  3. import requests
  4. import html
  5. import re
  6. from bs4 import BeautifulSoup
  7. from urllib.parse import quote, unquote
  8. def scrape(url):
  9. data = requests.get(url)
  10. our_path = re.sub(r".*://.*/", "/", request.url)
  11. path = re.sub(r".*://.*/", "/", data.url)
  12. if our_path != path and \
  13. quote(unquote(re.sub("[?&=]", "", our_path))) != re.sub("[?&=]", "", path):
  14. # this is bad ^
  15. return f"REDIRECT {path}"
  16. ret = []
  17. soup = BeautifulSoup(data.text, "html.parser")
  18. defs = [(div, div.get('data-defid')) for div in soup.find_all("div") if div.get('data-defid')]
  19. try:
  20. thumbs_data = {
  21. str(entry['defid']): entry
  22. for entry
  23. in requests.get(
  24. 'https://api.urbandictionary.com/v0/uncacheable?ids=' + ','.join(defid for (_, defid) in defs)
  25. ).json()['thumbs']
  26. }
  27. except:
  28. thumbs_data = {}
  29. for (definition, defid) in defs:
  30. word = definition.select("div div h1 a, div div h2 a")[0].text
  31. meaning = definition.find(attrs={"class" : ["break-words meaning mb-4"]}).decode_contents()
  32. example = definition.find(attrs={"class" : ["break-words example italic mb-4"]}).decode_contents()
  33. contributor = definition.find(attrs={"class" : ["contributor font-bold"]})
  34. thumbs_up = thumbs_data.get(defid, {}).get('up')
  35. thumbs_down = thumbs_data.get(defid, {}).get('down')
  36. ret.append([defid, word, meaning, example, contributor, thumbs_up, thumbs_down])
  37. pages = soup.find(attrs={"class" : ["pagination text-xl text-center"]})
  38. if pages == None:
  39. pages = ""
  40. return (ret, pages)
  41. app = Flask(__name__, template_folder="templates", static_folder="static")
  42. @app.route('/', defaults={'path': ''})
  43. @app.route('/<path:path>')
  44. def catch_all(path):
  45. scraped = scrape(f"https://urbandictionary.com/{re.sub(r'.*://.*/', '/', request.url)}")
  46. if type(scraped) == str and scraped.startswith("REDIRECT"):
  47. return redirect(scraped.replace("REDIRECT ", ""), 302)
  48. return render_template('index.html', data=scraped, term=request.args.get("term"))
  49. if __name__ == '__main__':
  50. from waitress import serve
  51. serve(app, host="0.0.0.0", port=8080)