# __main__.py
import asyncio
import re
from contextlib import asynccontextmanager
from datetime import datetime
from json import JSONDecodeError
from pathlib import Path

import aiohttp
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, RedirectResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates
from selectolax.parser import HTMLParser, Node
  12. @asynccontextmanager
  13. async def lifespan(app: FastAPI):
  14. """Establishing an aiohttp ClientSession for the duration of the app's lifecycle."""
  15. global session
  16. session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(10))
  17. yield
  18. await session.close()
  19. ROOT_PATH = Path(__file__).parent
  20. app = FastAPI(lifespan=lifespan, docs_url=None, redoc_url=None)
  21. app.mount("/static", StaticFiles(directory=ROOT_PATH / "static"), name="static")
  22. templates = Jinja2Templates(directory=ROOT_PATH / "templates")
  23. session: aiohttp.ClientSession = None # pyright: ignore[reportAssignmentType]
  24. def remove_classes(node: Node) -> Node:
  25. """Recursively remove all classes from all nodes."""
  26. if "class" in node.attributes:
  27. del node.attrs["class"] # pyright: ignore [reportIndexIssue]
  28. for child in node.iter():
  29. remove_classes(child)
  30. return node
  31. @app.get("/{path:path}", response_class=HTMLResponse)
  32. async def catch_all(response: Request):
  33. """Handle all routes on Urban Dictionary and perform redirection if necessary."""
  34. path_without_host = (
  35. f"{response.url.path}{f'?{response.url.query}' if response.url.query else ''}"
  36. )
  37. url = f"https://www.urbandictionary.com{path_without_host}"
  38. term = response.query_params.get("term")
  39. async with session.get(url) as dict_response:
  40. if dict_response.history:
  41. return RedirectResponse(str(dict_response.url.relative()), status_code=301)
  42. html = await dict_response.text()
  43. parser = HTMLParser(html)
  44. if dict_response.status != 200:
  45. similar_words = None
  46. if (try_this := parser.css_first("div.try-these")) is not None:
  47. similar_words = [remove_classes(word).html for word in try_this.css("li a")]
  48. return templates.TemplateResponse(
  49. "404.html",
  50. {
  51. "request": response,
  52. "similar_words": similar_words,
  53. "term": term,
  54. "site_title": f"Rural Dictionary: {term}",
  55. "site_description": (
  56. "View on Rural Dictionary, an alternative private "
  57. "frontend to Urban Dictionary."
  58. ),
  59. },
  60. status_code=404,
  61. )
  62. results = []
  63. definitions = parser.css("div[data-defid]")
  64. try:
  65. thumbs_api_url = (
  66. f'https://api.urbandictionary.com/v0/uncacheable?ids='
  67. f'{",".join(d.attributes["data-defid"] or "-1" for d in definitions)}'
  68. )
  69. async with session.get(thumbs_api_url) as thumbs_response:
  70. thumbs_json = await thumbs_response.json()
  71. thumbs_data = {el["defid"]: el for el in thumbs_json["thumbs"]}
  72. except (KeyError, JSONDecodeError, TimeoutError):
  73. thumbs_data = {}
  74. site_description = None
  75. for definition in definitions:
  76. word = definition.css_first("a.word").text()
  77. meaning_node = remove_classes(definition.css_first("div.meaning"))
  78. if site_description is None:
  79. site_description = re.sub(r"\s+", " ", meaning_node.text(strip=True, separator=" "))
  80. meaning = meaning_node.html
  81. example = remove_classes(definition.css_first("div.example")).html
  82. contributor = remove_classes(definition.css_first("div.contributor")).html
  83. definition_id = int(definition.attributes["data-defid"] or "-1")
  84. definition_thumbs = thumbs_data.get(definition_id, {})
  85. thumbs_up = definition_thumbs.get("up")
  86. thumbs_down = definition_thumbs.get("down")
  87. results.append(
  88. [definition_id, word, meaning, example, contributor, thumbs_up, thumbs_down]
  89. )
  90. if (pagination := parser.css_first("div.pagination")) is not None:
  91. pagination = remove_classes(pagination)
  92. pagination.attrs["class"] = "pagination" # pyright: ignore [reportIndexIssue]
  93. pagination = pagination.html
  94. term = term or results[0][1]
  95. site_title = "Rural Dictionary"
  96. match response.url.path:
  97. case "/":
  98. # add current date for page with words of the day
  99. site_title += f', {datetime.now().strftime("%d %B")}'
  100. case "/random.php":
  101. term = "Random words"
  102. site_title += f": {term}"
  103. return templates.TemplateResponse(
  104. "index.html",
  105. {
  106. "request": response,
  107. "results": results,
  108. "pagination": pagination,
  109. "site_title": site_title,
  110. "site_description": site_description,
  111. },
  112. )