main.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125
  1. import re
  2. from contextlib import asynccontextmanager
  3. from datetime import datetime
  4. from json import JSONDecodeError
  5. import aiohttp
  6. from fastapi import FastAPI, Request
  7. from fastapi.responses import HTMLResponse, RedirectResponse
  8. from fastapi.staticfiles import StaticFiles
  9. from fastapi.templating import Jinja2Templates
  10. from selectolax.parser import HTMLParser, Node
  11. @asynccontextmanager
  12. async def lifespan(app: FastAPI):
  13. """Establishing an aiohttp ClientSession for the duration of the app's lifecycle."""
  14. global session
  15. session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(10))
  16. yield
  17. await session.close()
  18. app = FastAPI(lifespan=lifespan, docs_url=None, redoc_url=None)
  19. app.mount("/static", StaticFiles(directory="static"), name="static")
  20. templates = Jinja2Templates(directory="templates")
  21. session: aiohttp.ClientSession = None # pyright: ignore[reportAssignmentType]
  22. def remove_classes(node: Node) -> Node:
  23. """Recursively remove all classes from all nodes."""
  24. if "class" in node.attributes:
  25. del node.attrs["class"] # pyright: ignore [reportIndexIssue]
  26. for child in node.iter():
  27. remove_classes(child)
  28. return node
  29. @app.get("/{path:path}", response_class=HTMLResponse)
  30. async def catch_all(response: Request):
  31. """Handle all routes on Urban Dictionary and perform redirection if necessary."""
  32. path_without_host = (
  33. f"{response.url.path}{f'?{response.url.query}' if response.url.query else ''}"
  34. )
  35. url = f"https://www.urbandictionary.com{path_without_host}"
  36. term = response.query_params.get("term")
  37. async with session.get(url) as dict_response:
  38. if dict_response.history:
  39. return RedirectResponse(str(dict_response.url.relative()), status_code=301)
  40. html = await dict_response.text()
  41. parser = HTMLParser(html)
  42. if dict_response.status != 200:
  43. similar_words = None
  44. if (try_this := parser.css_first("div.try-these")) is not None:
  45. similar_words = [remove_classes(word).html for word in try_this.css("li a")]
  46. return templates.TemplateResponse(
  47. "404.html",
  48. {
  49. "request": response,
  50. "similar_words": similar_words,
  51. "term": term,
  52. "site_title": f"Rural Dictionary: {term}",
  53. "site_description": (
  54. "View on Rural Dictionary, an alternative private "
  55. "frontend to Urban Dictionary."
  56. ),
  57. },
  58. status_code=404,
  59. )
  60. results = []
  61. definitions = parser.css("div[data-defid]")
  62. try:
  63. thumbs_api_url = (
  64. f'https://api.urbandictionary.com/v0/uncacheable?ids='
  65. f'{",".join(d.attributes["data-defid"] or "-1" for d in definitions)}'
  66. )
  67. async with session.get(thumbs_api_url) as thumbs_response:
  68. thumbs_json = await thumbs_response.json()
  69. thumbs_data = {el["defid"]: el for el in thumbs_json["thumbs"]}
  70. except (KeyError, JSONDecodeError, TimeoutError):
  71. thumbs_data = {}
  72. site_description = None
  73. for definition in definitions:
  74. word = definition.css_first("a.word").text()
  75. meaning_node = remove_classes(definition.css_first("div.meaning"))
  76. if site_description is None:
  77. site_description = re.sub(r"\s+", " ", meaning_node.text(strip=True, separator=" "))
  78. meaning = meaning_node.html
  79. example = remove_classes(definition.css_first("div.example")).html
  80. contributor = remove_classes(definition.css_first("div.contributor")).html
  81. definition_id = int(definition.attributes["data-defid"] or "-1")
  82. definition_thumbs = thumbs_data.get(definition_id, {})
  83. thumbs_up = definition_thumbs.get("up")
  84. thumbs_down = definition_thumbs.get("down")
  85. results.append(
  86. [definition_id, word, meaning, example, contributor, thumbs_up, thumbs_down]
  87. )
  88. if (pagination := parser.css_first("div.pagination")) is not None:
  89. pagination = remove_classes(pagination)
  90. pagination.attrs["class"] = "pagination" # pyright: ignore [reportIndexIssue]
  91. pagination = pagination.html
  92. term = term or results[0][1]
  93. site_title = "Rural Dictionary"
  94. match response.url.path:
  95. case "/":
  96. # add current date for page with words of the day
  97. site_title += f', {datetime.now().strftime("%d %B")}'
  98. case "/random.php":
  99. term = "Random words"
  100. site_title += f": {term}"
  101. return templates.TemplateResponse(
  102. "index.html",
  103. {
  104. "request": response,
  105. "results": results,
  106. "pagination": pagination,
  107. "term": term,
  108. "site_title": site_title,
  109. "site_description": site_description,
  110. },
  111. )