# __main__.py (4.3 KB)
  1. import re
  2. from contextlib import asynccontextmanager
  3. from datetime import datetime
  4. from json import JSONDecodeError
  5. from pathlib import Path
  6. import aiohttp
  7. from fastapi import FastAPI, Request
  8. from fastapi.responses import HTMLResponse, RedirectResponse
  9. from fastapi.staticfiles import StaticFiles
  10. from fastapi.templating import Jinja2Templates
  11. from selectolax.parser import HTMLParser, Node
  @asynccontextmanager
  async def lifespan(app: FastAPI):
      """Keep one shared aiohttp ClientSession open for the app's lifetime.

      The session is created with a 10-second total timeout and stored in the
      module-level ``session`` global before control is handed to the app; it
      is closed again when the context exits.

      Args:
          app: The FastAPI application being started. It is not used here.
      """
      global session
      session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(total=10))
      try:
          yield
      finally:
          # Close the session even when an exception or cancellation is thrown
          # in at the ``yield``; without ``finally`` the session would leak.
          await session.close()
  # Directory containing this module; "static" and "templates" are resolved against it.
  ROOT_PATH = Path(__file__).parent

  # Shared HTTP client. It is None at import time and is replaced with a real
  # session by the lifespan handler.
  session: aiohttp.ClientSession = None  # pyright: ignore[reportAssignmentType]

  templates = Jinja2Templates(directory=ROOT_PATH.joinpath("templates"))

  # docs_url/redoc_url are set to None, which turns off FastAPI's built-in docs pages.
  app = FastAPI(docs_url=None, redoc_url=None, lifespan=lifespan)
  app.mount(
      "/static",
      StaticFiles(directory=ROOT_PATH.joinpath("static")),
      name="static",
  )
  def remove_classes(node: Node) -> Node:
      """Strip the ``class`` attribute from ``node`` and every node beneath it.

      The tree is modified in place; the same ``node`` object is returned so
      the call can be chained.
      """
      pending = [node]
      while pending:
          current = pending.pop()
          if "class" in current.attributes:
              del current.attrs["class"]  # pyright: ignore [reportIndexIssue]
          # Queue the children; the loop then reaches their children in turn.
          pending.extend(current.iter())
      return node
  31. @app.get("/{path:path}", response_class=HTMLResponse)
  32. async def catch_all(response: Request):
  33. """Handle all routes on Urban Dictionary and perform redirection if necessary."""
  34. path_without_host = (
  35. f"{response.url.path}{f'?{response.url.query}' if response.url.query else ''}"
  36. )
  37. url = f"https://www.urbandictionary.com{path_without_host}"
  38. async with session.get(url) as dict_response:
  39. if dict_response.history:
  40. return RedirectResponse(str(dict_response.url.relative()), status_code=301)
  41. html = await dict_response.text()
  42. results = []
  43. parser = HTMLParser(html)
  44. definitions = parser.css("div[data-defid]")
  45. try:
  46. thumbs_api_url = (
  47. f'https://api.urbandictionary.com/v0/uncacheable?ids='
  48. f'{",".join(d.attributes["data-defid"] or "-1" for d in definitions)}'
  49. )
  50. async with session.get(thumbs_api_url) as thumbs_response:
  51. thumbs_json = await thumbs_response.json()
  52. thumbs_data = {el["defid"]: el for el in thumbs_json["thumbs"]}
  53. except (KeyError, JSONDecodeError):
  54. thumbs_data = {}
  55. site_description = None
  56. for definition in definitions:
  57. word = definition.css_first("a.word").text()
  58. meaning_node = remove_classes(definition.css_first("div.meaning"))
  59. if site_description is None:
  60. site_description = re.sub(r"\s+", " ", meaning_node.text(strip=True, separator=" "))
  61. meaning = meaning_node.html
  62. example = remove_classes(definition.css_first("div.example")).html
  63. contributor = remove_classes(definition.css_first("div.contributor")).html
  64. definition_id = int(definition.attributes["data-defid"] or "-1")
  65. definition_thumbs = thumbs_data.get(definition_id, {})
  66. thumbs_up = definition_thumbs.get("up")
  67. thumbs_down = definition_thumbs.get("down")
  68. results.append(
  69. [definition_id, word, meaning, example, contributor, thumbs_up, thumbs_down]
  70. )
  71. if (pagination := parser.css_first("div.pagination")) is not None:
  72. pagination = remove_classes(pagination)
  73. pagination.attrs["class"] = "pagination" # pyright: ignore [reportIndexIssue]
  74. pagination = pagination.html
  75. term = response.query_params.get("term", results[0][1])
  76. site_title = "Rural Dictionary"
  77. match response.url.path:
  78. case "/":
  79. # add current date for page with words of the day
  80. site_title += f', {datetime.now().strftime("%d %B")}'
  81. case "/random.php":
  82. term = "Random words"
  83. site_title += f": {term}"
  84. return templates.TemplateResponse(
  85. "index.html",
  86. {
  87. "request": response,
  88. "results": results,
  89. "pagination": pagination,
  90. "site_title": site_title,
  91. "site_description": site_description,
  92. },
  93. )
  def main():
      """Serve the app with uvicorn on 0.0.0.0:5758 with the access log disabled.

      This is the production entry point and is meant to run inside a container.
      """
      from uvicorn import run as serve

      serve(app, host="0.0.0.0", port=5758, access_log=False)  # noqa: S104


  if __name__ == "__main__":
      main()