import re from contextlib import asynccontextmanager from datetime import datetime from json import JSONDecodeError from pathlib import Path import aiohttp from fastapi import FastAPI, Request from fastapi.responses import HTMLResponse, RedirectResponse from fastapi.staticfiles import StaticFiles from fastapi.templating import Jinja2Templates from selectolax.parser import HTMLParser, Node @asynccontextmanager async def lifespan(app: FastAPI): """Establishing an aiohttp ClientSession for the duration of the app's lifecycle.""" global session session = aiohttp.ClientSession(timeout=aiohttp.ClientTimeout(10)) yield await session.close() ROOT_PATH = Path(__file__).parent app = FastAPI(lifespan=lifespan, docs_url=None, redoc_url=None) app.mount("/static", StaticFiles(directory=ROOT_PATH / "static"), name="static") templates = Jinja2Templates(directory=ROOT_PATH / "templates") session: aiohttp.ClientSession = None # pyright: ignore[reportAssignmentType] def remove_classes(node: Node) -> Node: """Recursively remove all classes from all nodes.""" if "class" in node.attributes: del node.attrs["class"] # pyright: ignore [reportIndexIssue] for child in node.iter(): remove_classes(child) return node @app.get("/{path:path}", response_class=HTMLResponse) async def catch_all(response: Request): """Handle all routes on Urban Dictionary and perform redirection if necessary.""" path_without_host = ( f"{response.url.path}{f'?{response.url.query}' if response.url.query else ''}" ) url = f"https://www.urbandictionary.com{path_without_host}" term = response.query_params.get("term") async with session.get(url) as dict_response: if dict_response.history: return RedirectResponse(str(dict_response.url.relative()), status_code=301) html = await dict_response.text() parser = HTMLParser(html) if dict_response.status != 200: similar_words = None if (try_this := parser.css_first("div.try-these")) is not None: similar_words = [remove_classes(word).html for word in try_this.css("li a")] return templates.TemplateResponse( "404.html", { "request": response, "similar_words": similar_words, "term": term, "site_title": f"Rural Dictionary: {term}", "site_description": ( "View on Rural Dictionary, an alternative private " "frontend to Urban Dictionary." ), }, status_code=404, ) results = [] definitions = parser.css("div[data-defid]") try: thumbs_api_url = ( f'https://api.urbandictionary.com/v0/uncacheable?ids=' f'{",".join(d.attributes["data-defid"] or "-1" for d in definitions)}' ) async with session.get(thumbs_api_url) as thumbs_response: thumbs_json = await thumbs_response.json() thumbs_data = {el["defid"]: el for el in thumbs_json["thumbs"]} except (KeyError, JSONDecodeError, TimeoutError): thumbs_data = {} site_description = None for definition in definitions: word = definition.css_first("a.word").text() meaning_node = remove_classes(definition.css_first("div.meaning")) if site_description is None: site_description = re.sub(r"\s+", " ", meaning_node.text(strip=True, separator=" ")) meaning = meaning_node.html example = remove_classes(definition.css_first("div.example")).html contributor = remove_classes(definition.css_first("div.contributor")).html definition_id = int(definition.attributes["data-defid"] or "-1") definition_thumbs = thumbs_data.get(definition_id, {}) thumbs_up = definition_thumbs.get("up") thumbs_down = definition_thumbs.get("down") results.append( [definition_id, word, meaning, example, contributor, thumbs_up, thumbs_down] ) if (pagination := parser.css_first("div.pagination")) is not None: pagination = remove_classes(pagination) pagination.attrs["class"] = "pagination" # pyright: ignore [reportIndexIssue] pagination = pagination.html term = term or results[0][1] site_title = "Rural Dictionary" match response.url.path: case "/": # add current date for page with words of the day site_title += f', {datetime.now().strftime("%d %B")}' case "/random.php": term = "Random words" site_title += f": {term}" return templates.TemplateResponse( "index.html", { "request": response, "results": results, "pagination": pagination, "site_title": site_title, "site_description": site_description, }, )