Browse Source

Refactor index.php for improved DOM handling and API integration

- Changed constants from define() calls to 'const' declarations for better performance and clarity.
- Refactored DOM manipulation functions to enhance readability and maintainability.
- Introduced pagination handling within the DOM processing function.
- Optimized cURL initialization to reuse the same handle for multiple requests.
- Simplified the fetching of thumbs data from the Urban Dictionary API.
- Updated source code link in the footer for accuracy.
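- Improved error handling and response processing for better user experience (e.g. the thumbs request now has a timeout and tolerates an empty response or one without a 'thumbs' entry).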
poesty 3 months ago
parent
commit
4cd7eb60f6
1 changed file with 84 additions and 96 deletions
  1. index.php (84 additions, 96 deletions)

+ 84 - 96
index.php

@@ -4,72 +4,87 @@ error_reporting(E_ALL);
 ini_set('display_errors', 1);
 
 // Constants and helper functions
-define('URBAN_DICT_BASE', 'https://www.urbandictionary.com');
-define('URBAN_API_BASE', 'https://api.urbandictionary.com/v0');
+const URBAN_DICT_BASE = 'https://www.urbandictionary.com';
+const URBAN_API_BASE = 'https://api.urbandictionary.com/v0';
+const DOM_OPTIONS = LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD;
 
-function remove_classes($html) {
+function handle_dom($html, $is_pagination = false) {
+    if (empty($html)) return $html;
+    
     $dom = new DOMDocument();
-    @$dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
+    @$dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), DOM_OPTIONS);
     $xpath = new DOMXPath($dom);
-    $elements = $xpath->query('//*[@class]');
-    foreach ($elements as $element) {
+    
+    // Remove all classes
+    foreach ($xpath->query('//*[@class]') as $element) {
         $element->removeAttribute('class');
     }
+    
+    // Handle pagination
+    if ($is_pagination && ($div = $xpath->query("//div")[0])) {
+        $div->setAttribute('class', 'pagination');
+        
+        // Fix pagination links for subdirectory
+        foreach ($xpath->query("//a[@href]") as $link) {
+            $href = $link->getAttribute('href');
+            if (str_starts_with($href, '/?')) {
+                $link->setAttribute('href', './' . substr($href, 1));
+            }
+        }
+    }
+    
     return $dom->saveHTML();
 }
 
 function fetch_url($url) {
-    $ch = curl_init();
+    static $ch = null;
+    if (!$ch) {
+        $ch = curl_init();
+        curl_setopt_array($ch, [
+            CURLOPT_RETURNTRANSFER => true,
+            CURLOPT_FOLLOWLOCATION => true,
+            CURLOPT_TIMEOUT => 10,
+            CURLOPT_ENCODING => '',
+            CURLOPT_USERAGENT => 'Mozilla/5.0 Rural Dictionary',
+            CURLOPT_IPRESOLVE => CURL_IPRESOLVE_V4,
+            CURLOPT_TCP_FASTOPEN => 1,
+        ]);
+    }
     curl_setopt($ch, CURLOPT_URL, $url);
-    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
-    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
-    curl_setopt($ch, CURLOPT_TIMEOUT, 10);
-    $response = curl_exec($ch);
-    $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
-    $final_url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
-    curl_close($ch);
-    return [$response, $http_code, $final_url];
+    return [
+        curl_exec($ch),
+        curl_getinfo($ch, CURLINFO_HTTP_CODE),
+        curl_getinfo($ch, CURLINFO_EFFECTIVE_URL)
+    ];
 }
 
-function add_pagination_class($html) {
-    if (empty($html)) return $html;
-    
-    $dom = new DOMDocument();
-    @$dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
-    $xpath = new DOMXPath($dom);
-    
-    // Find the pagination div
-    $pagination = $xpath->query("//div")[0];
-    if ($pagination) {
-        $pagination->setAttribute('class', 'pagination');
-    }
-    
-    return $dom->saveHTML();
+function fetch_thumbs($def_ids) {
+    if (empty($def_ids)) return [];
+    $response = @file_get_contents(
+        URBAN_API_BASE . '/uncacheable?ids=' . implode(',', $def_ids),
+        false,
+        stream_context_create(['http' => ['timeout' => 5, 'ignore_errors' => true]])
+    );
+    $data = $response ? json_decode($response, true) : [];
+    return isset($data['thumbs']) ? array_column($data['thumbs'], null, 'defid') : [];
 }
 
-// Get current path and query
-$request_uri = $_SERVER['REQUEST_URI'];
-$path = parse_url($request_uri, PHP_URL_PATH);
-$query = parse_url($request_uri, PHP_URL_QUERY);
-$term = isset($_GET['term']) ? $_GET['term'] : '';
-
-// Construct Urban Dictionary URL
-$url = URBAN_DICT_BASE . $path . ($query ? "?$query" : '');
-
-// Fetch content from Urban Dictionary
-[$html, $status_code, $final_url] = fetch_url($url);
+// Get request info
+$path = parse_url($_SERVER['REQUEST_URI'], PHP_URL_PATH);
+$query = parse_url($_SERVER['REQUEST_URI'], PHP_URL_QUERY);
+$term = $_GET['term'] ?? '';
 
-// Handle redirects
-if ($final_url !== $url) {
-    $new_path = parse_url($final_url, PHP_URL_PATH);
-    $new_query = parse_url($final_url, PHP_URL_QUERY);
-    header("Location: $new_path" . ($new_query ? "?$new_query" : ''));
+// Fetch and handle redirects
+[$html, $status_code, $final_url] = fetch_url(URBAN_DICT_BASE . $path . ($query ? "?$query" : ''));
+if ($final_url !== URBAN_DICT_BASE . $path . ($query ? "?$query" : '')) {
+    header('Location: ' . parse_url($final_url, PHP_URL_PATH) . 
+           (($q = parse_url($final_url, PHP_URL_QUERY)) ? "?$q" : ''));
     exit;
 }
 
 // Parse HTML
 $dom = new DOMDocument();
-@$dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD);
+@$dom->loadHTML(mb_convert_encoding($html, 'HTML-ENTITIES', 'UTF-8'), DOM_OPTIONS);
 $xpath = new DOMXPath($dom);
 
 $results = [];
@@ -77,72 +92,45 @@ $site_description = null;
 $pagination = null;
 
 if ($status_code !== 200) {
-    // Handle 404 page
-    $similar_words = [];
-    $try_these = $xpath->query("//div[contains(@class, 'try-these')]//li/a");
-    if ($try_these->length > 0) {
-        foreach ($try_these as $word) {
-            $similar_words[] = remove_classes($dom->saveHTML($word));
-        }
-    }
+    $similar_words = array_map(
+        fn($word) => handle_dom($dom->saveHTML($word)),
+        iterator_to_array($xpath->query("//div[contains(@class, 'try-these')]//li/a"))
+    );
     $template = '404';
 } else {
-    // Parse definitions
     $definitions = $xpath->query("//div[@data-defid]");
-    $def_ids = [];
+    $def_ids = array_map(fn($def) => $def->getAttribute('data-defid'), iterator_to_array($definitions));
+    $thumbs_data = fetch_thumbs($def_ids);
     
     foreach ($definitions as $def) {
-        $def_ids[] = $def->getAttribute('data-defid');
-    }
-    
-    // Fetch thumbs data
-    $thumbs_data = [];
-    if (!empty($def_ids)) {
-        $thumbs_url = URBAN_API_BASE . '/uncacheable?ids=' . implode(',', $def_ids);
-        $thumbs_response = @file_get_contents($thumbs_url);
-        if ($thumbs_response !== false) {
-            $thumbs_json = json_decode($thumbs_response, true);
-            foreach ($thumbs_json['thumbs'] as $thumb) {
-                $thumbs_data[$thumb['defid']] = $thumb;
-            }
-        }
-    }
-    
-    foreach ($definitions as $definition) {
-        $def_id = $definition->getAttribute('data-defid');
-        $word = $xpath->query(".//a[contains(@class, 'word')]", $definition)[0]->textContent;
-        $meaning = remove_classes($dom->saveHTML($xpath->query(".//div[contains(@class, 'meaning')]", $definition)[0]));
-        $example = remove_classes($dom->saveHTML($xpath->query(".//div[contains(@class, 'example')]", $definition)[0]));
-        $contributor = remove_classes($dom->saveHTML($xpath->query(".//div[contains(@class, 'contributor')]", $definition)[0]));
+        $def_id = $def->getAttribute('data-defid');
+        $word = $xpath->query(".//a[contains(@class, 'word')]", $def)[0]->textContent;
+        $meaning = handle_dom($dom->saveHTML($xpath->query(".//div[contains(@class, 'meaning')]", $def)[0]));
+        $example = handle_dom($dom->saveHTML($xpath->query(".//div[contains(@class, 'example')]", $def)[0]));
+        $contributor = handle_dom($dom->saveHTML($xpath->query(".//div[contains(@class, 'contributor')]", $def)[0]));
         
-        if ($site_description === null) {
+        if (!$site_description) {
             $site_description = preg_replace('/\s+/', ' ', strip_tags($meaning));
         }
         
-        $thumbs_up = isset($thumbs_data[$def_id]) ? $thumbs_data[$def_id]['up'] : null;
-        $thumbs_down = isset($thumbs_data[$def_id]) ? $thumbs_data[$def_id]['down'] : null;
-        
-        $results[] = [$def_id, $word, $meaning, $example, $contributor, $thumbs_up, $thumbs_down];
+        $thumbs = $thumbs_data[$def_id] ?? [];
+        $results[] = [
+            $def_id, $word, $meaning, $example, $contributor,
+            $thumbs['up'] ?? null, $thumbs['down'] ?? null
+        ];
     }
     
-    $pagination_node = $xpath->query("//div[contains(@class, 'pagination')]");
-    if ($pagination_node->length > 0) {
-        $pagination = add_pagination_class(remove_classes($dom->saveHTML($pagination_node[0])));
+    if ($pagination_node = $xpath->query("//div[contains(@class, 'pagination')]")[0]) {
+        $pagination = handle_dom($dom->saveHTML($pagination_node), true);
     }
     
     $template = 'index';
 }
 
 // Set title
-$site_title = 'Rural Dictionary';
-if ($path === '/') {
-    $site_title .= ', ' . date('d F');
-} elseif ($path === '/random.php') {
-    $term = 'Random words';
-}
-if ($term) {
-    $site_title .= ": $term";
-}
+$site_title = 'Rural Dictionary' . 
+    ($path === '/' ? ', ' . date('d F') : '') .
+    ($path === '/random.php' ? ': Random words' : ($term ? ": $term" : ''));
 
 // Output HTML
 ?>
@@ -185,7 +173,7 @@ if ($term) {
         </form>
         <a href="./random.php">Random</a>
         <br>
-        <a href="https://git.vern.cc/cobra/rural-dict">Source Code</a>
+        <a href="https://git.qunn.eu/poesty/rural-dict/src/php-port">Source Code</a>
     </div>
     <br>