mwmbl.php 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. <?php
  /**
   * Scraper for the Mwmbl search engine (https://beta.mwmbl.org/).
   *
   * Fetches the HTML result page over cURL and converts every
   * <li class="result"> element into a web-result entry.
   */
  class mwmbl{

      /**
       * Helper from lib/backend.php; used here for get_ip() and
       * assign_proxy(). Declared explicitly because creating undeclared
       * (dynamic) properties is deprecated as of PHP 8.2.
       */
      public $backend;

      /** HTML parser from lib/fuckhtml.php. */
      public $fuckhtml;

      /**
       * Loads the helper libraries and instantiates them.
       */
      public function __construct(){

          // *_once: a second instantiation in the same request must not
          // try to re-declare the helper classes.
          include_once "lib/backend.php";
          $this->backend = new backend("mwmbl");

          include_once "lib/fuckhtml.php";
          $this->fuckhtml = new fuckhtml();
      }

      /**
       * Returns the filters available for a page.
       *
       * @param mixed $page Requested page; unused, no filters are exposed.
       * @return array Always an empty array.
       */
      public function getfilters($page){

          return [];
      }

      /**
       * Performs a GET request and returns the raw response body.
       *
       * @param mixed  $proxy Proxy descriptor, forwarded to backend::assign_proxy().
       * @param string $url   Target URL without query string.
       * @param array  $get   Query parameters appended to $url when non-empty.
       * @return string|bool  Whatever curl_exec() returned.
       * @throws Exception    With the cURL error message when the transfer fails.
       */
      private function get($proxy, $url, $get = []){

          if($get !== []){

              $url .= "?" . http_build_query($get);
          }

          $curlproc = curl_init();

          // finally{} guarantees the handle is closed on the error path
          // too; the original threw before reaching curl_close().
          try{

              curl_setopt($curlproc, CURLOPT_URL, $url);

              // use http2
              curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
              curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
              curl_setopt($curlproc, CURLOPT_HTTPHEADER,
                  ["User-Agent: " . config::USER_AGENT,
                  "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
                  "Accept-Language: en-US,en;q=0.5",
                  "Accept-Encoding: gzip",
                  "Referer: https://beta.mwmbl.org/",
                  "DNT: 1",
                  "Sec-GPC: 1",
                  "Connection: keep-alive",
                  "Upgrade-Insecure-Requests: 1",
                  "Sec-Fetch-Dest: document",
                  "Sec-Fetch-Mode: navigate",
                  "Sec-Fetch-Site: same-origin",
                  "Priority: u=0, i",
                  "Sec-Fetch-User: ?1"]
              );

              curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
              curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
              curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
              curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
              curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

              $this->backend->assign_proxy($curlproc, $proxy);

              $data = curl_exec($curlproc);

              if(curl_errno($curlproc)){

                  // the message is read here, before finally{} closes the handle
                  throw new Exception(curl_error($curlproc));
              }

              return $data;

          }finally{

              curl_close($curlproc);
          }
      }

      /**
       * Runs a web search and returns the parsed results.
       *
       * @param array $get Request parameters; "s" holds the search term.
       * @return array Response with the keys status, spelling, npt, answer,
       *               web, image, video, news and related. Only "web" is
       *               populated by this scraper.
       * @throws Exception When the search term is empty or the page cannot
       *                   be fetched.
       */
      public function web($get){

          // a missing "s" is treated like an empty search term
          $search = isset($get["s"]) ? $get["s"] : "";

          if(strlen($search) === 0){

              throw new Exception("Search term is empty!");
          }

          try{

              $html = $this->get(
                  $this->backend->get_ip(), // no next page!
                  "https://beta.mwmbl.org/",
                  [
                      "q" => $search
                  ]
              );
          }catch(Exception $error){

              // same message as before; the underlying cURL error is kept
              // reachable through getPrevious()
              throw new Exception(
                  "Failed to fetch HTML. If you're getting a timeout, make sure you have curl-impersonate setup.",
                  0,
                  $error
              );
          }

          $out = [
              "status" => "ok",
              "spelling" => [
                  "type" => "no_correction",
                  "using" => null,
                  "correction" => null
              ],
              "npt" => null,
              "answer" => [],
              "web" => [],
              "image" => [],
              "video" => [],
              "news" => [],
              "related" => []
          ];

          $this->fuckhtml->load($html);

          $results =
              $this->fuckhtml
              ->getElementsByClassName(
                  "result",
                  "li"
              );

          foreach($results as $result){

              $this->fuckhtml->load($result);

              // paragraphs of the result itself; collected before the
              // parser is pointed at the nested "more" blocks below
              $p =
                  $this->fuckhtml
                  ->getElementsByTagName("p");

              $sublinks = [];

              $mores =
                  $this->fuckhtml
                  ->getElementsByClassName(
                      "result-link-more",
                      "div"
                  );

              foreach($mores as $more){

                  $this->fuckhtml->load($more);

                  $as =
                      $this->fuckhtml
                      ->getElementsByClassName(
                          "more",
                          "a"
                      );

                  if(!isset($as[0]["attributes"]["href"])){

                      // ?? invalid: no usable link
                      continue;
                  }

                  $sublinks[] = [
                      "title" =>
                          $this->firsttext(
                              $this->fuckhtml
                              ->getElementsByClassName(
                                  "more-title",
                                  "span"
                              )
                          ),
                      "description" =>
                          $this->firsttext(
                              $this->fuckhtml
                              ->getElementsByClassName(
                                  "more-extract",
                                  "span"
                              )
                          ),
                      "url" =>
                          $this->fuckhtml
                          ->getTextContent(
                              $as[0]
                              ["attributes"]
                              ["href"]
                          )
                  ];
              }

              // reset
              $this->fuckhtml->load($result);

              $links =
                  $this->fuckhtml
                  ->getElementsByTagName("a");

              if(!isset($links[0]["attributes"]["href"])){

                  // same rule as for sublinks: skip entries without a link
                  continue;
              }

              $out["web"][] = [
                  "title" =>
                      $this->firsttext(
                          $this->fuckhtml
                          ->getElementsByClassName(
                              "title",
                              $p
                          )
                      ),
                  "description" =>
                      $this->firsttext(
                          $this->fuckhtml
                          ->getElementsByClassName(
                              "extract",
                              $p
                          )
                      ),
                  "url" =>
                      $this->fuckhtml
                      ->getTextContent(
                          $links[0]
                          ["attributes"]
                          ["href"]
                      ),
                  "date" => null,
                  "type" => "web",
                  "thumb" => [
                      "url" => null,
                      "ratio" => null
                  ],
                  "sublink" => $sublinks,
                  "table" => []
              ];
          }

          return $out;
      }

      /**
       * Text of the first element of a parser result list, with trailing
       * ellipses removed.
       *
       * @param array $elements Elements returned by the HTML parser.
       * @return string Cleaned text, or an empty string when $elements has
       *                no first element.
       */
      private function firsttext($elements){

          if(!isset($elements[0])){

              return "";
          }

          return
              $this->titledots(
                  $this->fuckhtml
                  ->getTextContent(
                      $elements[0]
                  )
              );
      }

      /**
       * Removes every trailing "…" (U+2026) from a string.
       *
       * rtrim($title, "…") must not be used for this: its character list
       * is a set of bytes, so it would also chop the final byte off any
       * other multi-byte character ending in 0xE2, 0x80 or 0xA6 (for
       * example "æ" or "一") and leave invalid UTF-8 behind.
       *
       * @param string $title Text to clean.
       * @return string $title without trailing ellipsis characters.
       */
      private function titledots($title){

          $title = (string)$title;

          $ellipsis = "\xE2\x80\xA6"; // UTF-8 encoding of "…"
          $length = strlen($ellipsis);

          while(substr($title, -$length) === $ellipsis){

              // cast: substr() returned false instead of "" before PHP 8
              $title = (string)substr($title, 0, -$length);
          }

          return $title;
      }
  }