curlie.php 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. <?php
  /**
   * Scraper for the curlie.org web directory search.
   *
   * Exposes the language filter offered by the site and turns a search
   * result page into the result array built in web().
   */
  class curlie{

      /**
       * Helper created from lib/backend.php; used here for proxy selection
       * and for storing/retrieving next-page tokens.
       *
       * Declared explicitly (instead of being created on first assignment)
       * because dynamic properties are deprecated since PHP 8.2. Kept public
       * and untyped so any existing external access keeps working.
       *
       * @var backend
       */
      public $backend;

      /**
       * HTML parser created from lib/fuckhtml.php.
       *
       * @var fuckhtml
       */
      public $fuckhtml;

      /**
       * Loads the helper libraries and instantiates them.
       */
      public function __construct(){

          // include_once: a plain include would redeclare the classes (fatal
          // error) if the libraries were already loaded earlier in the request
          include_once "lib/backend.php";
          $this->backend = new backend("curlie");

          include_once "lib/fuckhtml.php";
          $this->fuckhtml = new fuckhtml();
      }

      /**
       * Returns the filters available for a given search page type.
       *
       * @param string $page Page type; only "web" has filters.
       * @return array Filter definitions keyed by filter name, or [] for
       *               any page type other than "web".
       */
      public function getfilters($page){

          if($page != "web"){

              return [];
          }

          return [
              "lang" => [
                  "display" => "Language",
                  "option" => [
                      "any" => "Any language",
                      "en" => "English",
                      "de" => "German",
                      "fr" => "French",
                      "ja" => "Japanese",
                      "it" => "Italian",
                      "es" => "Spanish",
                      "ru" => "Russian",
                      "nl" => "Dutch",
                      "pl" => "Polish",
                      "tr" => "Turkish",
                      "da" => "Danish",
                      "sv" => "Swedish",
                      "no" => "Norwegian",
                      "is" => "Icelandic",
                      "fo" => "Faroese",
                      "fi" => "Finnish",
                      "et" => "Estonian",
                      "lt" => "Lithuanian",
                      "lv" => "Latvian",
                      "cy" => "Welsh",
                      "ga" => "Irish",
                      "gd" => "Scottish Gaelic",
                      "br" => "Breton",
                      "fy" => "Frisian",
                      "frr" => "North Frisian",
                      "gem" => "Saterland Frisian",
                      "lb" => "Luxembourgish",
                      "rm" => "Romansh",
                      "pt" => "Portuguese",
                      "ca" => "Catalan",
                      "gl" => "Galician",
                      "eu" => "Basque",
                      "ast" => "Asturian",
                      "an" => "Aragonese",
                      "fur" => "Friulan",
                      "sc" => "Sardinian",
                      "scn" => "Sicilian",
                      "oc" => "Occitan",
                      "be" => "Belarusian",
                      "cs" => "Czech",
                      "hu" => "Hungarian",
                      "sk" => "Slovak",
                      "uk" => "Ukrainian",
                      "csb" => "Kashubian",
                      "tt" => "Tatar",
                      "ba" => "Bashkir",
                      "os" => "Ossetian",
                      "sl" => "Slovene",
                      "sr" => "Serbian",
                      "hr" => "Croatian",
                      "bs" => "Bosnian",
                      "bg" => "Bulgarian",
                      "sq" => "Albanian",
                      "ro" => "Romanian",
                      "mk" => "Macedonian",
                      "el" => "Greek",
                      "iw" => "Hebrew",
                      "fa" => "Persian",
                      "ar" => "Arabic",
                      "ku" => "Kurdish",
                      "az" => "Azerbaijani",
                      "hy" => "Armenian",
                      "af" => "Afrikaans",
                      "sw" => "Kiswahili",
                      "uz" => "Uzbek",
                      "kk" => "Kazakh",
                      "ky" => "Kyrgyz",
                      "tg" => "Tajik",
                      "tk" => "Turkmen",
                      "ug" => "Uyghurche",
                      "hi" => "Hindi",
                      "si" => "Sinhalese",
                      "gu" => "Gujarati",
                      "ur" => "Urdu",
                      "mr" => "Marathi",
                      "pa" => "Punjabi",
                      "bn" => "Bengali",
                      "ta" => "Tamil",
                      "te" => "Telugu",
                      "kn" => "Kannada",
                      "zh_CN" => "Chinese Simplified",
                      "zh_TW" => "Chinese Traditional",
                      "ko" => "Korean",
                      "cfr" => "Taiwanese",
                      "th" => "Thai",
                      "vi" => "Vietnamese",
                      "in" => "Indonesian",
                      "ms" => "Malay",
                      "tl" => "Tagalog",
                      "eo" => "Esperanto",
                      "ia" => "Interlingua",
                      "la" => "Latin"
                  ]
              ]
          ];
      }

      /**
       * Performs a GET request and returns the response body.
       *
       * @param mixed  $proxy Proxy descriptor handed to backend::assign_proxy().
       * @param string $url   Target URL without query string.
       * @param array  $get   Query parameters; appended to $url when non-empty.
       * @return string|bool  Whatever curl_exec() returns for the transfer.
       * @throws Exception    With the cURL error message when the transfer fails.
       */
      private function get($proxy, $url, $get = []){

          $curlproc = curl_init();

          try{

              if($get !== []){

                  $url .= "?" . http_build_query($get);
              }

              curl_setopt($curlproc, CURLOPT_URL, $url);

              curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
              curl_setopt($curlproc, CURLOPT_HTTPHEADER,
                  ["User-Agent: " . config::USER_AGENT,
                  "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
                  "Accept-Language: en-US,en;q=0.5",
                  "Accept-Encoding: gzip",
                  "DNT: 1",
                  "Connection: keep-alive",
                  "Upgrade-Insecure-Requests: 1",
                  "Sec-Fetch-Dest: document",
                  "Sec-Fetch-Mode: navigate",
                  "Sec-Fetch-Site: none",
                  "Sec-Fetch-User: ?1"]
              );

              curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
              curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
              curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
              curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
              curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

              $this->backend->assign_proxy($curlproc, $proxy);

              $data = curl_exec($curlproc);

              if(curl_errno($curlproc)){

                  // the message is read before the finally block closes the handle
                  throw new Exception(curl_error($curlproc));
              }

              return $data;

          }finally{

              // release the handle on the error path too (the call has no
              // effect on PHP >= 8.0, where handles are objects)
              curl_close($curlproc);
          }
      }

      /**
       * Runs a web search (or fetches a stored next page) and parses the
       * result list.
       *
       * @param array $get Request parameters. Reads "npt" (next-page token,
       *                   falsy for a fresh search), "s" (search terms) and
       *                   "lang" (language code or "any").
       * @return array Result structure with "status", "spelling", "npt",
       *               "answer", "web", "image", "video", "news" and "related".
       * @throws Exception "Failed to fetch search page" when the HTTP request
       *                   fails; the underlying error is attached as previous.
       */
      public function web($get){

          if($get["npt"]){

              // continuation: the token resolves to the stored next-page link
              // and the proxy that was used for the first page
              [$query, $proxy] = $this->backend->get($get["npt"], "web");

              // drop a leading slash so the concatenation below never yields
              // "https://curlie.org//..."
              $url = "https://curlie.org/" . ltrim($query, "/");
              $params = [];

          }else{

              $proxy = $this->backend->get_ip();

              $url = "https://curlie.org/search";
              $params = [
                  "q" => $get["s"],
                  "start" => 0,
                  "stime" => 92452189 // ? (meaning unknown, kept as-is)
              ];

              if($get["lang"] !== "any"){

                  $params["lang"] = $get["lang"];
              }
          }

          try{

              $html = $this->get($proxy, $url, $params);

          }catch(Exception $error){

              // keep the public message, but chain the cause for debugging
              throw new Exception("Failed to fetch search page", 0, $error);
          }

          $this->fuckhtml->load($html);

          $nextpage = null;

          $nextlinks =
              $this->fuckhtml
              ->getElementsByClassName(
                  "next-page",
                  "a"
              );

          if(
              count($nextlinks) !== 0 &&
              isset($nextlinks[0]["attributes"]["href"])
          ){

              // NOTE(review): attribute values are decoded through
              // getTextContent() for result links below, so the same is done
              // here; otherwise "&amp;" would end up in the next-page query
              // string. Confirm against the markup curlie.org returns.
              $nextpage =
                  $this->backend->store(
                      $this->fuckhtml
                      ->getTextContent(
                          $nextlinks[0]["attributes"]["href"]
                      ),
                      "web",
                      $proxy
                  );
          }

          $out = [
              "status" => "ok",
              "spelling" => [
                  "type" => "no_correction",
                  "using" => null,
                  "correction" => null
              ],
              "npt" => $nextpage,
              "answer" => [],
              "web" => [],
              "image" => [],
              "video" => [],
              "news" => [],
              "related" => []
          ];

          $items =
              $this->fuckhtml
              ->getElementsByClassName(
                  "site-item",
                  "div"
              );

          foreach($items as $item){

              // scope the following lookups to this result only
              $this->fuckhtml->load($item);

              $links =
                  $this->fuckhtml
                  ->getElementsByAttributeValue(
                      "target",
                      "_blank",
                      "a"
                  );

              if(
                  count($links) === 0 ||
                  !isset($links[0]["attributes"]["href"])
              ){

                  // no outbound link: there is no title/url to report
                  continue;
              }

              $a = $links[0];

              $description =
                  $this->fuckhtml
                  ->getElementsByClassName("site-descr");

              if(count($description) !== 0){

                  $description =
                      $this->fuckhtml
                      ->getTextContent(
                          $description[0]
                      );
              }else{

                  $description = null;
              }

              $out["web"][] = [
                  "title" =>
                      $this->fuckhtml
                      ->getTextContent(
                          $a
                      ),
                  "description" => $description,
                  "url" =>
                      $this->fuckhtml
                      ->getTextContent(
                          $a["attributes"]["href"]
                      ),
                  "date" => null,
                  "type" => "web",
                  "thumb" => [
                      "url" => null,
                      "ratio" => null
                  ],
                  "sublink" => [],
                  "table" => []
              ];
          }

          return $out;
      }
  }