ghostery.php 6.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320
  1. <?php
  /**
   * Scraper for the ghosterysearch.com web search.
   *
   * Fetches the HTML result page over curl, extracts the organic results with
   * the project's `fuckhtml` parser and hands pagination state to the
   * project's `backend` helper. Both helpers are loaded from `lib/` in the
   * constructor; their exact contracts are defined there, not in this file.
   */
  class ghostery{

      /**
       * Project helper used for proxy selection and next-page token storage.
       * Declared explicitly (public, as it effectively was as a dynamic
       * property) because dynamic properties are deprecated since PHP 8.2.
       */
      public $backend;

      /**
       * Project HTML parser instance, reloaded with different fragments
       * while walking the result page.
       */
      public $fuckhtml;

      /**
       * Loads the helper libraries and instantiates them.
       */
      public function __construct(){

          // include_once: a second instantiation in the same request must not
          // redeclare the helper classes
          include_once "lib/backend.php";
          $this->backend = new backend("ghostery");

          include_once "lib/fuckhtml.php";
          $this->fuckhtml = new fuckhtml();
      }

      /**
       * Returns the filters this scraper supports for a given page type.
       *
       * @param string $page Page type; only "web" has filters.
       * @return array Empty array for anything but "web", otherwise a single
       *               "country" filter whose options map region codes (plus
       *               "any") to display names.
       */
      public function getfilters($page){

          if($page != "web"){

              return [];
          }

          return [
              "country" => [
                  "display" => "Country",
                  "option" => [
                      "any" => "All regions",
                      "AR" => "Argentina",
                      "AU" => "Australia",
                      "AT" => "Austria",
                      "BE" => "Belgium",
                      "BR" => "Brazil",
                      "CA" => "Canada",
                      "CL" => "Chile",
                      "DK" => "Denmark",
                      "FI" => "Finland",
                      "FR" => "France",
                      "DE" => "Germany",
                      "HK" => "Hong Kong",
                      "IN" => "India",
                      "ID" => "Indonesia",
                      "IT" => "Italy",
                      "JP" => "Japan",
                      "KR" => "Korea",
                      "MY" => "Malaysia",
                      "MX" => "Mexico",
                      "NL" => "Netherlands",
                      "NZ" => "New Zealand",
                      "NO" => "Norway",
                      "CN" => "People's Republic of China",
                      "PL" => "Poland",
                      "PT" => "Portugal",
                      "PH" => "Republic of the Philippines",
                      "RU" => "Russia",
                      "SA" => "Saudi Arabia",
                      "ZA" => "South Africa",
                      "ES" => "Spain",
                      "SE" => "Sweden",
                      "CH" => "Switzerland",
                      "TW" => "Taiwan",
                      "TR" => "Turkey",
                      "GB" => "United Kingdom",
                      "US" => "United States"
                  ]
              ]
          ];
      }

      /**
       * Performs a GET request against the search site and returns the body.
       *
       * @param mixed  $proxy   Proxy descriptor, passed untouched to
       *                        backend::assign_proxy().
       * @param string $url     Target URL.
       * @param array  $get     Query parameters appended to $url when non-empty.
       * @param string $country Region code sent in the "ctry" cookie; "any"
       *                        is sent as "--". Defaults to "any" so that the
       *                        optional $get no longer precedes a required
       *                        parameter (deprecated since PHP 8.0).
       * @return string|bool Whatever curl_exec() returned for the request.
       * @throws Exception With the curl error message when the transfer fails.
       */
      private function get($proxy, $url, $get = [], $country = "any"){

          $curlproc = curl_init();

          if($get !== []){

              $url .= "?" . http_build_query($get);
          }

          curl_setopt($curlproc, CURLOPT_URL, $url);

          curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
          curl_setopt($curlproc, CURLOPT_HTTPHEADER,
              ["User-Agent: " . config::USER_AGENT,
              "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
              "Accept-Language: en-US,en;q=0.5",
              "Accept-Encoding: gzip",
              "Referer: https://ghosterysearch.com",
              "DNT: 1",
              "Sec-GPC: 1",
              "Connection: keep-alive",
              "Cookie: ctry=" . ($country == "any" ? "--" : $country) . "; noads=true",
              "Upgrade-Insecure-Requests: 1",
              "Sec-Fetch-Dest: document",
              "Sec-Fetch-Mode: navigate",
              "Sec-Fetch-Site: same-origin",
              "Sec-Fetch-User: ?1",
              "Priority: u=0, i"]
          );

          // http2 bypass
          curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

          curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
          curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
          curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
          curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
          curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

          $this->backend->assign_proxy($curlproc, $proxy);

          $data = curl_exec($curlproc);

          if(curl_errno($curlproc)){

              // read the message first, then release the handle on the
              // failure path as well
              $message = curl_error($curlproc);
              curl_close($curlproc);

              throw new Exception($message);
          }

          curl_close($curlproc);
          return $data;
      }

      /**
       * Runs a web search, or continues one from a next-page token.
       *
       * Keys read from $get:
       *  - "npt":     next-page token; when non-empty the stored query string
       *               and proxy are fetched from the backend and "s"/"country"
       *               are not consulted.
       *  - "s":       search terms (first page only).
       *  - "country": region code or "any" (first page only).
       *
       * @param array $get Request parameters, see above.
       * @return array Result structure with the keys "status", "spelling",
       *               "npt", "answer", "web", "image", "video", "news" and
       *               "related"; only "web" and "npt" are filled in here.
       * @throws Exception When the page cannot be fetched or the result
       *                   section is missing from the HTML.
       */
      public function web($get){

          if(!empty($get["npt"])){

              [$query, $proxy] = $this->backend->get($get["npt"], "web");

              parse_str($query, $query);

              // country travels inside the token under "c"; it is not a real
              // search parameter, so strip it before rebuilding the query
              $country = $query["c"] ?? "any";
              unset($query["c"]);

              $url = "https://ghosterysearch.com/search?" . http_build_query($query);
              $params = [];

          }else{

              $proxy = $this->backend->get_ip();
              $country = $get["country"];

              $url = "https://ghosterysearch.com/search";
              $params = [
                  "q" => $get["s"]
              ];
          }

          try{

              $html = $this->get($proxy, $url, $params, $country);

          }catch(Exception $error){

              // same message as before; original failure kept as "previous"
              throw new Exception("Failed to fetch search page", 0, $error);
          }

          $out = [
              "status" => "ok",
              "spelling" => [
                  "type" => "no_correction",
                  "using" => null,
                  "correction" => null
              ],
              "npt" => null,
              "answer" => [],
              "web" => [],
              "image" => [],
              "video" => [],
              "news" => [],
              "related" => []
          ];

          $this->fuckhtml->load($html);

          $results_wrapper =
              $this->fuckhtml
              ->getElementsByClassName(
                  "results",
                  "section"
              );

          if(count($results_wrapper) === 0){

              throw new Exception("Failed to grep result section");
          }

          $this->fuckhtml->load($results_wrapper[0]);

          // get search results
          $results =
              $this->fuckhtml
              ->getElementsByClassName(
                  "result",
                  "li"
              );

          if(count($results) === 0){

              return $out;
          }

          foreach($results as $result){

              // scope all following lookups to this single result
              $this->fuckhtml->load($result);

              $a =
                  $this->fuckhtml
                  ->getElementsByClassName(
                      "url",
                      "a"
                  );

              if(count($a) === 0){

                  // no link, nothing useful to report
                  continue;
              }

              $a = $a[0];

              $out["web"][] = [
                  "title" =>
                      $this->titledots(
                          $this->fuckhtml
                          ->getTextContent(
                              $this->fuckhtml
                              ->getElementsByTagName(
                                  "h2"
                              )[0]
                          )
                      ),
                  "description" =>
                      $this->titledots(
                          $this->fuckhtml
                          ->getTextContent(
                              $this->fuckhtml
                              ->getElementsByTagName(
                                  "p"
                              )[0]
                          )
                      ),
                  "url" =>
                      $this->fuckhtml
                      ->getTextContent(
                          $a
                          ["attributes"]
                          ["href"]
                      ),
                  "date" => null,
                  "type" => "web",
                  "thumb" => [
                      "url" => null,
                      "ratio" => null
                  ],
                  "sublink" => [],
                  "table" => []
              ];
          }

          // back to the full document: pagination is looked up from there
          $this->fuckhtml->load($html);

          // get pagination token
          $pagination_wrapper =
              $this->fuckhtml
              ->getElementsByClassName(
                  "pagination",
                  "div"
              );

          if(count($pagination_wrapper) !== 0){

              // found next page!
              $this->fuckhtml->load($pagination_wrapper[0]);

              $a =
                  $this->fuckhtml
                  ->getElementsByTagName(
                      "a"
                  );

              if(count($a) !== 0){

                  // the last link of the pagination block is used as the
                  // next-page link
                  $q =
                      parse_url(
                          $this->fuckhtml
                          ->getTextContent(
                              $a[count($a) - 1]
                              ["attributes"]
                              ["href"]
                          ),
                          PHP_URL_QUERY
                      );

                  // store the country actually used for this request, so the
                  // region survives across token-driven pages where
                  // $get["country"] is not the source of truth
                  $out["npt"] =
                      $this->backend
                      ->store(
                          $q . "&c=" . $country,
                          "web",
                          $proxy
                      );
              }
          }

          return $out;
      }

      /**
       * Strips leading/trailing whitespace, dots, NUL bytes and ellipsis
       * characters ("…") from a piece of text.
       *
       * Matching is done per UTF-8 code point. A byte-wise trim() with "…"
       * in its character list removes the bytes E2/80/A6 individually and
       * thereby mangles neighbouring multi-byte characters (for example a
       * leading curly quote).
       *
       * @param string|null $title Text to clean.
       * @return string Cleaned text.
       */
      private function titledots($title){

          $title = (string)$title;

          $trimmed =
              preg_replace(
                  '/\A[ .\t\n\r\0\x0B…]+|[ .\t\n\r\0\x0B…]+\z/u',
                  "",
                  $title
              );

          if($trimmed === null){

              // input is not valid UTF-8: fall back to an ASCII-only trim
              // that cannot split multi-byte sequences
              return trim($title, " .\t\n\r\0\x0B");
          }

          return $trimmed;
      }
  }