yandex.php 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170
  1. <?php
  2. class yandex{
  3. /*
  4. curl functions
  5. */
  /**
   * Load the HTML parser and the backend library.
   *
   * Only the parser is instantiated here: every scraper entry point
   * (web(), image(), video()) creates its own backend instance.
   */
  public function __construct(){
      // include_once: a plain include would try to redeclare the classes
      // (fatal error) if these files were already loaded during this request.
      include_once "lib/fuckhtml.php";
      $this->fuckhtml = new fuckhtml();

      include_once "lib/backend.php";
  }
  /**
   * Perform a GET request and return the raw response body.
   *
   * $this->backend must already be set by the caller; it is used to
   * attach the proxy to the cURL handle.
   *
   * $get used to declare a default of [] while being followed by the
   * required $nsfw. PHP treats such a parameter as required (and reports a
   * deprecation since 8.0), so the unusable default has been dropped.
   *
   * @param mixed  $proxy Proxy descriptor passed to backend::assign_proxy()
   * @param string $url   Endpoint, without query string
   * @param array  $get   Query parameters; pass [] to send none
   * @param string $nsfw  "yes", "maybe" or "no"
   * @return string Response body
   * @throws Exception With the cURL error message when the transfer fails
   */
  private function get($proxy, $url, $get, $nsfw){

      if($get !== []){

          $url .= "?" . http_build_query($get);
      }

      // Value of the "family" field of the yp cookie for each filter level.
      $family_levels = [
          "yes" => "0",
          "maybe" => "1",
          "no" => "2"
      ];

      // Never interpolate an unrecognised value into the Cookie header;
      // fall back to the "maybe" level instead.
      $family =
          (is_string($nsfw) && isset($family_levels[$nsfw])) ?
          $family_levels[$nsfw] :
          "1";

      $headers = [
          "User-Agent: " . config::USER_AGENT,
          "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
          "Accept-Encoding: gzip",
          "Accept-Language: en-US,en;q=0.5",
          "DNT: 1",
          "Cookie: yp=1716337604.sp.family%3A{$family}#1685406411.szm.1:1920x1080:1920x999",
          "Referer: https://yandex.com/images/search",
          "Connection: keep-alive",
          "Upgrade-Insecure-Requests: 1",
          "Sec-Fetch-Dest: document",
          "Sec-Fetch-Mode: navigate",
          "Sec-Fetch-Site: cross-site"
      ];

      $curlproc = curl_init();

      curl_setopt_array(
          $curlproc,
          [
              CURLOPT_URL => $url,
              CURLOPT_ENCODING => "", // default encoding
              CURLOPT_HTTPHEADER => $headers,
              CURLOPT_RETURNTRANSFER => true,
              CURLOPT_SSL_VERIFYHOST => 2,
              CURLOPT_SSL_VERIFYPEER => true,
              CURLOPT_CONNECTTIMEOUT => 30,
              CURLOPT_TIMEOUT => 30
          ]
      );

      try{

          $this->backend->assign_proxy($curlproc, $proxy);

          $data = curl_exec($curlproc);

          if(curl_errno($curlproc)){

              throw new Exception(curl_error($curlproc));
          }

          return $data;

      }finally{

          // release the handle on the error path as well
          curl_close($curlproc);
      }
  }
  /**
   * Describe the search filters available for a page type.
   *
   * Each filter maps to a "display" label and an "option" entry, which is
   * either a value => label list or the string "_DATE".
   *
   * @param string $pagetype "web", "images" or "videos"
   * @return array|null Filter definitions, or null for any other page type
   */
  public function getfilters($pagetype){

      // The NSFW selector is identical on the image and video pages.
      $nsfw_filter = [
          "display" => "NSFW",
          "option" => [
              "yes" => "Yes",
              "maybe" => "Maybe",
              "no" => "No"
          ]
      ];

      $filters = null;

      switch($pagetype){

          case "web":
              $filters = [
                  "lang" => [
                      "display" => "Language",
                      "option" => [
                          "any" => "Any language",
                          "en" => "English",
                          "ru" => "Russian",
                          "be" => "Belorussian",
                          "fr" => "French",
                          "de" => "German",
                          "id" => "Indonesian",
                          "kk" => "Kazakh",
                          "tt" => "Tatar",
                          "tr" => "Turkish",
                          "uk" => "Ukrainian"
                      ]
                  ],
                  "newer" => ["display" => "Newer than", "option" => "_DATE"],
                  "older" => ["display" => "Older than", "option" => "_DATE"]
              ];
              break;

          case "images":
              $filters = [
                  "nsfw" => $nsfw_filter,
                  "time" => [
                      "display" => "Time posted",
                      "option" => [
                          "any" => "Any time",
                          "week" => "Last week"
                      ]
                  ],
                  "size" => [
                      "display" => "Size",
                      "option" => [
                          "any" => "Any size",
                          "small" => "Small",
                          "medium" => "Medium",
                          "large" => "Large",
                          "wallpaper" => "Wallpaper"
                      ]
                  ],
                  "color" => [
                      "display" => "Colors",
                      "option" => [
                          "any" => "All colors",
                          "color" => "Color images only",
                          "gray" => "Black and white",
                          "red" => "Red",
                          "orange" => "Orange",
                          "yellow" => "Yellow",
                          "cyan" => "Cyan",
                          "green" => "Green",
                          "blue" => "Blue",
                          "violet" => "Purple",
                          "white" => "White",
                          "black" => "Black"
                      ]
                  ],
                  "type" => [
                      "display" => "Type",
                      "option" => [
                          "any" => "All types",
                          "photo" => "Photos",
                          "clipart" => "White background",
                          "lineart" => "Drawings and sketches",
                          "face" => "People",
                          "demotivator" => "Demotivators"
                      ]
                  ],
                  "layout" => [
                      "display" => "Layout",
                      "option" => [
                          "any" => "All layouts",
                          "horizontal" => "Horizontal",
                          "vertical" => "Vertical",
                          "square" => "Square"
                      ]
                  ],
                  "format" => [
                      "display" => "Format",
                      "option" => [
                          "any" => "Any format",
                          "jpeg" => "JPEG",
                          "png" => "PNG",
                          "gif" => "GIF"
                      ]
                  ]
              ];
              break;

          case "videos":
              $filters = [
                  "nsfw" => $nsfw_filter,
                  "time" => [
                      "display" => "Time posted",
                      "option" => [
                          "any" => "Any time",
                          "9" => "Recently"
                      ]
                  ],
                  "duration" => [
                      "display" => "Duration",
                      "option" => [
                          "any" => "Any duration",
                          "short" => "Short"
                      ]
                  ]
              ];
              break;
      }

      return $filters;
  }
  /**
   * Scrape Yandex web results.
   *
   * Either continues from a stored next-page token ($get["npt"]) or starts
   * a new search from $get["s"] with the "lang", "newer" and "older"
   * filters.
   *
   * @param array $get Request parameters: "npt", "s", "lang", "newer", "older"
   * @return array Result set; "web" holds the results and "npt" the token
   *               for the following page (null when there is none)
   * @throws Exception When the search term is empty or the page cannot be fetched
   */
  public function web($get){

      $this->backend = new backend("yandex_w");

      // has captcha
      // https://yandex.com/search/touch/?text=lol&app_platform=android&appsearch_header=1&ui=webmobileapp.yandex&app_version=23070603&app_id=ru.yandex.searchplugin&search_source=yandexcom_touch_native&clid=2218567
      // https://yandex.com/search/site/?text=minecraft&web=1&frame=1&v=2.0&searchid=3131712
      // &within=777&from_day=26&from_month=8&from_year=2023&to_day=26&to_month=8&to_year=2023

      if($get["npt"]){

          // the stored token is the relative URL of the next page
          [$npt, $proxy] = $this->backend->get($get["npt"], "web");

          $html =
              $this->get(
                  $proxy,
                  "https://yandex.com" . $npt,
                  [],
                  "yes"
              );
      }else{

          $search = $get["s"];
          if(strlen($search) === 0){

              throw new Exception("Search term is empty!");
          }

          $proxy = $this->backend->get_ip();
          $lang = $get["lang"];
          $older = $get["older"];
          $newer = $get["newer"];

          $params = [
              "text" => $search,
              "web" => "1",
              "frame" => "1",
              "searchid" => "3131712"
          ];

          if($lang != "any"){

              $params["lang"] = $lang;
          }

          // The range is only sent when it has a lower bound, so an
          // "older than" filter on its own starts at the epoch.
          if(
              $newer === false &&
              $older !== false
          ){

              $newer = 0;
          }

          if($newer !== false){

              $params["from_day"] = date("j", $newer);
              $params["from_month"] = date("n", $newer);
              $params["from_year"] = date("Y", $newer);

              // open-ended ranges end now
              if($older === false){

                  $older = time();
              }

              $params["to_day"] = date("j", $older);
              $params["to_month"] = date("n", $older);
              $params["to_year"] = date("Y", $older);
          }

          try{
              $html =
                  $this->get(
                      $proxy,
                      "https://yandex.com/search/site/",
                      $params,
                      "yes"
                  );
          }catch(Exception $error){

              // keep the cURL error reachable through getPrevious()
              throw new Exception("Could not get search page", 0, $error);
          }
      }

      $out = [
          "status" => "ok",
          "spelling" => [
              "type" => "no_correction",
              "using" => null,
              "correction" => null
          ],
          "npt" => null,
          "answer" => [],
          "web" => [],
          "image" => [],
          "video" => [],
          "news" => [],
          "related" => []
      ];

      $this->fuckhtml->load($html);

      // get nextpage
      $next =
          $this->fuckhtml
          ->getElementsByClassName(
              "b-pager__next",
              "a"
          );

      if(
          count($next) !== 0 &&
          isset($next[0]["attributes"]["href"])
      ){

          $out["npt"] =
              $this->backend->store(
                  $this->fuckhtml
                  ->getTextContent(
                      $next[0]["attributes"]["href"]
                  ),
                  "web",
                  $proxy
              );
      }

      // get items
      $items =
          $this->fuckhtml
          ->getElementsByClassName(
              "b-serp-item",
              "li"
          );

      foreach($items as $item){

          // scope the following lookups to this result
          $this->fuckhtml->load($item);

          $links =
              $this->fuckhtml
              ->getElementsByClassName(
                  "b-serp-item__title-link",
                  "a"
              );

          if(
              count($links) === 0 ||
              !isset($links[0]["attributes"]["href"])
          ){

              // without a title link there is nothing to point at
              continue;
          }

          $link = $links[0];

          $texts =
              $this->fuckhtml
              ->getElementsByClassName(
                  "b-serp-item__text",
                  "div"
              );

          // same convention as video(): null when there is no snippet
          $description = null;
          if(count($texts) !== 0){

              $description =
                  $this->titledots(
                      $this->fuckhtml
                      ->getTextContent(
                          $texts[0]
                      )
                  );
          }

          $out["web"][] = [
              "title" =>
                  $this->titledots(
                      $this->fuckhtml
                      ->getTextContent(
                          $link
                      )
                  ),
              "description" => $description,
              "url" =>
                  $this->fuckhtml
                  ->getTextContent(
                      $link["attributes"]["href"]
                  ),
              "date" => null,
              "type" => "web",
              "thumb" => [
                  "url" => null,
                  "ratio" => null
              ],
              "sublink" => [],
              "table" => []
          ];
      }

      return $out;
  }
  /**
   * Scrape Yandex image results through the JSON ("format=json") endpoint.
   *
   * Either continues from a stored next-page token ($get["npt"]), which
   * holds the previous request plus its NSFW level, or starts a new search
   * from $get["s"] and the image filters.
   *
   * @param array $get Request parameters: "npt", "s", "nsfw", "time",
   *                   "size", "color", "type", "layout", "format"
   * @return array "image" holds the results (each with a list of sources:
   *               preview, duplicates, then thumbnail) and "npt" the token
   *               for the following page (null when there is none)
   * @throws Exception On an empty search term, a failed or undecodable
   *                   response, or a captcha
   */
  public function image($get){

      $this->backend = new backend("yandex_i");

      if($get["npt"]){

          [$request, $proxy] =
              $this->backend->get(
                  $get["npt"],
                  "images"
              );

          $request = json_decode($request, true);

          // the NSFW level travels with the token but is sent as a cookie,
          // not as a query parameter
          $nsfw = $request["nsfw"];
          unset($request["nsfw"]);
      }else{

          $search = $get["s"];
          if(strlen($search) === 0){

              throw new Exception("Search term is empty!");
          }

          $proxy = $this->backend->get_ip();
          $nsfw = $get["nsfw"];
          $time = $get["time"];
          $size = $get["size"];
          $color = $get["color"];
          $type = $get["type"];
          $layout = $get["layout"];
          $format = $get["format"];

          // Filter parameters, as seen in captured requests:
          //   size    isize=large|medium|small|wallpaper
          //   layout  iorient=horizontal|vertical|square
          //   type    type=photo|clipart|lineart|face|demotivator
          //   color   icolor=color|gray|red|orange|yellow|cyan|green|blue|violet|white|black
          //   format  itype=jpg|png|gifan
          //   recent  recent=7D
          // NOTE(review): the captured wallpaper request also carried
          // wp=wh16x9_1920x1080, which is not sent here; confirm whether
          // Yandex needs it.

          $request = [
              "format" => "json",
              "request" => [
                  "blocks" => [
                      [
                          "block" => "extra-content",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      [
                          "block" => "i-global__params:ajax",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      [
                          "block" => "search2:ajax",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      [
                          "block" => "preview__isWallpaper",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      [
                          "block" => "content_type_search",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      [
                          "block" => "serp-controller",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      [
                          "block" => "cookies_ajax",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      [
                          "block" => "advanced-search-block",
                          "params" => (object)[],
                          "version" => 2
                      ]
                  ],
                  "metadata" => [
                      "bundles" => [
                          "lb" => "AS?(E<X120"
                      ],
                      "assets" => [
                          // las base
                          "las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;"
                          // las default
                          //"las" => "justifier-height=1;justifier-setheight=1;fitimages-height=1;justifier-fitincuts=1;react-with-dom=1;227.0=1;203.0=1;76fe94.0=1;215f96.0=1;75.0=1"
                      ],
                      "extraContent" => [
                          "names" => [
                              "i-react-ajax-adapter"
                          ]
                      ]
                  ]
              ]
          ];

          /*
              Apply filters
          */
          if($time == "week"){

              $request["recent"] = "7D";
          }

          if($size != "any"){

              $request["isize"] = $size;
          }

          if($type != "any"){

              $request["type"] = $type;
          }

          if($color != "any"){

              $request["icolor"] = $color;
          }

          if($layout != "any"){

              $request["iorient"] = $layout;
          }

          if($format != "any"){

              // The filter keys differ from the values seen on the wire
              // (itype=jpg, itype=gifan in the captured requests).
              $itypes = [
                  "jpeg" => "jpg",
                  "gif" => "gifan"
              ];

              $request["itype"] =
                  isset($itypes[$format]) ?
                  $itypes[$format] :
                  $format;
          }

          $request["text"] = $search;
          $request["uinfo"] = "sw-1920-sh-1080-ww-1125-wh-999-pd-1-wp-16x9_1920x1080";

          // the block list is sent as a JSON string inside the query
          $request["request"] = json_encode($request["request"]);
      }

      try{
          $json =
              $this->get(
                  $proxy,
                  "https://yandex.com/images/search",
                  $request,
                  $nsfw
              );
      }catch(Exception $err){

          // keep the cURL error reachable through getPrevious()
          throw new Exception("Failed to get JSON", 0, $err);
      }

      $json = json_decode($json, true);

      if($json === null){

          throw new Exception("Failed to decode JSON");
      }

      if(
          isset($json["type"]) &&
          $json["type"] == "captcha"
      ){

          throw new Exception("Yandex blocked this 4get instance. Please try again in ~7 minutes.");
      }

      if(
          !isset($json["blocks"]) ||
          !is_array($json["blocks"])
      ){

          // no blocks means no embedded result state either
          throw new Exception("Failed to extract JSON");
      }

      $out = [
          "status" => "ok",
          "npt" => null,
          "image" => []
      ];

      // get html
      $html = "";
      $has_next = false;

      foreach($json["blocks"] as $block){

          if(isset($block["html"])){

              $html .= $block["html"];
          }

          if(!empty($block["params"]["nextPageUrl"])){

              $has_next = true;
          }
      }

      // get next page: stored once, however many blocks advertise it
      if($has_next){

          $request["nsfw"] = $nsfw;
          $request["p"] =
              isset($request["p"]) ?
              $request["p"] + 1 :
              1;

          $out["npt"] =
              $this->backend->store(
                  json_encode($request),
                  "images",
                  $proxy
              );
      }

      $this->fuckhtml->load($html);

      // get search results: they are embedded as JSON in the data-state
      // attribute of a div.Root element
      $data = null;

      foreach(
          $this->fuckhtml
          ->getElementsByClassName(
              "Root",
              "div"
          ) as $div
      ){

          if(!isset($div["attributes"]["data-state"])){

              continue;
          }

          $tmp =
              json_decode(
                  $this->fuckhtml
                  ->getTextContent(
                      $div["attributes"]["data-state"]
                  ),
                  true
              );

          if(isset($tmp["initialState"]["serpList"])){

              $data = $tmp;
              break;
          }
      }

      if($data === null){

          throw new Exception("Failed to extract JSON");
      }

      $entities =
          isset($data["initialState"]["serpList"]["items"]["entities"]) ?
          $data["initialState"]["serpList"]["items"]["entities"] :
          [];

      foreach($entities as $image){

          if(
              !isset($image["snippet"]["title"]) ||
              !isset($image["snippet"]["url"])
          ){

              // not a regular image entry
              continue;
          }

          // title, optionally followed by the snippet text
          $title = [html_entity_decode($image["snippet"]["title"], ENT_QUOTES | ENT_HTML5)];

          if(isset($image["snippet"]["text"])){

              $title[] = html_entity_decode($image["snippet"]["text"], ENT_QUOTES | ENT_HTML5);
          }

          $tmp = [
              "title" =>
                  $this->fuckhtml
                  ->getTextContent(
                      $this->titledots(
                          implode(": ", $title)
                      )
                  ),
              "source" => [],
              "url" => htmlspecialchars_decode($image["snippet"]["url"])
          ];

          // add preview URL
          if(isset($image["viewerData"]["preview"][0]["url"])){

              $preview = $image["viewerData"]["preview"][0];

              $tmp["source"][] = [
                  "url" => htmlspecialchars_decode($preview["url"]),
                  "width" => (int)$preview["w"],
                  "height" => (int)$preview["h"],
              ];
          }

          // add duplicates of the same picture
          $dups =
              isset($image["viewerData"]["dups"]) ?
              $image["viewerData"]["dups"] :
              [];

          foreach($dups as $dup){

              $tmp["source"][] = [
                  "url" => htmlspecialchars_decode($dup["url"]),
                  "width" => (int)$dup["w"],
                  "height" => (int)$dup["h"],
              ];
          }

          // add thumbnail last; its URL may be protocol-relative
          if(isset($image["viewerData"]["thumb"]["url"])){

              $thumb = $image["viewerData"]["thumb"];

              $tmp["source"][] = [
                  "url" =>
                      preg_replace(
                          '/^\/\//',
                          "https://",
                          htmlspecialchars_decode($thumb["url"])
                      ),
                  "width" => (int)$thumb["w"],
                  "height" => (int)$thumb["h"]
              ];
          }

          if(count($tmp["source"]) === 0){

              // nothing to display for this entry
              continue;
          }

          $out["image"][] = $tmp;
      }

      return $out;
  }
  /**
   * Scrape Yandex video results through the JSON ("format=json") endpoint.
   *
   * Either continues from a stored next-page token ($get["npt"]), which
   * holds the previous query plus its NSFW level, or starts a new search
   * from $get["s"] with the "time" and "duration" filters.
   *
   * @param array $get Request parameters: "npt", "s", "nsfw", "time", "duration"
   * @return array "video" holds the results and "npt" the token for the
   *               following page (null when there is none); the other
   *               lists are always returned empty
   * @throws Exception On an empty search term, a failed or undecodable
   *                   response, or a response without blocks
   */
  public function video($get){

      $this->backend = new backend("yandex_v");

      if($get["npt"]){

          [$params, $proxy] =
              $this->backend->get(
                  $get["npt"],
                  "video"
              );

          $params = json_decode($params, true);

          // the NSFW level travels with the token but is sent as a cookie,
          // not as a query parameter
          $nsfw = $params["nsfw"];
          unset($params["nsfw"]);
      }else{

          $search = $get["s"];
          if(strlen($search) === 0){

              throw new Exception("Search term is empty!");
          }

          $proxy = $this->backend->get_ip();
          $nsfw = $get["nsfw"];
          $time = $get["time"];
          $duration = $get["duration"];

          // Modelled on a captured request to https://yandex.com/video/search
          // which, besides the parameters below, also carried:
          //   &yu=4861394161661655015
          //   &from=tabbar
          //   &reqid=1693106278500184-6825210746979814879-balancer-l7leveler-kubr-yp-sas-7-BAL-4237
          //   &suggest_reqid=486139416166165501562797413447032

          $params = [
              "tmpl_version" => "releases/frontend/video/v1.1168.0#8d942de0f4ebc4eb6b8f3c24ffbd1f8dbc5bbe63",
              "format" => "json",
              // the block list is sent as a JSON string inside the query
              "request" => json_encode([
                  "blocks" => [
                      (object)[
                          "block" => "extra-content",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      (object)[
                          "block" => "i-global__params:ajax",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      (object)[
                          "block" => "search2:ajax",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      (object)[
                          "block" => "vital-incut",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      (object)[
                          "block" => "content_type_search",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      (object)[
                          "block" => "serp-controller",
                          "params" => (object)[],
                          "version" => 2
                      ],
                      (object)[
                          "block" => "cookies_ajax",
                          "params" => (object)[],
                          "version" => 2
                      ]
                  ],
                  "metadata" => (object)[
                      "bundles" => (object)[
                          "lb" => "^G]!q<X120"
                      ],
                      "assets" => (object)[
                          "las" => "react-with-dom=1;185.0=1;73.0=1;145.0=1;5a502a.0=1;32c342.0=1;b84ac8.0=1"
                      ],
                      "extraContent" => (object)[
                          "names" => [
                              "i-react-ajax-adapter"
                          ]
                      ]
                  ]
              ]),
              "text" => $search
          ];

          if($duration != "any"){

              $params["duration"] = $duration;
          }

          if($time != "any"){

              $params["within"] = $time;
          }
      }

      try{
          $json =
              $this->get(
                  $proxy,
                  "https://yandex.com/video/search",
                  $params,
                  $nsfw
              );
      }catch(Exception $error){

          // keep the cURL error reachable through getPrevious()
          throw new Exception("Could not fetch JSON", 0, $error);
      }

      $json = json_decode($json, true);

      if($json === null){

          throw new Exception("Could not parse JSON");
      }

      if(!isset($json["blocks"])){

          throw new Exception("Yandex blocked this 4get instance. Please try again in 7~ minutes.");
      }

      $out = [
          "status" => "ok",
          "npt" => null,
          "video" => [],
          "author" => [],
          "livestream" => [],
          "playlist" => [],
          "reel" => []
      ];

      // the results page is the concatenation of every block's markup
      $html = "";

      foreach($json["blocks"] as $block){

          if(isset($block["html"])){

              $html .= $block["html"];
          }
      }

      $this->fuckhtml->load($html);

      $div =
          $this->fuckhtml
          ->getElementsByTagName("div");

      /*
          Get nextpage
      */
      $npt =
          $this->fuckhtml
          ->getElementsByClassName(
              "more more_direction_next i-bem",
              $div
          );

      if(count($npt) !== 0){

          // Advance from the page just fetched. Always storing "1" here
          // would make every token after the first request the same page
          // again; image() increments in the same way.
          $params["p"] =
              isset($params["p"]) ?
              (int)$params["p"] + 1 :
              1;

          $params["nsfw"] = $nsfw;

          $out["npt"] =
              $this->backend->store(
                  json_encode($params),
                  "video",
                  $proxy
              );
      }

      $items =
          $this->fuckhtml
          ->getElementsByClassName(
              "serp-item",
              $div
          );

      foreach($items as $item){

          if(!isset($item["attributes"]["data-video"])){

              // not a video entry
              continue;
          }

          // the video metadata is embedded as JSON in data-video
          $data =
              json_decode(
                  $this->fuckhtml
                  ->getTextContent(
                      $item["attributes"]["data-video"]
                  ),
                  true
              );

          if(
              !isset($data["title"]) ||
              !isset($data["counters"]["toHostingLoaded"]["postfix"]["href"])
          ){

              // nothing to link to
              continue;
          }

          $hosting = $data["counters"]["toHostingLoaded"];

          // scope the following lookups to this result
          $this->fuckhtml->load($item);

          $thumb =
              $this->fuckhtml
              ->getElementsByClassName(
                  "thumb-image__image",
                  "img"
              );

          if(
              count($thumb) === 0 ||
              !isset($thumb[0]["attributes"]["src"])
          ){

              $thumb = [
                  "url" => null,
                  "ratio" => null
              ];
          }else{

              $thumb = [
                  "url" =>
                      // Only a leading "//" is protocol-relative. The 4th
                      // argument of str_replace() is an output counter,
                      // not a limit, so it cannot be used to restrict the
                      // replacement to the first match.
                      preg_replace(
                          '/^\/\//',
                          "https://",
                          $this->fuckhtml
                          ->getTextContent(
                              $thumb[0]["attributes"]["src"]
                          )
                      ),
                  "ratio" => "16:9"
              ];
          }

          $smallinfos =
              $this->fuckhtml
              ->getElementsByClassName(
                  "serp-item__sitelinks-item",
                  "div"
              );

          $date = null;
          $views = null;
          $first = true;

          foreach($smallinfos as $info){

              // the first entry is neither a date nor a view count
              if($first){

                  $first = false;
                  continue;
              }

              $info =
                  $this->fuckhtml
                  ->getTextContent(
                      $info
                  );

              // whatever strtotime() cannot read is taken as a view count
              if($temp_date = strtotime($info)){

                  $date = $temp_date;
              }else{

                  $views = $this->parseviews($info);
              }
          }

          $description =
              $this->fuckhtml
              ->getElementsByClassName(
                  "serp-item__text serp-item__text_visibleText_always",
                  "div"
              );

          if(count($description) === 0){

              $description = null;
          }else{

              $description =
                  $this->titledots(
                      $this->fuckhtml
                      ->getTextContent(
                          $description[0]
                      )
                  );
          }

          $out["video"][] = [
              "title" =>
                  $this->fuckhtml
                  ->getTextContent(
                      $this->titledots(
                          $data["title"]
                      )
                  ),
              "description" => $description,
              "author" => [
                  "name" => null,
                  "url" => null,
                  "avatar" => null
              ],
              "date" => $date,
              "duration" =>
                  isset($hosting["stredParams"]["duration"]) ?
                  (int)$hosting["stredParams"]["duration"] :
                  0,
              "views" => $views,
              "thumb" => $thumb,
              "url" =>
                  // upgrade the scheme only, not any URL nested in the query
                  preg_replace(
                      '/^http:\/\//',
                      "https://",
                      $this->fuckhtml
                      ->getTextContent(
                          $hosting["postfix"]["href"]
                      )
                  )
          ];
      }

      return $out;
  }
  /**
   * Convert a view counter such as "1.5 mln." into a number.
   *
   * The text is split on spaces: the first word is the figure and the
   * second, when it is "bln.", "mln." or "thsd.", scales it. Any other
   * second word, or none at all, leaves the figure as it is.
   *
   * @param string $text
   * @return float
   */
  private function parseviews($text){

      $parts = explode(" ", trim($text));

      $num = (float)$parts[0];

      // a bare figure has no unit; do not read past the end of the list
      $mod = isset($parts[1]) ? $parts[1] : "";

      $multipliers = [
          "bln." => 1000000000,
          "mln." => 1000000,
          "thsd." => 1000
      ];

      if(isset($multipliers[$mod])){

          return $num * $multipliers[$mod];
      }

      return $num;
  }
  /**
   * Strip a trailing ellipsis from a title and trim the result.
   *
   * The last three bytes are compared against both "..." and "…"; the
   * latter is a single character that takes three bytes (assuming this
   * file is saved as UTF-8), so one comparison width covers both.
   *
   * @param string $title
   * @return string
   */
  private function titledots($title){

      $ellipses = ["...", "…"];

      // loose comparison on purpose, same as comparing with ==
      if(!in_array(substr($title, -3), $ellipses)){

          return trim($title);
      }

      return trim(substr($title, 0, -3));
  }
  961. }