1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054 |
- <?php
- class google_cse{
-
- public const req_html = 0;
- public const req_js = 1;
-
- public function __construct(){
-
- include "lib/backend.php";
- $this->backend = new backend("google_cse");
-
- include "lib/fuckhtml.php";
- $this->fuckhtml = new fuckhtml();
- }
-
- public function getfilters($page){
-
- $base = [
- "country" => [ // gl=<country> (image: cr=countryAF)
- "display" => "Country",
- "option" => [
- "any" => "Any country",
- "af" => "Afghanistan",
- "al" => "Albania",
- "dz" => "Algeria",
- "as" => "American Samoa",
- "ad" => "Andorra",
- "ao" => "Angola",
- "ai" => "Anguilla",
- "aq" => "Antarctica",
- "ag" => "Antigua and Barbuda",
- "ar" => "Argentina",
- "am" => "Armenia",
- "aw" => "Aruba",
- "au" => "Australia",
- "at" => "Austria",
- "az" => "Azerbaijan",
- "bs" => "Bahamas",
- "bh" => "Bahrain",
- "bd" => "Bangladesh",
- "bb" => "Barbados",
- "by" => "Belarus",
- "be" => "Belgium",
- "bz" => "Belize",
- "bj" => "Benin",
- "bm" => "Bermuda",
- "bt" => "Bhutan",
- "bo" => "Bolivia",
- "ba" => "Bosnia and Herzegovina",
- "bw" => "Botswana",
- "bv" => "Bouvet Island",
- "br" => "Brazil",
- "io" => "British Indian Ocean Territory",
- "bn" => "Brunei Darussalam",
- "bg" => "Bulgaria",
- "bf" => "Burkina Faso",
- "bi" => "Burundi",
- "kh" => "Cambodia",
- "cm" => "Cameroon",
- "ca" => "Canada",
- "cv" => "Cape Verde",
- "ky" => "Cayman Islands",
- "cf" => "Central African Republic",
- "td" => "Chad",
- "cl" => "Chile",
- "cn" => "China",
- "cx" => "Christmas Island",
- "cc" => "Cocos (Keeling) Islands",
- "co" => "Colombia",
- "km" => "Comoros",
- "cg" => "Congo",
- "cd" => "Congo, the Democratic Republic",
- "ck" => "Cook Islands",
- "cr" => "Costa Rica",
- "ci" => "Cote D'ivoire",
- "hr" => "Croatia",
- "cu" => "Cuba",
- "cy" => "Cyprus",
- "cz" => "Czech Republic",
- "dk" => "Denmark",
- "dj" => "Djibouti",
- "dm" => "Dominica",
- "do" => "Dominican Republic",
- "ec" => "Ecuador",
- "eg" => "Egypt",
- "sv" => "El Salvador",
- "gq" => "Equatorial Guinea",
- "er" => "Eritrea",
- "ee" => "Estonia",
- "et" => "Ethiopia",
- "fk" => "Falkland Islands (Malvinas)",
- "fo" => "Faroe Islands",
- "fj" => "Fiji",
- "fi" => "Finland",
- "fr" => "France",
- "gf" => "French Guiana",
- "pf" => "French Polynesia",
- "tf" => "French Southern Territories",
- "ga" => "Gabon",
- "gm" => "Gambia",
- "ge" => "Georgia",
- "de" => "Germany",
- "gh" => "Ghana",
- "gi" => "Gibraltar",
- "gr" => "Greece",
- "gl" => "Greenland",
- "gd" => "Grenada",
- "gp" => "Guadeloupe",
- "gu" => "Guam",
- "gt" => "Guatemala",
- "gn" => "Guinea",
- "gw" => "Guinea-Bissau",
- "gy" => "Guyana",
- "ht" => "Haiti",
- "hm" => "Heard Island and Mcdonald Islands",
- "va" => "Holy See (Vatican City State)",
- "hn" => "Honduras",
- "hk" => "Hong Kong",
- "hu" => "Hungary",
- "is" => "Iceland",
- "in" => "India",
- "id" => "Indonesia",
- "ir" => "Iran, Islamic Republic",
- "iq" => "Iraq",
- "ie" => "Ireland",
- "il" => "Israel",
- "it" => "Italy",
- "jm" => "Jamaica",
- "jp" => "Japan",
- "jo" => "Jordan",
- "kz" => "Kazakhstan",
- "ke" => "Kenya",
- "ki" => "Kiribati",
- "kp" => "Korea, Democratic People's Republic",
- "kr" => "Korea, Republic",
- "kw" => "Kuwait",
- "kg" => "Kyrgyzstan",
- "la" => "Lao People's Democratic Republic",
- "lv" => "Latvia",
- "lb" => "Lebanon",
- "ls" => "Lesotho",
- "lr" => "Liberia",
- "ly" => "Libyan Arab Jamahiriya",
- "li" => "Liechtenstein",
- "lt" => "Lithuania",
- "lu" => "Luxembourg",
- "mo" => "Macao",
- "mk" => "Macedonia, the Former Yugosalv Republic",
- "mg" => "Madagascar",
- "mw" => "Malawi",
- "my" => "Malaysia",
- "mv" => "Maldives",
- "ml" => "Mali",
- "mt" => "Malta",
- "mh" => "Marshall Islands",
- "mq" => "Martinique",
- "mr" => "Mauritania",
- "mu" => "Mauritius",
- "yt" => "Mayotte",
- "mx" => "Mexico",
- "fm" => "Micronesia, Federated States",
- "md" => "Moldova, Republic",
- "mc" => "Monaco",
- "mn" => "Mongolia",
- "ms" => "Montserrat",
- "ma" => "Morocco",
- "mz" => "Mozambique",
- "mm" => "Myanmar",
- "na" => "Namibia",
- "nr" => "Nauru",
- "np" => "Nepal",
- "nl" => "Netherlands",
- "an" => "Netherlands Antilles",
- "nc" => "New Caledonia",
- "nz" => "New Zealand",
- "ni" => "Nicaragua",
- "ne" => "Niger",
- "ng" => "Nigeria",
- "nu" => "Niue",
- "nf" => "Norfolk Island",
- "mp" => "Northern Mariana Islands",
- "no" => "Norway",
- "om" => "Oman",
- "pk" => "Pakistan",
- "pw" => "Palau",
- "ps" => "Palestinian Territory, Occupied",
- "pa" => "Panama",
- "pg" => "Papua New Guinea",
- "py" => "Paraguay",
- "pe" => "Peru",
- "ph" => "Philippines",
- "pn" => "Pitcairn",
- "pl" => "Poland",
- "pt" => "Portugal",
- "pr" => "Puerto Rico",
- "qa" => "Qatar",
- "re" => "Reunion",
- "ro" => "Romania",
- "ru" => "Russian Federation",
- "rw" => "Rwanda",
- "sh" => "Saint Helena",
- "kn" => "Saint Kitts and Nevis",
- "lc" => "Saint Lucia",
- "pm" => "Saint Pierre and Miquelon",
- "vc" => "Saint Vincent and the Grenadines",
- "ws" => "Samoa",
- "sm" => "San Marino",
- "st" => "Sao Tome and Principe",
- "sa" => "Saudi Arabia",
- "sn" => "Senegal",
- "cs" => "Serbia and Montenegro",
- "sc" => "Seychelles",
- "sl" => "Sierra Leone",
- "sg" => "Singapore",
- "sk" => "Slovakia",
- "si" => "Slovenia",
- "sb" => "Solomon Islands",
- "so" => "Somalia",
- "za" => "South Africa",
- "gs" => "South Georgia and the South Sandwich Islands",
- "es" => "Spain",
- "lk" => "Sri Lanka",
- "sd" => "Sudan",
- "sr" => "Suriname",
- "sj" => "Svalbard and Jan Mayen",
- "sz" => "Swaziland",
- "se" => "Sweden",
- "ch" => "Switzerland",
- "sy" => "Syrian Arab Republic",
- "tw" => "Taiwan, Province of China",
- "tj" => "Tajikistan",
- "tz" => "Tanzania, United Republic",
- "th" => "Thailand",
- "tl" => "Timor-Leste",
- "tg" => "Togo",
- "tk" => "Tokelau",
- "to" => "Tonga",
- "tt" => "Trinidad and Tobago",
- "tn" => "Tunisia",
- "tr" => "Turkey",
- "tm" => "Turkmenistan",
- "tc" => "Turks and Caicos Islands",
- "tv" => "Tuvalu",
- "ug" => "Uganda",
- "ua" => "Ukraine",
- "ae" => "United Arab Emirates",
- "uk" => "United Kingdom",
- "us" => "United States",
- "um" => "United States Minor Outlying Islands",
- "uy" => "Uruguay",
- "uz" => "Uzbekistan",
- "vu" => "Vanuatu",
- "ve" => "Venezuela",
- "vn" => "Viet Nam",
- "vg" => "Virgin Islands, British",
- "vi" => "Virgin Islands, U.S.",
- "wf" => "Wallis and Futuna",
- "eh" => "Western Sahara",
- "ye" => "Yemen",
- "zm" => "Zambia",
- "zw" => "Zimbabwe"
- ]
- ],
- "nsfw" => [
- "display" => "NSFW",
- "option" => [
- "yes" => "Yes", // safe=active
- "no" => "No" // safe=off
- ]
- ],
- "spellcheck" => [
- // display undefined
- "option" => [
- "yes" => "Yes",
- "no" => "No"
- ]
- ]
- ];
-
- switch($page){
-
- case "web":
- return array_merge(
- $base,
- [
- "lang" => [ // lr=<lang> (prefix lang with "lang_")
- "display" => "Language",
- "option" => [
- "any" => "Any language",
- "ar" => "Arabic",
- "bg" => "Bulgarian",
- "ca" => "Catalan",
- "cs" => "Czech",
- "da" => "Danish",
- "de" => "German",
- "el" => "Greek",
- "en" => "English",
- "es" => "Spanish",
- "et" => "Estonian",
- "fi" => "Finnish",
- "fr" => "French",
- "hr" => "Croatian",
- "hu" => "Hungarian",
- "id" => "Indonesian",
- "is" => "Icelandic",
- "it" => "Italian",
- "iw" => "Hebrew",
- "ja" => "Japanese",
- "ko" => "Korean",
- "lt" => "Lithuanian",
- "lv" => "Latvian",
- "nl" => "Dutch",
- "no" => "Norwegian",
- "pl" => "Polish",
- "pt" => "Portuguese",
- "ro" => "Romanian",
- "ru" => "Russian",
- "sk" => "Slovak",
- "sl" => "Slovenian",
- "sr" => "Serbian",
- "sv" => "Swedish",
- "tr" => "Turkish",
- "zh-CN" => "Chinese (Simplified)",
- "zh-TW" => "Chinese (Traditional)"
- ]
- ],
- "sort" => [
- "display" => "Sort by",
- "option" => [
- "relevance" => "Relevance",
- "date" => "Date"
- ]
- ],
- "redundant" => [
- "display" => "Remove redundant",
- "option" => [
- "yes" => "Yes",
- "no" => "No",
- ]
- ]
- ]
- );
- break;
-
- case "images":
- return array_merge(
- $base,
- [
- "size" => [ // imgsz
- "display" => "Size",
- "option" => [
- "any" => "Any size",
- "l" => "Large",
- "m" => "Medium",
- "i" => "Icon",
- "qsvga" => "Larger than 400x300",
- "vga" => "Larger than 640x480",
- "svga" => "Larger than 800x600",
- "xga" => "Larger than 1024x768",
- "2mp" => "Larger than 2MP",
- "4mp" => "Larger than 4MP",
- "6mp" => "Larger than 6MP",
- "8mp" => "Larger than 8MP",
- "10mp" => "Larger than 10MP",
- "12mp" => "Larger than 12MP",
- "15mp" => "Larger than 15MP",
- "20mp" => "Larger than 20MP",
- "40mp" => "Larger than 40MP",
- "70mp" => "Larger than 70MP"
- ]
- ],
- "color" => [ // imgc
- "display" => "Color",
- "option" => [
- "any" => "Any color",
- "color" => "Full color",
- "bnw" => "Black & white",
- "trans" => "Transparent",
- // from here, imgcolor
- "red" => "Red",
- "orange" => "Orange",
- "yellow" => "Yellow",
- "green" => "Green",
- "teal" => "Teal",
- "blue" => "Blue",
- "purple" => "Purple",
- "pink" => "Pink",
- "white" => "White",
- "gray" => "Gray",
- "black" => "Black",
- "brown" => "Brown"
- ]
- ],
- "format" => [ // as_filetype
- "display" => "Format",
- "option" => [
- "any" => "Any format",
- "jpg" => "JPG",
- "gif" => "GIF",
- "png" => "PNG",
- "bmp" => "BMP",
- "svg" => "SVG",
- "webp" => "WEBP",
- "ico" => "ICO",
- "craw" => "RAW"
- ]
- ]
- ]
- );
- break;
- }
- }
-
- private function get($proxy, $url, $get = [], $reqtype = self::req_js){
-
- $curlproc = curl_init();
-
- if($get !== []){
-
- $get = http_build_query($get);
- $url .= "?" . $get;
- }
-
- curl_setopt($curlproc, CURLOPT_URL, $url);
-
- // http2 bypass
- curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
-
- curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
-
- if($reqtype === self::req_js){
-
- curl_setopt($curlproc, CURLOPT_HTTPHEADER,
- ["User-Agent: " . config::USER_AGENT,
- "Accept: */*",
- "Accept-Language: en-US,en;q=0.5",
- "Accept-Encoding: gzip",
- "DNT: 1",
- "Sec-GPC: 1",
- "Alt-Used: cse.google.com",
- "Connection: keep-alive",
- "Referer: https://cse.google.com/cse?cx=" . config::GOOGLE_CX_ENDPOINT,
- "Sec-Fetch-Dest: script",
- "Sec-Fetch-Mode: no-cors",
- "Sec-Fetch-Site: same-origin",
- "TE: trailers"]
- );
- }else{
-
- curl_setopt($curlproc, CURLOPT_HTTPHEADER,
- ["User-Agent: " . config::USER_AGENT,
- "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/png,image/svg+xml,*/*;q=0.8",
- "Accept-Language: en-US,en;q=0.5",
- "Accept-Encoding: gzip",
- "DNT: 1",
- "Sec-GPC: 1",
- "Connection: keep-alive",
- "Upgrade-Insecure-Requests: 1",
- "Sec-Fetch-Dest: document",
- "Sec-Fetch-Mode: navigate",
- "Sec-Fetch-Site: none",
- "Sec-Fetch-User: ?1",
- "Priority: u=0, i"]
- );
- }
-
- curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
- curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
- curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
- curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
- curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
-
- $this->backend->assign_proxy($curlproc, $proxy);
-
- $data = curl_exec($curlproc);
-
- if(curl_errno($curlproc)){
-
- throw new Exception(curl_error($curlproc));
- }
-
- curl_close($curlproc);
- return $data;
- }
-
- public function web($get){
-
- // page 1
- // https://cse.google.com/cse/element/v1?rsz=filtered_cse&num=10&hl=en&source=gcsc&cselibv=8fa85d58e016b414&cx=d4e68b99b876541f0&q=asmr&safe=active&cse_tok=AB-tC_6RPUTmB4XK0lE9e1AFFC5r%3A1729563832926&lr=&cr=&gl=&filter=0&sort=&as_oq=&as_sitesearch=&exp=cc%2Capo&oq=asmr&gs_l=partner-web.3..0i512i433j0i512i433i131l2j0i512i433j0i512i433i131j0i512i433j0i512i433i131l2j0i512l2.10902.266627.5.267157.11.10.0.0.0.0.188.1108.2j7.9.0.csems%2Cnrl%3D10...0....1.34.partner-web..42.14.1500.WJQvMvfXkx4&cseclient=hosted-page-client&callback=google.search.cse.api8223&rurl=https%3A%2F%2Fcse.google.com%2Fcse%3Fcx%3Dd4e68b99b876541f0%23gsc.tab%3D0%26gsc.q%3Dtest%26gsc.sort%3D
-
- // page 2
- // https://cse.google.com/cse/element/v1?rsz=filtered_cse&num=10&hl=en&source=gcsc&start=10&cselibv=8fa85d58e016b414&cx=d4e68b99b876541f0&q=asmr&safe=active&cse_tok=AB-tC_6RPUTmB4XK0lE9e1AFFC5r%3A1729563832926&lr=&cr=&gl=&filter=0&sort=&as_oq=&as_sitesearch=&exp=cc%2Capo&callback=google.search.cse.api3595&rurl=https%3A%2F%2Fcse.google.com%2Fcse%3Fcx%3Dd4e68b99b876541f0%23gsc.tab%3D0%26gsc.q%3Dtest%26gsc.sort%3D
-
- if($get["npt"]){
-
- [$req_params, $proxy] =
- $this->backend->get(
- $get["npt"],
- "web"
- );
-
- $req_params =
- json_decode(
- $req_params,
- true
- );
-
- $json =
- $this->get(
- $proxy,
- "https://cse.google.com/cse/element/v1",
- $req_params,
- self::req_js
- );
-
- }else{
-
- $proxy = $this->backend->get_ip();
- $params = $this->generate_token($proxy);
-
- //$json = file_get_contents("scraper/google_cse.txt");
- $req_params = [
- "rsz" => "filtered_cse",
- "num" => 20,
- "hl" => "en",
- "source" => "gcsc",
- "cselibv" => $params["lib"],
- "cx" => config::GOOGLE_CX_ENDPOINT,
- "q" => $get["s"],
- "safe" => $get["nsfw"] == "yes" ? "off" : "active",
- "cse_tok" => $params["token"],
- "lr" => $get["lang"] == "any" ? "" : "lang_" . $get["lang"],
- "cr" => $get["country"] == "any" ? "" : "country" . strtoupper($get["country"]),
- "gl" => "",
- "filter" => $get["redundant"] == "yes" ? "1" : "0",
- "sort" => $get["sort"] == "relevance" ? "" : "date",
- "as_oq" => "",
- "as_sitesearch" => "",
- "exp" => "cc,apo",
- "oq" => $get["s"],
- "gs_l" => "partner-web.3...33294.34225.3.34597.26.11.0.0.0.0.201.1132.6j4j1.11.0.csems,nrl=10...0....1.34.partner-web..34.19.1897.FKEeG5yh2iw",
- "cseclient" => "hosted-page-client",
- "callback" => "google.search.cse.api" . random_int(4000, 99999),
- "rurl" => "https://cse.google.com/cse?cx=" . config::GOOGLE_CX_ENDPOINT . "#gsc.tab=0&gsc.q=" . $get["s"] . "&gsc.sort="
- ];
-
- if($get["spellcheck"] == "no"){
-
- $req_params["nfpr"] = "1";
- }
-
- $json =
- $this->get(
- $proxy,
- "https://cse.google.com/cse/element/v1",
- $req_params,
- self::req_js
- );
-
- unset($req_params["gs_l"]);
- $req_params["start"] = 0;
- }
-
- $req_params["start"] += 20;
-
- if(
- !preg_match(
- '/google\.search\.cse\.[A-Za-z0-9]+\(([\S\s]*)\);/i',
- $json,
- $json
- )
- ){
-
- throw new Exception("Failed to grep JSON");
- }
-
- $json = json_decode($json[1], true);
-
- if(isset($json["error"])){
-
- if(isset($json["error"]["errors"][0]["message"])){
-
- throw new Exception("Google returned an error: " . $json["error"]["errors"][0]["message"]);
- }
-
- if(isset($json["error"]["message"])){
-
- throw new Exception("Google returned an error: " . $json["error"]["message"]);
- }
-
- throw new Exception("Google returned an error object");
- }
-
- $out = [
- "status" => "ok",
- "spelling" => [
- "type" => "no_correction",
- "using" => null,
- "correction" => null
- ],
- "npt" => null,
- "answer" => [],
- "web" => [],
- "image" => [],
- "video" => [],
- "news" => [],
- "related" => []
- ];
-
- // detect word correction
- if(isset($json["spelling"]["type"])){
-
- switch($json["spelling"]["type"]){
-
- case "DYM": // did you mean? @TODO fix wording
- $type = "including";
- break;
-
- case "SPELL_CORRECTED_RESULTS": // not many results for
- $type = "not_many";
- break;
-
- default:
- $type = "not_many";
- }
-
- if(isset($json["spelling"]["originalQuery"])){
-
- $using = $json["spelling"]["originalQuery"];
- }
- elseif(isset($json["spelling"]["anchor"])){
-
- $using = html_entity_decode(strip_tags($json["spelling"]["anchor"]));
- }elseif(isset($json["spelling"]["originalAnchor"])){
-
- $using = html_entity_decode(strip_tags($json["spelling"]["originalAnchor"]));
- }
-
- $out["spelling"] = [
- "type" => $type,
- "using" => $using,
- "correction" => $json["spelling"]["correctedQuery"]
- ];
- }
-
- if(!isset($json["results"])){
-
- return $out;
- }
-
- foreach($json["results"] as $result){
-
- // get date from description
- $description =
- explode(
- "...",
- trim($result["contentNoFormatting"], " ."),
- 2
- );
-
- if(count($description) === 2){
-
- if($date = strtotime($description[0])){
-
- $description = ltrim($description[1]);
- }else{
-
- $date = null;
- $description = implode("...", $description);
- }
- }else{
-
- $description = implode("...", $description);
- $date = null;
- }
-
- $description = trim($description, " .");
-
- // get thumbnails
- if(isset($result["richSnippet"]["cseThumbnail"]["src"])){
-
- $thumb = [
- "url" => $this->unshit_thumb($result["richSnippet"]["cseThumbnail"]["src"]),
- "ratio" => "1:1"
- ];
- }
- elseif(isset($result["richSnippet"]["cseImage"]["src"])){
-
- $thumb = [
- "url" => $result["richSnippet"]["cseImage"]["src"],
- "ratio" => "1:1"
- ];
- }else{
-
- $thumb = [
- "url" => null,
- "ratio" => null
- ];
- }
-
- if($thumb["url"] !== null){
-
- $found_size = false;
-
- // find correct ratio
-
- if(
- isset($result["richSnippet"]["cseThumbnail"]["width"]) &&
- isset($result["richSnippet"]["cseThumbnail"]["height"])
- ){
- $found_size = true;
- $width = (int)$result["richSnippet"]["cseThumbnail"]["width"];
- $height = (int)$result["richSnippet"]["cseThumbnail"]["height"];
- }
- elseif(
- isset($result["richSnippet"]["metatags"]["ogImageWidth"]) &&
- isset($result["richSnippet"]["metatags"]["ogImageHeight"])
- ){
- $found_size = true;
- $width = (int)$result["richSnippet"]["metatags"]["ogImageWidth"];
- $height = (int)$result["richSnippet"]["metatags"]["ogImageHeight"];
- }
-
- // calculate rounded ratio
- if($found_size){
-
- $aspect_ratio = $width / $height;
-
- if($aspect_ratio >= 1.5){
-
- $thumb["ratio"] = "16:9";
- }
- elseif($aspect_ratio >= 0.8){
-
- $thumb["ratio"] = "1:1";
- }else{
-
- $thumb["ratio"] = "9:16";
- }
- }
- }
-
- $out["web"][] = [
- "title" => rtrim($result["titleNoFormatting"], " ."),
- "description" => $description,
- "url" => $result["unescapedUrl"],
- "date" => $date,
- "type" => "web",
- "thumb" => $thumb,
- "sublink" => [],
- "table" => []
- ];
- }
-
- // detect next page
- if(
- isset($json["cursor"]["isExactTotalResults"]) || // detects last page
- !isset($json["cursor"]["pages"]) // detects no results on page
- ){
-
- return $out;
- }
-
- // get next page
- $out["npt"] =
- $this->backend->store(
- json_encode(
- $req_params
- ),
- "web",
- $proxy
- );
-
- return $out;
- }
-
- public function image($get){
-
- if($get["npt"]){
-
- [$req_params, $proxy] =
- $this->backend->get(
- $get["npt"],
- "images"
- );
-
- $req_params =
- json_decode(
- $req_params,
- true
- );
-
- $json =
- $this->get(
- $proxy,
- "https://cse.google.com/cse/element/v1",
- $req_params,
- self::req_js
- );
-
- }else{
-
- $proxy = $this->backend->get_ip();
- $params = $this->generate_token($proxy);
-
- //$json = file_get_contents("scraper/google_cse.txt");
- $req_params = [
- "rsz" => "filtered_cse",
- "num" => 20,
- "hl" => "en",
- "source" => "gcsc",
- "cselibv" => $params["lib"],
- "searchtype" => "image",
- "cx" => config::GOOGLE_CX_ENDPOINT,
- "q" => $get["s"],
- "safe" => $get["nsfw"] == "yes" ? "off" : "active",
- "cse_tok" => $params["token"],
- "exp" => "cc,apo",
- "cseclient" => "hosted-page-client",
- "callback" => "google.search.cse.api" . random_int(4000, 99999),
- "rurl" => "https://cse.google.com/cse?cx=" . config::GOOGLE_CX_ENDPOINT . "#gsc.tab=1&gsc.q=" . $get["s"] . "&gsc.sort="
- ];
-
- // add additional hidden filters
-
- // country (image search uses cr instead of gl)
- if($get["country"] != "any"){
-
- $req_params["cr"] = "country" . strtoupper($get["country"]);
- }
-
- // nsfw
- $req_params["safe"] = $get["nsfw"] == "yes" ? "off" : "active";
-
- // size
- if($get["size"] != "any"){
-
- $req_params["imgsz"] = $get["size"];
- }
-
- // format
- if($get["format"] != "any"){
-
- $req_params["as_filetype"] = $get["format"];
- }
-
- // color
- if($get["color"] != "any"){
-
- if(
- $get["color"] == "color" ||
- $get["color"] == "trans"
- ){
-
- $req_params["imgc"] = $get["color"];
- }elseif($get["color"] == "bnw"){
-
- $req_params["imgc"] = "gray";
- }else{
-
- $req_params["imgcolor"] = $get["color"];
- }
- }
-
- $json =
- $this->get(
- $proxy,
- "https://cse.google.com/cse/element/v1",
- $req_params,
- self::req_js
- );
-
- $req_params["start"] = 0;
- }
-
- $req_params["start"] += 20;
-
- if(
- !preg_match(
- '/google\.search\.cse\.[A-Za-z0-9]+\(([\S\s]*)\);/i',
- $json,
- $json
- )
- ){
-
- throw new Exception("Failed to grep JSON");
- }
-
- $json = json_decode($json[1], true);
-
- if(isset($json["error"])){
-
- if(isset($json["error"]["errors"][0]["message"])){
-
- throw new Exception("Google returned an error: " . $json["error"]["errors"][0]["message"]);
- }
-
- if(isset($json["error"]["message"])){
-
- throw new Exception("Google returned an error: " . $json["error"]["message"]);
- }
-
- throw new Exception("Google returned an error object");
- }
-
- $out = [
- "status" => "ok",
- "npt" => null,
- "image" => []
- ];
-
- // detect next page
- if(
- isset($json["cursor"]["isExactTotalResults"]) || // detects last page
- !isset($json["cursor"]["pages"]) // detects no results on page
- ){
-
- return $out;
- }
-
- foreach($json["results"] as $result){
-
- $out["image"][] = [
- "title" => rtrim($result["titleNoFormatting"], " ."),
- "source" => [
- [
- "url" => $result["unescapedUrl"],
- "width" => (int)$result["width"],
- "height" => (int)$result["height"]
- ],
- [
- "url" => $result["tbLargeUrl"],
- "width" => (int)$result["tbLargeWidth"],
- "height" => (int)$result["tbLargeHeight"]
- ]
- ],
- "url" => $result["originalContextUrl"]
- ];
- }
-
- // get next page
- $out["npt"] =
- $this->backend->store(
- json_encode(
- $req_params
- ),
- "images",
- $proxy
- );
-
- return $out;
- }
-
- private function generate_token($proxy){
-
- $html =
- $this->get(
- $proxy,
- "https://cse.google.com/cse",
- [
- "cx" => config::GOOGLE_CX_ENDPOINT
- ],
- self::req_html
- );
-
- // detect captcha
- $this->fuckhtml->load($html);
-
- $title =
- $this->fuckhtml
- ->getElementsByTagName(
- "title"
- );
-
- if(
- count($title) !== 0 &&
- $title[0]["innerHTML"] == "302 Moved"
- ){
-
- throw new Exception("Google returned a captcha");
- }
-
- // get token
- preg_match(
- '/relativeUrl=\'([^\']+)\';/i',
- $html,
- $js_uri
- );
-
- if(!isset($js_uri[1])){
-
- throw new Exception("Failed to grep search token");
- }
-
- $js_uri =
- $this->fuckhtml
- ->parseJsString(
- $js_uri[1]
- );
-
- // get parameters
- $js =
- $this->get(
- $proxy,
- "https://cse.google.com" . $js_uri,
- [],
- self::req_js
- );
-
- preg_match(
- '/}\)\(({[\S\s]+})\);/',
- $js,
- $json
- );
-
- if(!isset($json[1])){
-
- throw new Exception("Failed to grep JSON parameters");
- }
-
- $json = json_decode($json[1], true);
-
- return [
- "token" => $json["cse_token"],
- "lib" => $json["cselibVersion"]
- ];
- }
-
- private function unshit_thumb($url){
- // https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQINE2vbnNLHXqoZr3RVsaEJFyOsj1_BiBnJch-e1nyz3oia7Aj5xVj
- // https://i.ytimg.com/vi/PZVIyA5ER3Y/mqdefault.jpg?sqp=-oaymwEFCJQBEFM&rs=AMzJL3nXeaCpdIar-ltNwl82Y82cIJfphA
-
- $parts = parse_url($url);
-
- if(
- isset($parts["host"]) &&
- preg_match(
- '/tbn.*\.gstatic\.com/',
- $parts["host"]
- )
- ){
-
- parse_str($parts["query"], $params);
-
- if(isset($params["q"])){
-
- return "https://" . $parts["host"] . "/images?q=" . $params["q"];
- }
- }
-
- return $url;
- }
- }
|