<?php
/**
 * Scraper for the greppr.org web search.
 *
 * A search needs three values, cached together in APCu under "greppr_token":
 *   token[0] = name of the query parameter (static token that changes once a day)
 *   token[1] = value of the "n" parameter (dynamic token that changes on every request)
 *   token[2] = PHPSESSID cookie the two tokens above were issued for
 */
class greppr{
	
	// project helper used for proxy selection and next-page token storage.
	// declared explicitly: dynamic properties are deprecated since PHP 8.2
	public $backend;
	
	// project HTML parser; holds the currently loaded document/element
	public $fuckhtml;
	
	public function __construct(){
		
		include "lib/backend.php";
		$this->backend = new backend("greppr");
		
		include "lib/fuckhtml.php";
		$this->fuckhtml = new fuckhtml();
	}
	
	/**
	 * This scraper exposes no search filters.
	 *
	 * @param mixed $page Unused.
	 * @return array Always an empty array.
	 */
	public function getfilters($page){
		
		return [];
	}
	
	/**
	 * Performs a GET request and returns the body with the response headers.
	 *
	 * @param mixed $proxy Passed as-is to backend::assign_proxy().
	 * @param string $url Target URL, without query string.
	 * @param array $get Query parameters, appended to $url when not empty.
	 * @param string|false $cookie PHPSESSID value to send, or false for none.
	 * @return array ["headers" => lowercased name => value, "data" => body]
	 * @throws Exception With the cURL error message when the transfer fails.
	 */
	private function get($proxy, $url, $get = [], $cookie = false){
		
		if($get !== []){
			
			$url .= "?" . http_build_query($get);
		}
		
		// browser-like header set; the cookie (if any) keeps its original
		// position right after Accept-Encoding
		$request_headers = [
			"User-Agent: " . config::USER_AGENT,
			"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
			"Accept-Language: en-US,en;q=0.5",
			"Accept-Encoding: gzip"
		];
		
		if($cookie !== false){
			
			$request_headers[] = "Cookie: PHPSESSID=" . $cookie;
		}
		
		array_push(
			$request_headers,
			"DNT: 1",
			"Connection: keep-alive",
			"Upgrade-Insecure-Requests: 1",
			"Sec-Fetch-Dest: document",
			"Sec-Fetch-Mode: navigate",
			"Sec-Fetch-Site: none",
			"Sec-Fetch-User: ?1"
		);
		
		$headers = [];
		$curlproc = curl_init();
		
		try{
			
			curl_setopt($curlproc, CURLOPT_URL, $url);
			curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
			curl_setopt($curlproc, CURLOPT_HTTPHEADER, $request_headers);
			
			curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
			curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
			curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
			curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);
			
			$this->backend->assign_proxy($curlproc, $proxy);
			
			curl_setopt(
				$curlproc,
				CURLOPT_HEADERFUNCTION,
				function($curlproc, $header) use (&$headers){
					
					// cURL expects the number of bytes consumed back
					$len = strlen($header);
					$header = explode(':', $header, 2);
					
					if(count($header) < 2){
						
						// ignore invalid headers (status line, blank line)
						return $len;
					}
					
					// a repeated header name overwrites the previous value
					$headers[strtolower(trim($header[0]))] = trim($header[1]);
					return $len;
				}
			);
			
			$data = curl_exec($curlproc);
			
			if(curl_errno($curlproc)){
				
				throw new Exception(curl_error($curlproc));
			}
			
		}finally{
			
			// release the handle on the error path as well
			curl_close($curlproc);
		}
		
		return [
			"headers" => $headers,
			"data" => $data
		];
	}
	
	/**
	 * Runs a web search, or continues one from a next-page token.
	 *
	 * @param array $get Request parameters: "s" is the search term, "npt" an
	 *                   optional next-page token created by a previous call.
	 * @param bool $first_attempt False on the internal retry; forces a fresh
	 *                            token fetch and disables any further retry.
	 * @return array Response object; results are in "web", the token for the
	 *               following page (or null) in "npt".
	 * @throws Exception On empty search term, unusable next-page token,
	 *                   network failure or when no valid token can be obtained.
	 */
	public function web($get, $first_attempt = true){
		
		$is_next_page = !empty($get["npt"]);
		
		if($is_next_page){
			
			[$q, $proxy] = $this->backend->get($get["npt"], "web");
			
			$q = json_decode($q, true);
			
			if(
				!is_array($q) ||
				!isset($q["q"], $q["s"])
			){
				
				throw new Exception("Failed to decode next page token");
			}
			
		}else{
			
			$search = $get["s"] ?? "";
			if(strlen($search) === 0){
				
				throw new Exception("Search term is empty!");
			}
			
			$proxy = $this->backend->get_ip();
		}
		
		// get token
		// token[0] = static token that changes once a day
		// token[1] = dynamic token that changes on every request
		// token[2] = PHPSESSID cookie
		$tokens = $this->get_tokens($proxy, $first_attempt === false);
		
		if($is_next_page){
			
			$params = [
				$tokens[0] => $q["q"],
				"s" => $q["s"],
				"l" => 30,
				"n" => $tokens[1]
			];
		}else{
			
			$params = [
				$tokens[0] => $search,
				"n" => $tokens[1]
			];
		}
		
		try{
			
			$searchresults = $this->get(
				$proxy,
				"https://greppr.org/search",
				$params,
				$tokens[2]
			);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search page", 0, $error);
		}
		
		if(strlen($searchresults["data"]) === 0){
			
			// redirected to main page, which means we got old token
			// generate a new one
			
			// ... unless we just tried to do that
			if($first_attempt === false){
				
				throw new Exception("Failed to get a new search token");
			}
			
			return $this->web($get, false);
		}
		
		// refresh the token with new data (this also triggers fuckhtml load)
		$this->parse_token($searchresults, $tokens[2]);
		
		// response object
		$out = [
			"status" => "ok",
			"spelling" => [
				"type" => "no_correction",
				"using" => null,
				"correction" => null
			],
			"npt" => null,
			"answer" => [],
			"web" => [],
			"image" => [],
			"video" => [],
			"news" => [],
			"related" => []
		];
		
		// collect the result nodes first: the pagination lookup below
		// replaces the document loaded in fuckhtml
		$results =
			$this->fuckhtml
			->getElementsByClassName(
				"result",
				"div"
			);
		
		// check for next page
		$out["npt"] = $this->parse_next_page($proxy);
		
		// scrape results
		foreach($results as $result){
			
			$entry = $this->parse_result($result);
			
			if($entry !== null){
				
				$out["web"][] = $entry;
			}
		}
		
		return $out;
	}
	
	/**
	 * Returns the cached search tokens, fetching new ones from the greppr
	 * front page when none are cached or a refresh is forced.
	 *
	 * @param mixed $proxy Proxy used for the front page request.
	 * @param bool $force_refresh True to ignore the cached tokens.
	 * @return array [token[0], token[1], token[2]] as described on the class.
	 * @throws Exception When the front page cannot be fetched or parsed.
	 */
	private function get_tokens($proxy, $force_refresh){
		
		$tokens = apcu_fetch("greppr_token");
		
		if(
			$tokens !== false &&
			$force_refresh === false
		){
			
			return $tokens;
		}
		
		// we haven't gotten the token yet (or it is stale), get it
		try{
			
			$response =
				$this->get(
					$proxy,
					"https://greppr.org",
					[]
				);
		}catch(Exception $error){
			
			throw new Exception("Failed to fetch search tokens", 0, $error);
		}
		
		$tokens = $this->parse_token($response);
		
		if($tokens === false){
			
			throw new Exception("Failed to grep search tokens");
		}
		
		return $tokens;
	}
	
	/**
	 * Looks for the pagination link that follows the current page ("#") in
	 * the document loaded in fuckhtml and stores it as a next-page token.
	 *
	 * Replaces the document loaded in fuckhtml with the pagination element.
	 *
	 * @param mixed $proxy Stored with the token via backend::store().
	 * @return mixed Value returned by backend::store(), or null when there is
	 *               no usable next page link.
	 */
	private function parse_next_page($proxy){
		
		$pagination =
			$this->fuckhtml
			->getElementsByClassName(
				"pagination",
				"ul"
			);
		
		if(count($pagination) === 0){
			
			return null;
		}
		
		$this->fuckhtml->load($pagination[0]);
		
		$links =
			$this->fuckhtml
			->getElementsByClassName(
				"page-link",
				"a"
			);
		
		$passed_current = false;
		foreach($links as $link){
			
			$href = $link["attributes"]["href"] ?? null;
			
			if($passed_current === false){
				
				// the current page is the link pointing to "#"
				if($href === "#"){
					
					$passed_current = true;
				}
				continue;
			}
			
			// only the link right after the current page is considered
			if($href === null){
				
				return null;
			}
			
			parse_str(
				$this->fuckhtml
				->getTextContent(
					$href
				),
				$values
			);
			
			// the first key carries the path and the daily token name, so
			// the parameters are addressed by position instead of by name
			$values = array_values($values);
			
			if(count($values) < 2){
				
				// not a search link (eg. another "#"): no next page
				return null;
			}
			
			return
				$this->backend->store(
					json_encode(
						[
							"q" => $values[0],
							"s" => $values[1]
						]
					),
					"web",
					$proxy
				);
		}
		
		return null;
	}
	
	/**
	 * Converts one "result" element into a web result entry.
	 *
	 * Replaces the document loaded in fuckhtml with $result.
	 *
	 * @param mixed $result Element returned by fuckhtml for a result node.
	 * @return array|null The entry, or null when the node has no link.
	 */
	private function parse_result($result){
		
		$this->fuckhtml->load($result);
		
		$anchors =
			$this->fuckhtml
			->getElementsByTagName(
				"a"
			);
		
		if(
			count($anchors) === 0 ||
			!isset($anchors[0]["attributes"]["href"])
		){
			
			// nothing to link to, skip this node
			return null;
		}
		
		$a = $anchors[0];
		
		$description =
			$this->fuckhtml
			->getElementsByClassName(
				"highlightedDesc",
				"p"
			);
		
		if(count($description) === 0){
			
			$description = null;
		}else{
			
			$description =
				$this->limitstrlen(
					$this->fuckhtml
					->getTextContent(
						$description[0]
					)
				);
		}
		
		$date =
			$this->parse_date(
				$this->fuckhtml
				->getElementsByTagName(
					"p"
				)
			);
		
		return [
			"title" =>
				$this->fuckhtml
				->getTextContent(
					$a["innerHTML"]
				),
			"description" => $description,
			"url" =>
				$this->fuckhtml
				->getTextContent(
					$a["attributes"]["href"]
				),
			"date" => $date,
			"type" => "web",
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"sublink" => [],
			"table" => []
		];
	}
	
	/**
	 * Extracts a timestamp from the last paragraph of a result.
	 *
	 * The text between the first and second ":" of that paragraph is handed
	 * to strtotime().
	 *
	 * @param array $paragraphs "p" elements of the result, in document order.
	 * @return int|null Unix timestamp, or null when no date could be parsed.
	 */
	private function parse_date($paragraphs){
		
		if(count($paragraphs) === 0){
			
			return null;
		}
		
		$parts =
			explode(
				":",
				$this->fuckhtml
				->getTextContent(
					$paragraphs[count($paragraphs) - 1]["innerHTML"]
				)
			);
		
		if(!isset($parts[1])){
			
			// no "label: value" separator in the paragraph
			return null;
		}
		
		$time = strtotime($parts[1]);
		
		// strtotime reports failure with false
		return $time === false ? null : $time;
	}
	
	/**
	 * Extracts the search tokens from a greppr page and caches them in APCu.
	 *
	 * Loads $response["data"] into fuckhtml as a side effect, even when no
	 * token is found.
	 *
	 * @param array $response Return value of get().
	 * @param string|false $cookie PHPSESSID already in use; when false the
	 *                             cookie is read from the Set-Cookie header.
	 * @return array|false [token[0], token[1], cookie], or false when the
	 *                     page holds no token or no usable cookie.
	 */
	private function parse_token($response, $cookie = false){
		
		$this->fuckhtml->load($response["data"]);
		$scripts =
			$this->fuckhtml
			->getElementsByTagName("script");
		
		// the tokens sit in an inline script that redirects to /search
		$match = [];
		foreach($scripts as $script){
			
			preg_match(
				'/window\.location ?= ?\'\/search\?([^=]+).*&n=([0-9]+)/',
				$script["innerHTML"],
				$match
			);
			
			if(isset($match[1])){
				
				break;
			}
		}
		
		if(!isset($match[1])){
			
			return false;
		}
		
		$tokens = [
			$match[1],
			$match[2]
		];
		
		if($cookie === false){
			
			if(!isset($response["headers"]["set-cookie"])){
				
				// server didn't send a cookie
				return false;
			}
			
			// get cookie
			preg_match(
				'/PHPSESSID=([^;]+)/',
				$response["headers"]["set-cookie"],
				$session
			);
			
			if(!isset($session[1])){
				
				// server sent an unexpected cookie
				return false;
			}
			
			$cookie = $session[1];
		}
		
		// otherwise we already specified a cookie, so keep using that one
		$tokens[] = $cookie;
		apcu_store("greppr_token", $tokens);
		
		return $tokens;
	}
	
	/**
	 * Shortens a text to its first line after word-wrapping at 300 bytes.
	 *
	 * A newline already present in $text also ends the returned line.
	 *
	 * @param string $text
	 * @return string
	 */
	private function limitstrlen($text){
		
		return explode("\n", wordwrap($text, 300, "\n"))[0];
	}
}