12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174 |
- <?php
- class mojeek{
/**
 * Set up the two collaborators the scraper methods rely on:
 * the HTML parser ($this->fuckhtml) and the backend object used
 * here for proxy selection and next-page token storage
 * ($this->backend, bound to the "mojeek" namespace).
 */
public function __construct(){

	// include_once instead of include: a plain include re-executes the
	// file on every construction, so building a second instance in the
	// same request would die with "Cannot declare class ...".
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();

	include_once "lib/backend.php";
	$this->backend = new backend("mojeek");
}
-
/**
 * Describe the search filters offered for a given result page.
 *
 * Each filter is keyed by its request parameter name and holds a
 * "display" label plus an "option" map of value => human label.
 *
 * @param string $page "web" or "news".
 * @return array|null  Filter definitions for "web", an empty array for
 *                     "news"; nothing is returned (null) for any other page.
 */
public function getfilters($page){

	// Loose comparison on purpose: it mirrors how a switch() matches.
	if($page == "web"){

		$focus_options = [
			"any" => "No focus",
			"blogs" => "Blogs",
			"Dictionary" => "Dictionary",
			"Recipes" => "Recipes",
			"Time" => "Time",
			"Weather" => "Weather"
		];

		$language_options = [
			"any" => "Any language",
			"af" => "Afrikaans",
			"sq" => "Albanian",
			"an" => "Aragonese",
			"ay" => "Aymara",
			"bi" => "Bislama",
			"br" => "Breton",
			"ca" => "Catalan",
			"kw" => "Cornish",
			"co" => "Corsican",
			"hr" => "Croatian",
			"da" => "Danish",
			"nl" => "Dutch",
			"dz" => "Dzongkha",
			"en" => "English",
			"fj" => "Fijian",
			"fi" => "Finnish",
			"fr" => "French",
			"gd" => "Gaelic",
			"gl" => "Galician",
			"de" => "German",
			"ht" => "Haitian",
			"io" => "Ido",
			"id" => "Indonesian",
			"ia" => "Interlingua",
			"ie" => "Interlingue",
			"ga" => "Irish",
			"it" => "Italian",
			"rw" => "Kinyarwanda",
			"la" => "Latin",
			"li" => "Limburgish",
			"lb" => "Luxembourgish",
			"no" => "Norwegian",
			"nb" => "Norwegian Bokmål",
			"nn" => "Norwegian Nynorsk",
			"oc" => "Occitan (post 1500)",
			"pl" => "Polish",
			"pt" => "Portuguese",
			"rm" => "Romansh",
			"rn" => "Rundi",
			"sg" => "Sango",
			"so" => "Somali",
			"es" => "Spanish",
			"sw" => "Swahili",
			"ss" => "Swati",
			"sv" => "Swedish",
			"ty" => "Tahitian",
			"to" => "Tonga (Tonga Islands)",
			"ts" => "Tsonga",
			"vo" => "Volapük",
			"wa" => "Walloon",
			"cy" => "Welsh",
			"xh" => "Xhosa",
			"zu" => "Zulu"
		];

		$country_options = [
			"any" => "No location bias",
			"af" => "Afghanistan",
			"ax" => "Åland Islands",
			"al" => "Albania",
			"dz" => "Algeria",
			"as" => "American Samoa",
			"ad" => "Andorra",
			"ao" => "Angola",
			"ai" => "Anguilla",
			"aq" => "Antarctica",
			"ag" => "Antigua and Barbuda",
			"ar" => "Argentina",
			"am" => "Armenia",
			"aw" => "Aruba",
			"au" => "Australia",
			"at" => "Austria",
			"az" => "Azerbaijan",
			"bs" => "Bahamas",
			"bh" => "Bahrain",
			"bd" => "Bangladesh",
			"bb" => "Barbados",
			"by" => "Belarus",
			"be" => "Belgium",
			"bz" => "Belize",
			"bj" => "Benin",
			"bm" => "Bermuda",
			"bt" => "Bhutan",
			"bo" => "Bolivia (Plurinational State of)",
			"bq" => "Bonaire, Sint Eustatius and Saba",
			"ba" => "Bosnia and Herzegovina",
			"bw" => "Botswana",
			"bv" => "Bouvet Island",
			"br" => "Brazil",
			"io" => "British Indian Ocean Territory",
			"bn" => "Brunei Darussalam",
			"bg" => "Bulgaria",
			"bf" => "Burkina Faso",
			"bi" => "Burundi",
			"cv" => "Cabo Verde",
			"kh" => "Cambodia",
			"cm" => "Cameroon",
			"ca" => "Canada",
			"ky" => "Cayman Islands",
			"cf" => "Central African Republic",
			"td" => "Chad",
			"cl" => "Chile",
			"cn" => "China",
			"cx" => "Christmas Island",
			"cc" => "Cocos (Keeling) Islands",
			"co" => "Colombia",
			"km" => "Comoros",
			"cg" => "Congo",
			"cd" => "Congo (Democratic Republic of the)",
			"ck" => "Cook Islands",
			"cr" => "Costa Rica",
			"ci" => "Côte d'Ivoire",
			"hr" => "Croatia",
			"cu" => "Cuba",
			"cw" => "Curaçao",
			"cy" => "Cyprus",
			"cz" => "Czechia",
			"dk" => "Denmark",
			"dj" => "Djibouti",
			"dm" => "Dominica",
			"do" => "Dominican Republic",
			"ec" => "Ecuador",
			"eg" => "Egypt",
			"sv" => "El Salvador",
			"gq" => "Equatorial Guinea",
			"er" => "Eritrea",
			"ee" => "Estonia",
			"et" => "Ethiopia",
			"fk" => "Falkland Islands (Malvinas)",
			"fo" => "Faroe Islands",
			"fj" => "Fiji",
			"fi" => "Finland",
			"fr" => "France",
			"gf" => "French Guiana",
			"pf" => "French Polynesia",
			"tf" => "French Southern Territories",
			"ga" => "Gabon",
			"gm" => "Gambia",
			"ge" => "Georgia",
			"de" => "Germany",
			"gh" => "Ghana",
			"gi" => "Gibraltar",
			"gr" => "Greece",
			"gl" => "Greenland",
			"gd" => "Grenada",
			"gp" => "Guadeloupe",
			"gu" => "Guam",
			"gt" => "Guatemala",
			"gg" => "Guernsey",
			"gn" => "Guinea",
			"gw" => "Guinea-Bissau",
			"gy" => "Guyana",
			"ht" => "Haiti",
			"hm" => "Heard Island and McDonald Islands",
			"va" => "Holy See",
			"hn" => "Honduras",
			"hk" => "Hong Kong",
			"hu" => "Hungary",
			"is" => "Iceland",
			"in" => "India",
			"id" => "Indonesia",
			"ir" => "Iran (Islamic Republic of)",
			"iq" => "Iraq",
			"ie" => "Ireland",
			"im" => "Isle of Man",
			"il" => "Israel",
			"it" => "Italy",
			"jm" => "Jamaica",
			"jp" => "Japan",
			"je" => "Jersey",
			"jo" => "Jordan",
			"kz" => "Kazakhstan",
			"ke" => "Kenya",
			"ki" => "Kiribati",
			"kp" => "Korea (Democratic People's Republic of)",
			"kr" => "Korea (Republic of)",
			"kw" => "Kuwait",
			"kg" => "Kyrgyzstan",
			"la" => "Lao People's Democratic Republic",
			"lv" => "Latvia",
			"lb" => "Lebanon",
			"ls" => "Lesotho",
			"lr" => "Liberia",
			"ly" => "Libya",
			"li" => "Liechtenstein",
			"lt" => "Lithuania",
			"lu" => "Luxembourg",
			"mo" => "Macao",
			"mk" => "Macedonia (the former Yugoslav Republic of)",
			"mg" => "Madagascar",
			"mw" => "Malawi",
			"my" => "Malaysia",
			"mv" => "Maldives",
			"ml" => "Mali",
			"mt" => "Malta",
			"mh" => "Marshall Islands",
			"mq" => "Martinique",
			"mr" => "Mauritania",
			"mu" => "Mauritius",
			"yt" => "Mayotte",
			"mx" => "Mexico",
			"fm" => "Micronesia (Federated States of)",
			"md" => "Moldova (Republic of)",
			"mc" => "Monaco",
			"mn" => "Mongolia",
			"me" => "Montenegro",
			"ms" => "Montserrat",
			"ma" => "Morocco",
			"mz" => "Mozambique",
			"mm" => "Myanmar",
			"na" => "Namibia",
			"nr" => "Nauru",
			"np" => "Nepal",
			"nl" => "Netherlands",
			"nc" => "New Caledonia",
			"nz" => "New Zealand",
			"ni" => "Nicaragua",
			"ne" => "Niger",
			"ng" => "Nigeria",
			"nu" => "Niue",
			"nf" => "Norfolk Island",
			"mp" => "Northern Mariana Islands",
			"no" => "Norway",
			"om" => "Oman",
			"pk" => "Pakistan",
			"pw" => "Palau",
			"ps" => "Palestine, State of",
			"pa" => "Panama",
			"pg" => "Papua New Guinea",
			"py" => "Paraguay",
			"pe" => "Peru",
			"ph" => "Philippines",
			"pn" => "Pitcairn",
			"pl" => "Poland",
			"pt" => "Portugal",
			"pr" => "Puerto Rico",
			"qa" => "Qatar",
			"re" => "Réunion",
			"ro" => "Romania",
			"ru" => "Russian Federation",
			"rw" => "Rwanda",
			"bl" => "Saint Barthélemy",
			"sh" => "Saint Helena, Ascension and Tristan da Cunha",
			"kn" => "Saint Kitts and Nevis",
			"lc" => "Saint Lucia",
			"mf" => "Saint Martin (French part)",
			"pm" => "Saint Pierre and Miquelon",
			"vc" => "Saint Vincent and the Grenadines",
			"ws" => "Samoa",
			"sm" => "San Marino",
			"st" => "Sao Tome and Principe",
			"sa" => "Saudi Arabia",
			"sn" => "Senegal",
			"rs" => "Serbia",
			"sc" => "Seychelles",
			"sl" => "Sierra Leone",
			"sg" => "Singapore",
			"sx" => "Sint Maarten (Dutch part)",
			"sk" => "Slovakia",
			"si" => "Slovenia",
			"sb" => "Solomon Islands",
			"so" => "Somalia",
			"za" => "South Africa",
			"gs" => "South Georgia and South Sandwich Islands",
			"ss" => "South Sudan",
			"es" => "Spain",
			"lk" => "Sri Lanka",
			"sd" => "Sudan",
			"sr" => "Suriname",
			"sj" => "Svalbard and Jan Mayen",
			"sz" => "Swaziland",
			"se" => "Sweden",
			"ch" => "Switzerland",
			"sy" => "Syrian Arab Republic",
			"tw" => "Taiwan",
			"tj" => "Tajikistan",
			"tz" => "Tanzania, United Republic of",
			"th" => "Thailand",
			"tl" => "Timor-Leste",
			"tg" => "Togo",
			"tk" => "Tokelau",
			"to" => "Tonga",
			"tt" => "Trinidad and Tobago",
			"tn" => "Tunisia",
			"tr" => "Turkey",
			"tm" => "Turkmenistan",
			"tc" => "Turks and Caicos Islands",
			"tv" => "Tuvalu",
			"ug" => "Uganda",
			"ua" => "Ukraine",
			"ae" => "United Arab Emirates",
			"gb" => "United Kingdom",
			"us" => "United States of America",
			"um" => "United States Minor Outlying Islands",
			"uy" => "Uruguay",
			"uz" => "Uzbekistan",
			"vu" => "Vanuatu",
			"ve" => "Venezuela (Bolivarian Republic of)",
			"vn" => "Viet Nam",
			"vg" => "Virgin Islands (British)",
			"vi" => "Virgin Islands (U.S.)",
			"wf" => "Wallis and Futuna",
			"eh" => "Western Sahara",
			"ye" => "Yemen",
			"zm" => "Zambia",
			"zw" => "Zimbabwe"
		];

		$region_options = [
			"any" => "Any region",
			"eu" => "European Union",
			"de" => "Germany",
			"fr" => "France",
			"uk" => "United Kingdom"
		];

		// how many hits a single domain may contribute; "0" lifts the cap
		$per_domain_options = [
			"1" => "1 result",
			"2" => "2 results",
			"3" => "3 results",
			"4" => "4 results",
			"5" => "5 results",
			"10" => "10 results",
			"0" => "Unlimited",
		];

		return [
			"focus" => [
				"display" => "Focus",
				"option" => $focus_options
			],
			"lang" => [
				"display" => "Language",
				"option" => $language_options
			],
			"country" => [
				"display" => "Country",
				"option" => $country_options
			],
			"region" => [
				"display" => "Region",
				"option" => $region_options
			],
			"domain" => [
				"display" => "Results per domain",
				"option" => $per_domain_options
			]
		];
	}

	if($page == "news"){

		// the news search exposes no filters
		return [];
	}
}
-
/**
 * Issue an HTTP GET through the given proxy and return the response body.
 *
 * @param mixed  $proxy Proxy descriptor, passed untouched to
 *                      backend::assign_proxy().
 * @param string $url   Target URL without the parameters from $get.
 * @param array  $get   Query parameters; when non-empty they are encoded
 *                      with http_build_query() and appended after "?".
 * @return string|bool  The value returned by curl_exec() (the body, as
 *                      CURLOPT_RETURNTRANSFER is enabled).
 * @throws Exception    Carrying the cURL error text when the transfer fails.
 */
private function get($proxy, $url, $get = []){

	$headers = [
		"User-Agent: " . config::USER_AGENT,
		"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
		"Accept-Language: en-US,en;q=0.5",
		"Accept-Encoding: gzip",
		"DNT: 1",
		"Connection: keep-alive",
		"Upgrade-Insecure-Requests: 1",
		"Sec-Fetch-Dest: document",
		"Sec-Fetch-Mode: navigate",
		"Sec-Fetch-Site: none",
		"Sec-Fetch-User: ?1"
	];

	if($get !== []){

		$url .= "?" . http_build_query($get);
	}

	$curlproc = curl_init();

	// try/finally so the handle is released on the error path as well;
	// previously the throw below skipped curl_close() entirely.
	try{

		curl_setopt($curlproc, CURLOPT_URL, $url);

		// "" = let cURL decode every content encoding it supports
		curl_setopt($curlproc, CURLOPT_ENCODING, "");
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);

		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

		$this->backend->assign_proxy($curlproc, $proxy);

		$data = curl_exec($curlproc);

		if(curl_errno($curlproc)){

			throw new Exception(curl_error($curlproc));
		}

		return $data;

	}finally{

		curl_close($curlproc);
	}
}
-
/**
 * Run a Mojeek web search, or continue one from a next-page token,
 * and scrape the returned HTML.
 *
 * @param array $get Request values. Reads "npt" (next-page token); when
 *                   that is empty it also reads "s" (search term), "lang",
 *                   "country", "region", "domain" and "focus".
 * @return array     ["status", "spelling", "npt", "answer", "web",
 *                   "image", "video", "news", "related"]. When the page
 *                   holds no "results-standard" list the skeleton is
 *                   returned with every collection empty.
 * @throws Exception "Search term is empty!" for an empty term, or
 *                   "Failed to get HTML" (original error attached as
 *                   previous) when the request fails.
 */
public function web($get){

	if($get["npt"]){

		// the stored token is appended to the host as the next page's path
		[$token, $proxy] = $this->backend->get($get["npt"], "web");

		$url = "https://www.mojeek.com" . $token;
		$params = [];

	}else{

		$search = $get["s"];
		if(strlen($search) === 0){

			throw new Exception("Search term is empty!");
		}

		$proxy = $this->backend->get_ip();

		$url = "https://www.mojeek.com/search";
		$params = $this->web_params($get);
	}

	try{
		$html = $this->get($proxy, $url, $params);
	}catch(Exception $error){

		// keep the cURL failure reachable through getPrevious()
		throw new Exception("Failed to get HTML", 0, $error);
	}

	$out = [
		"status" => "ok",
		"spelling" => [
			"type" => "no_correction",
			"using" => null,
			"correction" => null
		],
		"npt" => null,
		"answer" => [],
		"web" => [],
		"image" => [],
		"video" => [],
		"news" => [],
		"related" => []
	];

	$this->fuckhtml->load($html);

	$containers =
		$this->fuckhtml
		->getElementsByClassName("results-standard", "ul");

	if(count($containers) === 0){

		// no organic result list: nothing else on the page is scraped
		return $out;
	}

	$out["web"] = $this->web_results($containers);
	$out["answer"] = $this->web_answers($html);
	$out["news"] = $this->web_news($html);
	$out["npt"] = $this->web_next_page($html, $proxy);

	return $out;
}

/**
 * Build the query string parameters for a first-page web search.
 *
 * @param array $get Request values ("s", "country", "focus", "lang",
 *                   "region", "domain").
 * @return array     Parameters for https://www.mojeek.com/search.
 */
private function web_params($get){

	$country = $get["country"];

	$params = [
		"q" => $get["s"],
		"t" => 20, // number of results/page
		"tn" => 7, // number of news results/page
		"date" => 1, // show date
		"tlen" => 128, // max length of title
		"dlen" => 511, // max length of description
		"arc" => ($country == "any" ? "none" : $country) // location. don't use autodetect!
	];

	switch($get["focus"]){

		case "any": break;

		case "blogs":
			$params["fmt"] = "sst";
			$params["sst"] = "1";
			break;

		default:
			$params["foc_t"] = $get["focus"];
			break;
	}

	if($get["lang"] != "any"){

		$params["lb"] = $get["lang"];
	}

	if($get["region"] != "any"){

		$params["reg"] = $get["region"];
	}

	// "1" is treated as the default and is not sent
	if($get["domain"] != "1"){

		$params["si"] = $get["domain"];
	}

	return $params;
}

/**
 * Turn every <li> inside the given result lists into a web result.
 *
 * @param array $containers "results-standard" <ul> elements.
 * @return array            List of result arrays (title, description,
 *                          url, date, type, thumb, sublink, table).
 */
private function web_results($containers){

	$found = [];

	foreach($containers as $container){

		$this->fuckhtml->load($container);
		$items = $this->fuckhtml->getElementsByTagName("li");

		foreach($items as $item){

			$this->fuckhtml->load($item);

			$title = $this->fuckhtml->getElementsByClassName("title", "a");

			if(count($title) === 0){

				// an <li> without a title link carries no usable result
				continue;
			}

			$title = $title[0];

			$data = [
				"title" =>
					html_entity_decode(
						$this->fuckhtml->getTextContent($title["innerHTML"])
					),
				"description" => null,
				"url" =>
					html_entity_decode(
						$this->fuckhtml->getTextContent($title["attributes"]["href"])
					),
				"date" => null,
				"type" => "web",
				"thumb" => [
					"url" => null,
					"ratio" => null
				],
				"sublink" => [],
				"table" => []
			];

			$description = $this->fuckhtml->getElementsByClassName("s", "p");

			if(count($description) !== 0){

				$data["description"] =
					$this->titledots(
						html_entity_decode(
							$this->fuckhtml->getTextContent($description[0])
						)
					);
			}

			$date = $this->fuckhtml->getElementsByClassName("mdate", "span");

			if(count($date) !== 0){

				$data["date"] =
					strtotime(
						$this->fuckhtml->getTextContent($date[0])
					);
			}

			$found[] = $data;
		}
	}

	return $found;
}

/**
 * Scrape the instant-answer boxes ("infobox infobox-top" divs).
 *
 * @param string $html Full result page.
 * @return array       List of answers (title, description, url, thumb,
 *                     table, sublink).
 */
private function web_answers($html){

	$answers = [];

	$this->fuckhtml->load($html);

	$infoboxes =
		$this->fuckhtml
		->getElementsByClassName("infobox infobox-top", "div");

	foreach($infoboxes as $infobox){

		$answer = [
			"title" => null,
			"description" => [],
			"url" => null,
			"thumb" => null,
			"table" => [],
			"sublink" => []
		];

		// <hr> separates: [0] title + short definition,
		// [1] thumbnail/description/table/links, [2] source link
		$parts = explode("<hr>", $infobox["innerHTML"]);

		$this->fuckhtml->load($parts[0]);

		$heading = $this->fuckhtml->getElementsByTagName("h1");

		if(count($heading) !== 0){

			$answer["title"] = $this->fuckhtml->getTextContent($heading[0]);
		}

		// short definition
		$definition = $this->fuckhtml->getElementsByTagName("p");

		if(count($definition) !== 0){

			$answer["description"][] = [
				"type" => "quote",
				"value" => $this->fuckhtml->getTextContent($definition[0])
			];
		}

		// an infobox with fewer <hr> sections simply lacks these parts
		if(isset($parts[1])){

			$this->fuckhtml->load($parts[1]);

			// thumbnail, if it exists
			$thumb = $this->fuckhtml->getElementsByClassName("float-right", "img");

			if(count($thumb) !== 0){

				preg_match(
					'/\/image\?img=([^&]+)/i',
					$thumb[0]["attributes"]["src"],
					$match
				);

				if(count($match) === 2){

					$answer["thumb"] =
						urldecode(
							$this->fuckhtml->getTextContent($match[1])
						);
				}
			}

			$paragraphs = $this->fuckhtml->getElementsByTagName("p");

			foreach($paragraphs as $p){

				$this->fuckhtml->load($p);

				if(preg_match('/^\s*<strong>/i', $p["innerHTML"])){

					// "<strong>Label:</strong> value" => table row
					$strong = $this->fuckhtml->getElementsByTagName("strong")[0];

					$p["innerHTML"] =
						str_replace($strong["innerHTML"], "", $p["innerHTML"]);

					$label =
						preg_replace(
							'/:$/',
							"",
							ucfirst($this->fuckhtml->getTextContent($strong))
						);

					$answer["table"][trim($label)] =
						trim($this->fuckhtml->getTextContent($p));

					continue;
				}

				$icons = $this->fuckhtml->getElementsByClassName("svg-icon");

				if(count($icons) !== 0){

					// website links: the word after the first space of the
					// class attribute becomes the link's name
					foreach($icons as $icon){

						$class = explode(" ", $icon["attributes"]["class"], 2);

						$answer["sublink"][ucfirst($class[1] ?? "")] =
							$this->fuckhtml
							->getTextContent($icon["attributes"]["href"]);
					}

					continue;
				}

				$answer["description"] =
					$this->web_answer_text($answer["description"], $p);
			}
		}

		if(isset($parts[2])){

			$this->fuckhtml->load($parts[2]);

			$source = $this->fuckhtml->getElementsByTagName("a");

			if(count($source) !== 0){

				$answer["url"] =
					$this->fuckhtml
					->getTextContent($source[0]["attributes"]["href"]);
			}
		}

		$answers[] = $answer;
	}

	return $answers;
}

/**
 * Split one infobox paragraph into "text" and "link" description nodes.
 *
 * The paragraph must already be loaded into $this->fuckhtml by the
 * caller; its tags are walked in order and the markup preceding each tag
 * is emitted as text.
 *
 * @param array $description Description nodes collected so far.
 * @param array $p           The paragraph element (reads "innerHTML").
 * @return array             $description with the new nodes appended.
 */
private function web_answer_text($description, $p){

	$tags = $this->fuckhtml->getElementsByTagName("*");

	// markup of the paragraph that has not been consumed yet
	$rest = $p["innerHTML"];

	$i = 0;
	foreach($tags as $tag){

		$c = count($description);

		// cut the tag out of the remaining markup
		$split = explode($tag["outerHTML"], $rest, 2);

		if(count($split) === 2){

			// a paragraph that opens right after a link gets a blank
			// line in front of its first text run
			if(
				$i === 0 &&
				$c !== 0 &&
				$description[$c - 1]["type"] == "link"
			){

				$append = "\n\n";
			}else{

				$append = "";
			}

			if($split[0] != ""){

				$description[] = [
					"type" => "text",
					"value" => $append . trim($split[0])
				];
			}

			$rest = $split[1];
		}else{

			// tag not found in what is left (e.g. nested in an earlier one)
			$rest = $split[0];
		}

		if($tag["tagName"] == "a"){

			$value = $this->fuckhtml->getTextContent($tag);

			if(strtolower($value) == "wikipedia"){

				// the attribution link itself is not emitted
				if($c !== 0){

					$description[$c - 1]["value"] =
						rtrim($description[$c - 1]["value"]);
				}
			}else{

				$description[] = [
					"type" => "link",
					"url" =>
						$this->fuckhtml
						->getTextContent($tag["attributes"]["href"]),
					"value" => $value
				];
			}
		}

		$i++;
	}

	return $description;
}

/**
 * Scrape the news box embedded in a web result page.
 *
 * @param string $html Full result page.
 * @return array       List of news entries (title, description, date,
 *                     thumb, url); empty when the page has no news box.
 */
private function web_news($html){

	$articles = [];

	$this->fuckhtml->load($html);

	$box =
		$this->fuckhtml
		->getElementsByClassName("results news-results", "div");

	if(count($box) === 0){

		return $articles;
	}

	$this->fuckhtml->load($box[0]);

	$items = $this->fuckhtml->getElementsByTagName("li");

	foreach($items as $item){

		$this->fuckhtml->load($item);

		$link = $this->fuckhtml->getElementsByClassName("ob", "a");

		if(count($link) === 0){

			continue;
		}

		$link = $link[0];

		// the date is the last " - " separated piece of the first <span>
		$date = null;
		$span = $this->fuckhtml->getElementsByTagName("span");

		if(count($span) !== 0){

			$pieces =
				explode(
					" - ",
					$this->fuckhtml->getTextContent($span[0])
				);

			$date = strtotime($pieces[count($pieces) - 1]);
		}

		$articles[] = [
			"title" =>
				html_entity_decode(
					$this->fuckhtml->getTextContent($link)
				),
			"description" => null,
			"date" => $date,
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"url" =>
				$this->fuckhtml
				->getTextContent($link["attributes"]["href"])
		];
	}

	return $articles;
}

/**
 * Store the "Next" pagination link and return its token.
 *
 * @param string $html  Full result page.
 * @param mixed  $proxy Proxy used for this page; stored with the link.
 * @return mixed        Value returned by backend::store(), or null when
 *                      the page has no "Next" link.
 */
private function web_next_page($html, $proxy){

	$this->fuckhtml->load($html);

	$pagination = $this->fuckhtml->getElementsByClassName("pagination");

	// was "!== false", which is always true for an int and made the
	// code index an empty list on pages without pagination
	if(count($pagination) === 0){

		return null;
	}

	$this->fuckhtml->load($pagination[0]);

	$links = $this->fuckhtml->getElementsByTagName("a");

	foreach($links as $link){

		if($link["innerHTML"] == "Next"){

			// one token is enough; stop instead of storing again
			return $this->backend->store(
				$this->fuckhtml
				->getTextContent($link["attributes"]["href"]),
				"web",
				$proxy
			);
		}
	}

	return null;
}
-
/**
 * Run a Mojeek news search and scrape the <article> elements.
 *
 * @param array $get Request values; only "s" (search term) is read.
 * @return array     ["status" => "ok", "npt" => null, "news" => [...]],
 *                   each news entry holding title, author, description,
 *                   date, thumb and url. "npt" is never filled in here.
 * @throws Exception "Search term is empty!" for an empty term, or
 *                   "Failed to get HTML" (original error attached as
 *                   previous) when the request fails.
 */
public function news($get){

	$search = $get["s"];

	if(strlen($search) === 0){

		throw new Exception("Search term is empty!");
	}

	$out = [
		"status" => "ok",
		"npt" => null,
		"news" => []
	];

	try{
		$html =
			$this->get(
				$this->backend->get_ip(),
				"https://www.mojeek.com/search",
				[
					"q" => $search,
					"fmt" => "news"
				]
			);
	}catch(Exception $error){

		// keep the underlying failure reachable through getPrevious()
		throw new Exception("Failed to get HTML", 0, $error);
	}

	$this->fuckhtml->load($html);

	$articles = $this->fuckhtml->getElementsByTagName("article");

	foreach($articles as $article){

		$this->fuckhtml->load($article);

		$links = $this->fuckhtml->getElementsByTagName("a");

		if(count($links) === 0){

			// title and URL both come from the first link
			continue;
		}

		$data = [
			"title" =>
				$this->fuckhtml
				->getTextContent($links[0]["attributes"]["title"]),
			"author" => null,
			"description" => null,
			"date" => null,
			"thumb" => [
				"url" => null,
				"ratio" => null
			],
			"url" =>
				$this->fuckhtml
				->getTextContent($links[0]["attributes"]["href"])
		];

		// every lookup below is restricted to the article's <p> elements
		$p = $this->fuckhtml->getElementsByTagName("p");

		$summary = $this->fuckhtml->getElementsByClassName("s", $p);

		if(count($summary) !== 0){

			$data["description"] =
				$this->titledots(
					$this->fuckhtml->getTextContent($summary[0])
				);

			if($data["description"] == ""){

				$data["description"] = null;
			}
		}

		// big nodes carry the date in a dedicated "date" paragraph
		$date = $this->fuckhtml->getElementsByClassName("date", $p);

		if(count($date) !== 0){

			$data["date"] =
				strtotime(
					$this->fuckhtml->getTextContent($date[0])
				);
		}

		// the "i" paragraph holds the author (and, on small nodes, the date)
		$meta = $this->fuckhtml->getElementsByClassName("i", $p);

		if(count($meta) !== 0){

			$meta = $meta[0];
			$this->fuckhtml->load($meta);

			$author = $this->fuckhtml->getElementsByTagName("a");

			if(count($author) !== 0){

				// big node: the author is a link
				$data["author"] =
					htmlspecialchars_decode(
						$this->fuckhtml
						->getTextContent($author[0]["innerHTML"])
					);
			}else{

				// small node: "author • <time>"; strip the <time> element
				// and the trailing separator to be left with the author
				$time = $this->fuckhtml->getElementsByTagName("time");

				if(count($time) !== 0){

					$data["date"] =
						strtotime(
							$this->fuckhtml->getTextContent($time[0])
						);

					$meta["innerHTML"] =
						str_replace(
							$time[0]["outerHTML"],
							"",
							$meta["innerHTML"]
						);
				}

				$data["author"] =
					preg_replace(
						'/ • $/',
						"",
						$meta["innerHTML"]
					);
			}
		}

		$out["news"][] = $data;
	}

	return $out;
}
-
/**
 * Strip dots and surrounding whitespace from both ends of a string,
 * e.g. the ellipsis Mojeek puts around truncated descriptions.
 *
 * @param string $title Text to clean.
 * @return string       $title without leading/trailing ".", space, tab,
 *                      newline, carriage return, NUL or vertical tab.
 */
private function titledots($title){

	// trim()'s default whitespace set with "." added
	$strip = " \t\n\r\0\x0B.";

	return trim($title, $strip);
}
- }
-
|