12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
7477847794780478147824783478447854786478747884789479047914792479347944795479647974798479948004801480248034804480548064807480848094810481148124813481448154816481748184819482048214822482348244825482648274828482948304831483248334834483548364837483848394840484148424843484448454846484748484849485048514852485348544855485648574858485948604861486248634864486548664867486848694870487148724873487448754876487748784879488048814882488348844885488648874888488948904891489248934894489548964897489848994900490149024903490449054906490749084909491049114912491349144915491649174918491949204921492249234924492549264927492849294930493149324933493449354936493749384939494049414942494349444945494649474948494949504951495249534954495549564957495849594960496149624963496449654966496749684969497049714972497349744975497649774978497949804981498249834984498549864987498849894990499149924993499449954996499749984999500050015002500350045005500650075008500950105011501250135014501550165017501850195020502150225023502450255026502750285029503050315032503350345035503650375038503950405041504250435044504550465047 |
- <?php
- // @TODO check for consent.google.com page, if need be
- class google{
-
/**
 * Load the helper libraries this scraper relies on and attach one
 * instance of each to the object:
 *   - $this->fuckhtml: HTML helper used by the parsing methods
 *   - $this->backend:  backend helper created for the "google" service
 *                      (used when issuing requests, e.g. assign_proxy())
 */
public function __construct(){

	// include_once instead of include: these files declare classes, so
	// loading them a second time (a second instance of this scraper, or
	// another module that already pulled in the same library) would abort
	// with a "cannot declare class" fatal error.
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();

	include_once "lib/backend.php";
	$this->backend = new backend("google");
}
-
/**
 * Describe the search filters available for one result page type.
 *
 * Every page type shares the "country" and "nsfw" filters; the rest
 * depend on $page. Each filter is an array with a "display" label and an
 * "option" entry, which is either a value => label map or the string
 * "_DATE".
 *
 * @param string $page One of "web", "images", "videos" or "news".
 * @return array|null Filter definitions keyed by filter name, or null
 *                    when $page is not one of the values above.
 */
public function getfilters($page){

	// Date range filters shared by the web, videos and news pages.
	// "option" is the literal string "_DATE" rather than a list of choices
	// (presumably a marker for a date input on the consumer side -- confirm).
	$date_range = [
		"newer" => [ // tbs
			"display" => "Newer than",
			"option" => "_DATE"
		],
		"older" => [
			"display" => "Older than",
			"option" => "_DATE"
		]
	];

	// Filters common to every page type.
	$base = [
		"country" => [ // gl=<country> (image: cr=countryAF)
			"display" => "Country",
			"option" => [
				"any" => "Instance's country",
				"af" => "Afghanistan",
				"al" => "Albania",
				"dz" => "Algeria",
				"as" => "American Samoa",
				"ad" => "Andorra",
				"ao" => "Angola",
				"ai" => "Anguilla",
				"aq" => "Antarctica",
				"ag" => "Antigua and Barbuda",
				"ar" => "Argentina",
				"am" => "Armenia",
				"aw" => "Aruba",
				"au" => "Australia",
				"at" => "Austria",
				"az" => "Azerbaijan",
				"bs" => "Bahamas",
				"bh" => "Bahrain",
				"bd" => "Bangladesh",
				"bb" => "Barbados",
				"by" => "Belarus",
				"be" => "Belgium",
				"bz" => "Belize",
				"bj" => "Benin",
				"bm" => "Bermuda",
				"bt" => "Bhutan",
				"bo" => "Bolivia",
				"ba" => "Bosnia and Herzegovina",
				"bw" => "Botswana",
				"bv" => "Bouvet Island",
				"br" => "Brazil",
				"io" => "British Indian Ocean Territory",
				"bn" => "Brunei Darussalam",
				"bg" => "Bulgaria",
				"bf" => "Burkina Faso",
				"bi" => "Burundi",
				"kh" => "Cambodia",
				"cm" => "Cameroon",
				"ca" => "Canada",
				"cv" => "Cape Verde",
				"ky" => "Cayman Islands",
				"cf" => "Central African Republic",
				"td" => "Chad",
				"cl" => "Chile",
				"cn" => "China",
				"cx" => "Christmas Island",
				"cc" => "Cocos (Keeling) Islands",
				"co" => "Colombia",
				"km" => "Comoros",
				"cg" => "Congo",
				"cd" => "Congo, the Democratic Republic",
				"ck" => "Cook Islands",
				"cr" => "Costa Rica",
				"ci" => "Cote D'ivoire",
				"hr" => "Croatia",
				"cu" => "Cuba",
				"cy" => "Cyprus",
				"cz" => "Czech Republic",
				"dk" => "Denmark",
				"dj" => "Djibouti",
				"dm" => "Dominica",
				"do" => "Dominican Republic",
				"ec" => "Ecuador",
				"eg" => "Egypt",
				"sv" => "El Salvador",
				"gq" => "Equatorial Guinea",
				"er" => "Eritrea",
				"ee" => "Estonia",
				"et" => "Ethiopia",
				"fk" => "Falkland Islands (Malvinas)",
				"fo" => "Faroe Islands",
				"fj" => "Fiji",
				"fi" => "Finland",
				"fr" => "France",
				"gf" => "French Guiana",
				"pf" => "French Polynesia",
				"tf" => "French Southern Territories",
				"ga" => "Gabon",
				"gm" => "Gambia",
				"ge" => "Georgia",
				"de" => "Germany",
				"gh" => "Ghana",
				"gi" => "Gibraltar",
				"gr" => "Greece",
				"gl" => "Greenland",
				"gd" => "Grenada",
				"gp" => "Guadeloupe",
				"gu" => "Guam",
				"gt" => "Guatemala",
				"gn" => "Guinea",
				"gw" => "Guinea-Bissau",
				"gy" => "Guyana",
				"ht" => "Haiti",
				"hm" => "Heard Island and Mcdonald Islands",
				"va" => "Holy See (Vatican City State)",
				"hn" => "Honduras",
				"hk" => "Hong Kong",
				"hu" => "Hungary",
				"is" => "Iceland",
				"in" => "India",
				"id" => "Indonesia",
				"ir" => "Iran, Islamic Republic",
				"iq" => "Iraq",
				"ie" => "Ireland",
				"il" => "Israel",
				"it" => "Italy",
				"jm" => "Jamaica",
				"jp" => "Japan",
				"jo" => "Jordan",
				"kz" => "Kazakhstan",
				"ke" => "Kenya",
				"ki" => "Kiribati",
				"kp" => "Korea, Democratic People's Republic",
				"kr" => "Korea, Republic",
				"kw" => "Kuwait",
				"kg" => "Kyrgyzstan",
				"la" => "Lao People's Democratic Republic",
				"lv" => "Latvia",
				"lb" => "Lebanon",
				"ls" => "Lesotho",
				"lr" => "Liberia",
				"ly" => "Libyan Arab Jamahiriya",
				"li" => "Liechtenstein",
				"lt" => "Lithuania",
				"lu" => "Luxembourg",
				"mo" => "Macao",
				"mk" => "Macedonia, the Former Yugoslav Republic",
				"mg" => "Madagascar",
				"mw" => "Malawi",
				"my" => "Malaysia",
				"mv" => "Maldives",
				"ml" => "Mali",
				"mt" => "Malta",
				"mh" => "Marshall Islands",
				"mq" => "Martinique",
				"mr" => "Mauritania",
				"mu" => "Mauritius",
				"yt" => "Mayotte",
				"mx" => "Mexico",
				"fm" => "Micronesia, Federated States",
				"md" => "Moldova, Republic",
				"mc" => "Monaco",
				"mn" => "Mongolia",
				"ms" => "Montserrat",
				"ma" => "Morocco",
				"mz" => "Mozambique",
				"mm" => "Myanmar",
				"na" => "Namibia",
				"nr" => "Nauru",
				"np" => "Nepal",
				"nl" => "Netherlands",
				"an" => "Netherlands Antilles",
				"nc" => "New Caledonia",
				"nz" => "New Zealand",
				"ni" => "Nicaragua",
				"ne" => "Niger",
				"ng" => "Nigeria",
				"nu" => "Niue",
				"nf" => "Norfolk Island",
				"mp" => "Northern Mariana Islands",
				"no" => "Norway",
				"om" => "Oman",
				"pk" => "Pakistan",
				"pw" => "Palau",
				"ps" => "Palestinian Territory, Occupied",
				"pa" => "Panama",
				"pg" => "Papua New Guinea",
				"py" => "Paraguay",
				"pe" => "Peru",
				"ph" => "Philippines",
				"pn" => "Pitcairn",
				"pl" => "Poland",
				"pt" => "Portugal",
				"pr" => "Puerto Rico",
				"qa" => "Qatar",
				"re" => "Reunion",
				"ro" => "Romania",
				"ru" => "Russian Federation",
				"rw" => "Rwanda",
				"sh" => "Saint Helena",
				"kn" => "Saint Kitts and Nevis",
				"lc" => "Saint Lucia",
				"pm" => "Saint Pierre and Miquelon",
				"vc" => "Saint Vincent and the Grenadines",
				"ws" => "Samoa",
				"sm" => "San Marino",
				"st" => "Sao Tome and Principe",
				"sa" => "Saudi Arabia",
				"sn" => "Senegal",
				"cs" => "Serbia and Montenegro",
				"sc" => "Seychelles",
				"sl" => "Sierra Leone",
				"sg" => "Singapore",
				"sk" => "Slovakia",
				"si" => "Slovenia",
				"sb" => "Solomon Islands",
				"so" => "Somalia",
				"za" => "South Africa",
				"gs" => "South Georgia and the South Sandwich Islands",
				"es" => "Spain",
				"lk" => "Sri Lanka",
				"sd" => "Sudan",
				"sr" => "Suriname",
				"sj" => "Svalbard and Jan Mayen",
				"sz" => "Swaziland",
				"se" => "Sweden",
				"ch" => "Switzerland",
				"sy" => "Syrian Arab Republic",
				"tw" => "Taiwan, Province of China",
				"tj" => "Tajikistan",
				"tz" => "Tanzania, United Republic",
				"th" => "Thailand",
				"tl" => "Timor-Leste",
				"tg" => "Togo",
				"tk" => "Tokelau",
				"to" => "Tonga",
				"tt" => "Trinidad and Tobago",
				"tn" => "Tunisia",
				"tr" => "Turkey",
				"tm" => "Turkmenistan",
				"tc" => "Turks and Caicos Islands",
				"tv" => "Tuvalu",
				"ug" => "Uganda",
				"ua" => "Ukraine",
				"ae" => "United Arab Emirates",
				"uk" => "United Kingdom",
				"us" => "United States",
				"um" => "United States Minor Outlying Islands",
				"uy" => "Uruguay",
				"uz" => "Uzbekistan",
				"vu" => "Vanuatu",
				"ve" => "Venezuela",
				"vn" => "Viet Nam",
				"vg" => "Virgin Islands, British",
				"vi" => "Virgin Islands, U.S.",
				"wf" => "Wallis and Futuna",
				"eh" => "Western Sahara",
				"ye" => "Yemen",
				"zm" => "Zambia",
				"zw" => "Zimbabwe"
			]
		],
		"nsfw" => [
			"display" => "NSFW",
			"option" => [
				"yes" => "Yes", // safe=active
				"no" => "No" // safe=off
			]
		]
	];

	switch($page){

		case "web":
			// Order of the merged keys: country, nsfw, lang, newer, older, spellcheck
			return array_merge(
				$base,
				[
					"lang" => [ // lr=<lang> (prefix lang with "lang_")
						"display" => "Language",
						"option" => [
							"any" => "Any language",
							"ar" => "Arabic",
							"bg" => "Bulgarian",
							"ca" => "Catalan",
							"cs" => "Czech",
							"da" => "Danish",
							"de" => "German",
							"el" => "Greek",
							"en" => "English",
							"es" => "Spanish",
							"et" => "Estonian",
							"fi" => "Finnish",
							"fr" => "French",
							"hr" => "Croatian",
							"hu" => "Hungarian",
							"id" => "Indonesian",
							"is" => "Icelandic",
							"it" => "Italian",
							"iw" => "Hebrew",
							"ja" => "Japanese",
							"ko" => "Korean",
							"lt" => "Lithuanian",
							"lv" => "Latvian",
							"nl" => "Dutch",
							"no" => "Norwegian",
							"pl" => "Polish",
							"pt" => "Portuguese",
							"ro" => "Romanian",
							"ru" => "Russian",
							"sk" => "Slovak",
							"sl" => "Slovenian",
							"sr" => "Serbian",
							"sv" => "Swedish",
							"tr" => "Turkish",
							"zh-CN" => "Chinese (Simplified)",
							"zh-TW" => "Chinese (Traditional)"
						]
					]
				],
				$date_range,
				[
					"spellcheck" => [
						"display" => "Spellcheck",
						"option" => [
							"yes" => "Yes",
							"no" => "No"
						]
					]
				]
			);

		case "images":
			return array_merge(
				$base,
				[
					"time" => [ // tbs=qdr:<time>
						"display" => "Time posted",
						"option" => [
							"any" => "Any time",
							"d" => "Past 24 hours",
							"w" => "Past week",
							"m" => "Past month",
							"y" => "Past year"
						]
					],
					"size" => [ // imgsz
						"display" => "Size",
						"option" => [
							"any" => "Any size",
							"l" => "Large",
							"m" => "Medium",
							"i" => "Icon",
							"qsvga" => "Larger than 400x300",
							"vga" => "Larger than 640x480",
							"svga" => "Larger than 800x600",
							"xga" => "Larger than 1024x768",
							"2mp" => "Larger than 2MP",
							"4mp" => "Larger than 4MP",
							"6mp" => "Larger than 6MP",
							"8mp" => "Larger than 8MP",
							"10mp" => "Larger than 10MP",
							"12mp" => "Larger than 12MP",
							"15mp" => "Larger than 15MP",
							"20mp" => "Larger than 20MP",
							"40mp" => "Larger than 40MP",
							"70mp" => "Larger than 70MP"
						]
					],
					"ratio" => [ // imgar
						"display" => "Aspect ratio",
						"option" => [
							"any" => "Any ratio",
							"t|xt" => "Tall",
							"s" => "Square",
							"w" => "Wide",
							"xw" => "Panoramic"
						]
					],
					"color" => [ // imgc
						"display" => "Color",
						"option" => [
							"any" => "Any color",
							"color" => "Full color",
							"bnw" => "Black & white",
							"trans" => "Transparent",
							// from here, imgcolor
							"red" => "Red",
							"orange" => "Orange",
							"yellow" => "Yellow",
							"green" => "Green",
							"teal" => "Teal",
							"blue" => "Blue",
							"purple" => "Purple",
							"pink" => "Pink",
							"white" => "White",
							"gray" => "Gray",
							"black" => "Black",
							"brown" => "Brown"
						]
					],
					"type" => [ // tbs=itp:<type>
						"display" => "Type",
						"option" => [
							"any" => "Any type",
							"clipart" => "Clip Art",
							"lineart" => "Line Drawing",
							"animated" => "Animated"
						]
					],
					"format" => [ // as_filetype
						"display" => "Format",
						"option" => [
							"any" => "Any format",
							"jpg" => "JPG",
							"gif" => "GIF",
							"png" => "PNG",
							"bmp" => "BMP",
							"svg" => "SVG",
							"webp" => "WEBP",
							"ico" => "ICO",
							"craw" => "RAW"
						]
					],
					"rights" => [ // tbs=sur:<rights>
						"display" => "Usage rights",
						"option" => [
							"any" => "Any license",
							"cl" => "Creative Commons licenses",
							"ol" => "Commercial & other licenses"
						]
					]
				]
			);

		case "videos":
			return array_merge(
				$base,
				$date_range,
				[
					"duration" => [
						"display" => "Duration",
						"option" => [
							"any" => "Any duration",
							"s" => "Short (0-4min)", // tbs=dur:s
							"m" => "Medium (4-20min)", // tbs=dur:m
							"l" => "Long (20+ min)" // tbs=dur:l
						]
					],
					"quality" => [
						"display" => "Quality",
						"option" => [
							"any" => "Any quality",
							"h" => "High quality" // tbs=hq:h
						]
					],
					"captions" => [
						"display" => "Captions",
						"option" => [
							"any" => "No preference",
							"yes" => "Closed captioned" // tbs=cc:1
						]
					]
				]
			);

		case "news":
			return array_merge(
				$base,
				$date_range,
				[
					"sort" => [
						"display" => "Sort",
						"option" => [
							"relevance" => "Relevance",
							"date" => "Date" // sbd:1
						]
					]
				]
			);
	}

	// Unknown page type: no filter set is defined for it.
	return null;
}
-
/**
 * Fetch a URL with cURL using browser-like request headers and return the
 * response body.
 *
 * @param mixed  $proxy Handed unchanged to $this->backend->assign_proxy()
 *                      together with the cURL handle.
 * @param string $url   URL to request.
 * @param array  $get   Query parameters; when non-empty they are encoded
 *                      with http_build_query() and appended to $url.
 * @return string|bool  Whatever curl_exec() returns with
 *                      CURLOPT_RETURNTRANSFER enabled.
 * @throws Exception    Carrying cURL's error message when the transfer
 *                      reports an error.
 */
private function get($proxy, $url, $get = []){

	$headers = [
		"User-Agent: " . config::USER_AGENT,
		"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
		"Accept-Language: en-US,en;q=0.5",
		"Accept-Encoding: gzip",
		"DNT: 1",
		//"Cookie: SOCS=CAESNQgCEitib3FfaWRlbnRpdHlmcm9udGVuZHVpc2VydmVyXzIwMjQwMzE3LjA4X3AwGgJlbiAEGgYIgM7orwY",
		"Connection: keep-alive",
		"Upgrade-Insecure-Requests: 1",
		"Sec-Fetch-Dest: document",
		"Sec-Fetch-Mode: navigate",
		"Sec-Fetch-Site: none",
		"Sec-Fetch-User: ?1",
		"Priority: u=1",
		"TE: trailers"
	];

	if($get !== []){

		// Extend an existing query string instead of adding a second "?"
		$separator = strpos($url, "?") === false ? "?" : "&";
		$url .= $separator . http_build_query($get);
	}

	$curlproc = curl_init();

	try{

		curl_setopt($curlproc, CURLOPT_URL, $url);

		// empty string: let cURL advertise and decode every encoding it supports
		curl_setopt($curlproc, CURLOPT_ENCODING, "");
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);

		// use http2
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

		// hand the body back as the return value and keep TLS verification on
		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);

		// 30 seconds to connect, 30 seconds for the whole transfer
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

		// follow redirects
		curl_setopt($curlproc, CURLOPT_FOLLOWLOCATION, true);
		$this->backend->assign_proxy($curlproc, $proxy);

		$data = curl_exec($curlproc);

		if(curl_errno($curlproc)){

			throw new Exception(curl_error($curlproc));
		}

		return $data;

	}finally{

		// Runs on success and on failure, so the handle is released even
		// when an exception leaves this method.
		curl_close($curlproc);
	}
}
-
-
-
-
- private function parsepage($html, $pagetype, $search, $proxy, $params){
-
- $out = [
- "status" => "ok",
- "spelling" => [
- "type" => "no_correction",
- "using" => null,
- "correction" => null
- ],
- "npt" => null,
- "answer" => [],
- "web" => [],
- "image" => [],
- "video" => [],
- "news" => [],
- "related" => []
- ];
-
- $this->fuckhtml->load($html);
-
- $this->detect_sorry();
-
- // parse all <style> tags
- $this->parsestyles();
-
- // get javascript images
- $this->scrape_dimg($html);
-
- // get html blobs
- preg_match_all(
- '/function\(\){window\.jsl\.dh\(\'([^\']+?)\',\'(.+?[^\'])\'\);/',
- $html,
- $blobs
- );
-
- $this->blobs = [];
- if(isset($blobs[1])){
-
- for($i=0; $i<count($blobs[1]); $i++){
-
- $this->blobs[$blobs[1][$i]] =
- $this->fuckhtml
- ->parseJsString(
- $blobs[2][$i]
- );
- }
- }
-
- $this->scrape_imagearr($html);
-
- //
- // load result column
- //
- $result_div =
- $this->fuckhtml
- ->getElementById(
- "center_col",
- "div"
- );
-
- if($result_div === false){
-
- throw new Exception("Failed to grep result div");
- }
-
- $this->fuckhtml->load($result_div);
-
- //
- // Get word corrections
- //
- $correction =
- $this->fuckhtml
- ->getElementById(
- "fprs",
- "p"
- );
-
- if($correction){
-
- $this->fuckhtml->load($correction);
-
- $a =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- $using =
- $this->fuckhtml
- ->getElementById(
- "fprsl",
- $a
- );
-
- if($using){
-
- $using =
- $this->fuckhtml
- ->getTextContent(
- $using
- );
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- $type_span =
- $this->fuckhtml
- ->getTextContent(
- $spans[0]
- );
-
- $type = "not_many";
-
- if(
- stripos(
- $type_span,
- "Showing results for"
- ) !== false
- ){
-
- $type = "including";
- }
-
- $correction =
- $this->fuckhtml
- ->getTextContent(
- $a[count($a) - 1]
- );
-
- $out["spelling"] = [
- "type" => $type,
- "using" => $using,
- "correction" => $correction
- ];
- }
-
- // reset
- $this->fuckhtml->load($result_div);
- }else{
-
- // get the "Did you mean?" prompt
- $taw =
- $this->fuckhtml
- ->getElementById(
- "taw"
- );
-
- if($taw){
-
- $this->fuckhtml->load($taw);
-
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) !== 0){
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $as[0]
- );
-
- // @TODO implement did_you_mean
- $out["spelling"] = [
- "type" => "including",
- "using" => $search,
- "correction" => $text
- ];
- }
- }
-
- $this->fuckhtml->load($result_div);
- }
-
- //
- // get notices
- //
- $botstuff =
- $this->fuckhtml
- ->getElementById(
- "botstuff"
- );
-
- // important for later
- $last_page = false;
-
- if($botstuff){
-
- $this->fuckhtml->load($botstuff);
-
- $cards =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "line-height" => "normal"
- ]
- ),
- "div"
- );
-
- foreach($cards as $card){
-
- $this->fuckhtml->load($card);
-
- $h2 =
- $this->fuckhtml
- ->getElementsByTagName(
- "h2"
- );
-
- if(count($h2) !== 0){
-
- $title =
- $this->fuckhtml
- ->getTextContent(
- $h2[0]
- );
-
- $card["innerHTML"] =
- str_replace(
- $h2[0]["outerHTML"],
- "",
- $card["innerHTML"]
- );
- }else{
-
- $title = "Notice";
- }
-
- $div =
- $this->fuckhtml
- ->getElementsByTagName(
- "div"
- );
-
- // probe for related searches div, if found, ignore it cause its shit
- $probe =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "list",
- $div
- );
-
- // also probe for children
- if(count($probe) === 0){
-
- $probe =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "flex-shrink" => "0",
- "-moz-box-flex" => "0",
- "flex-grow" => "0",
- "overflow" => "hidden"
- ]
- ),
- $div
- );
- }
-
- if(count($probe) === 0){
-
- $description = [];
-
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) !== 0){
-
- $first = true;
-
- foreach($as as $a){
-
- $text_link =
- $this->fuckhtml
- ->getTextContent(
- $a
- );
-
- if(stripos($text_link, "repeat the search") !== false){
-
- $last_page = true;
- break 2;
- }
-
- $parts =
- explode(
- $a["outerHTML"],
- $card["innerHTML"],
- 2
- );
-
- $card["innerHTML"] = $parts[1];
-
- $value =
- preg_replace(
- '/ +/',
- " ",
- $this->fuckhtml
- ->getTextContent(
- $parts[0],
- false,
- false
- )
- );
-
- if(strlen(trim($value)) !== 0){
-
- $description[] = [
- "type" => "text",
- "value" => $value
- ];
-
- if($first){
-
- $description[0]["value"] =
- ltrim($description[0]["value"]);
- }
- }
-
- $first = false;
-
- $description[] = [
- "type" => "link",
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $a["attributes"]
- ["href"]
- ),
- "value" => $text_link
- ];
- }
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $card["innerHTML"],
- false,
- false
- );
-
- if(strlen(trim($text)) !== 0){
-
- $description[] = [
- "type" => "text",
- "value" =>
- rtrim(
- $text
- )
- ];
- }
- }
-
- if(count($description) !== 0){
-
- $out["answer"][] = [
- "title" => $title,
- "description" => $description,
- "url" => null,
- "thumb" => null,
- "table" => [],
- "sublink" => []
- ];
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($html);
- }
-
- //
- // get "Related Searches" and "People also search for"
- //
- $relateds =
- $this->fuckhtml
- ->getElementsByClassName(
- "wyccme",
- "div"
- );
-
- foreach($relateds as $related){
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $related
- );
-
- if($text == "More results"){ continue; }
-
- $out["related"][] = $text;
- }
-
- //
- // Get text results
- //
- $results =
- $this->fuckhtml
- ->getElementsByClassName(
- "g",
- "div"
- );
-
- $this->skip_next = false;
-
- foreach($results as $result){
-
- if($this->skip_next){
-
- $this->skip_next = false;
- continue;
- }
-
- $this->fuckhtml->load($result);
-
- $web = [
- "title" => null,
- "description" => null,
- "url" => null,
- "date" => null,
- "type" => "web",
- "thumb" => [
- "url" => null,
- "ratio" => null
- ],
- "sublink" => [],
- "table" => []
- ];
-
- // Detect presence of sublinks
- $g =
- $this->fuckhtml
- ->getElementsByClassName(
- "g",
- "div"
- );
-
- $sublinks = [];
- if(count($g) > 0){
-
- $table =
- $this->fuckhtml
- ->getElementsByTagName(
- "table"
- );
-
- if(count($table) !== 0){
-
- // found some sublinks!
-
- $this->fuckhtml->load($table[0]);
-
- $tds =
- $this->fuckhtml
- ->getElementsByTagName(
- "td"
- );
-
- foreach($tds as $td){
-
- $this->fuckhtml->load($td);
-
- $a =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(
- count($a) === 0 ||
- (
- isset($a[0]["attributes"]["class"]) &&
- $a[0]["attributes"]["class"] == "fl"
- )
- ){
-
- continue;
- }
-
- $td["innerHTML"] =
- str_replace(
- $a[0]["outerHTML"],
- "",
- $td["innerHTML"]
- );
-
- $web["sublink"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $a[0]
- )
- ),
- "description" =>
- html_entity_decode(
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $td
- )
- )
- ),
- "url" =>
- $this->unshiturl(
- $a[0]
- ["attributes"]
- ["href"]
- ),
- "date" => null
- ];
- }
-
- // reset
- $this->fuckhtml->load($result);
- }
-
- // skip on next iteration
- $this->skip_next = true;
- }
-
- // get title
- $h3 =
- $this->fuckhtml
- ->getElementsByTagName(
- "h3"
- );
-
- if(count($h3) === 0){
-
- continue;
- }
-
- $web["title"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $h3[0]
- )
- );
-
- // get url
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- $web["url"] =
- $this->unshiturl(
- $as[0]
- ["attributes"]
- ["href"]
- );
-
- if(
- !preg_match(
- '/^http/',
- $web["url"]
- )
- ){
-
- // skip if invalid url is found
- continue;
- }
-
- //
- // probe for twitter carousel
- //
- $carousel =
- $this->fuckhtml
- ->getElementsByTagName(
- "g-scrolling-carousel"
- );
-
- if(count($carousel) !== 0){
-
- $this->fuckhtml->load($carousel[0]);
-
- $items =
- $this->fuckhtml
- ->getElementsByTagName(
- "g-inner-card"
- );
-
- $has_thumbnail = false;
-
- foreach($items as $item){
-
- $this->fuckhtml->load($item);
-
- if($has_thumbnail === false){
-
- // get thumbnail
- $thumb =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- count($thumb) !== 0 &&
- isset($thumb[0]["attributes"]["id"])
- ){
-
- $web["thumb"] = [
- "url" =>
- $this->getdimg(
- $thumb[0]["attributes"]["id"]
- ),
- "ratio" => "16:9"
- ];
-
- $has_thumbnail = true;
- }
-
- // or else, try getting a thumbnail from next container
- }
-
- // cache div
- $div =
- $this->fuckhtml
- ->getElementsByTagName(
- "div"
- );
-
- // get link
- $links =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- // get description of carousel sublink
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "heading",
- $div
- );
-
- if(count($description) !== 0){
-
- $description =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- );
- }else{
-
- $description = null;
- }
-
- $bottom =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "style",
- "z-index:2",
- $div
- );
-
- $title = null;
- $date = null;
- if(count($bottom) !== 0){
-
- $this->fuckhtml->load($bottom[0]);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- $title =
- $this->fuckhtml
- ->getTextContent(
- $spans[0]
- );
-
- $date =
- strtotime(
- $this->fuckhtml
- ->getTextContent(
- $spans[count($spans) - 1]
- )
- );
- }
-
- $web["sublink"][] = [
- "title" => $title,
- "description" => $description,
- "url" =>
- $this->unshiturl(
- $links[0]
- ["attributes"]
- ["href"]
- ),
- "date" => $date
- ];
- }
-
- $out["web"][] = $web;
- continue;
- }
-
- //
- // get viewcount, time posted and follower count from <cite> tag
- //
- $cite =
- $this->fuckhtml
- ->getElementsByTagName(
- "cite"
- );
-
- if(count($cite) !== 0){
-
- $this->fuckhtml->load($cite[0]);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName("span");
-
- if(count($spans) === 0){
-
- $cites =
- explode(
- "·",
- $this->fuckhtml
- ->getTextContent(
- $cite[0]
- )
- );
-
- foreach($cites as $cite){
-
- $cite = trim($cite);
-
- if(
- preg_match(
- '/(.+) (views|followers|likes)$/',
- $cite,
- $match
- )
- ){
-
- $web["table"][ucfirst($match[2])] =
- $match[1];
- }elseif(
- preg_match(
- '/ago$/',
- $cite
- )
- ){
-
- $web["date"] =
- strtotime($cite);
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($result);
- }
-
- //
- // attempt to fetch description cleanly
- //
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "style",
- "-webkit-line-clamp:2"
- );
-
- if(count($description) !== 0){
-
- $web["description"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- );
- }else{
-
- // use ANOTHER method where the description is a header of the result
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "wa:/description"
- );
-
- if(count($description) !== 0){
-
- // get date off that shit
- $date =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "font-size" => "12px",
- "line-height" => "1.34",
- "display" => "inline-block",
- "font-family" => "google sans,arial,sans-serif",
- "padding-right" => "0",
- "white-space" => "nowrap"
- ]
- ),
- "span"
- );
-
- if(count($date) !== 0){
-
- $description[0]["innerHTML"] =
- str_replace(
- $date[0]["outerHTML"],
- "",
- $description[0]["innerHTML"]
- );
-
- $web["date"] =
- strtotime(
- $this->fuckhtml
- ->getTextContent(
- $date[0]
- )
- );
- }
-
- $web["description"] =
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- );
- }else{
-
- // Yes.. You guessed it, use ANOTHER method to get descriptions
- // off youtube containers
- $description =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "-webkit-box-orient" => "vertical",
- "display" => "-webkit-box",
- "font-size" => "14px",
- "-webkit-line-clamp" => "2",
- "line-height" => "22px",
- "overflow" => "hidden",
- "word-break" => "break-word",
- "color" => "#4d5156"
- ]
- ),
- "div"
- );
-
- if(count($description) !== 0){
-
- // check for video duration
- $duration =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "background-color" => "rgba(0,0,0,0.6)",
- "color" => "#fff",
- "fill" => "#fff"
- ]
- ),
- "div"
- );
-
- if(count($duration) !== 0){
-
- $web["table"]["Duration"] =
- $this->fuckhtml
- ->getTextContent(
- $duration[0]
- );
- }
-
- $web["description"] =
- $this->titledots(
- html_entity_decode(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- )
- );
-
- // get author + time posted
- $info =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "color" => "var(" . $this->getcolorvar("#70757a") . ")",
- "font-size" => "14px",
- "line-height" => "20px",
- "margin-top" => "12px"
- ]
- ),
- "div"
- );
-
- if(count($info) !== 0){
-
- $info =
- explode(
- "·",
- $this->fuckhtml
- ->getTextContent(
- $info[0]
- )
- );
-
- switch(count($info)){
-
- case 3:
- $web["table"]["Author"] = trim($info[1]);
- $web["date"] = strtotime(trim($info[2]));
- break;
-
- case 2:
- $web["date"] = strtotime(trim($info[1]));
- break;
- }
- }
- }
- }
- }
-
- //
- // get categories of content within the search result
- //
- $cats =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "data-sncf",
- "div"
- );
-
- foreach($cats as $cat){
-
- $this->fuckhtml->load($cat);
-
- // detect image category
- $images =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(count($images) !== 0){
-
- foreach($images as $image){
-
- if(isset($image["attributes"]["id"])){
- // we found an image
-
- if(isset($image["attributes"]["width"])){
-
- $width = (int)$image["attributes"]["width"];
-
- if($width == 110){
-
- $ratio = "1:1";
- }elseif($width > 110){
-
- $ratio = "16:9";
- }else{
-
- $ratio = "9:16";
- }
- }else{
-
- $ratio = "1:1";
- }
-
- $web["thumb"] = [
- "url" => $this->getdimg($image["attributes"]["id"]),
- "ratio" => $ratio
- ];
-
- continue 2;
- }
- }
- }
-
- // Detect rating
- $spans_unfiltered =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- $spans =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "aria-label",
- $spans_unfiltered
- );
-
- foreach($spans as $span){
-
- if(
- preg_match(
- '/^Rated/',
- $span["attributes"]["aria-label"]
- )
- ){
-
- // found rating
- // scrape rating
- preg_match(
- '/([0-9.]+).*([0-9.]+)/',
- $span["attributes"]["aria-label"],
- $rating
- );
-
- if(isset($rating[1])){
-
- $web["table"]["Rating"] =
- $rating[1] . "/" . $rating[2];
- }
-
- $has_seen_reviews = 0;
- foreach($spans_unfiltered as $span_unfiltered){
-
- if(
- preg_match(
- '/([0-9,.]+) +([A-z]+)$/',
- $this->fuckhtml
- ->getTextContent(
- $span_unfiltered
- ),
- $votes
- )
- ){
-
- $has_seen_reviews++;
- $web["table"][ucfirst($votes[2])] = $votes[1];
- continue;
- }
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $span_unfiltered
- );
-
- if(
- $text == " " ||
- $text == ""
- ){
-
- break;
- }
-
- switch($has_seen_reviews){
-
- case 1:
- // scrape price
- $web["table"]["Price"] = $text;
- $has_seen_reviews++;
- break;
-
- case 2:
- // scrape platform
- $web["table"]["Platform"] = $text;
- $has_seen_reviews++;
- break;
-
- case 3:
- // Scrape type
- $web["table"]["Medium"] = $text;
- break;
- }
- }
-
- continue 2;
- }
- }
-
- // check if its a table of small sublinks
- $table =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "display" => "table",
- "white-space" => "nowrap",
- "margin" => "5px 0",
- "line-height" => "1.58",
- "color" => "var(" . $this->getcolorvar("#70757a") . ")"
- ]
- ),
- "div"
- );
-
- if(count($table) !== 0){
-
- $this->fuckhtml->load($table[0]);
-
- $rows =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "display" => "flex",
- "white-space" => "normal"
- ]
- ),
- "div"
- );
-
- foreach($rows as $row){
-
- $this->fuckhtml->load($row);
-
- $sublink = [
- "title" => null,
- "description" => null,
- "url" => null,
- "date" => null
- ];
-
- $link =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- )[0];
-
- $sublink["title"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $link
- )
- );
-
- $sublink["url"] =
- $this->unshiturl(
- $link
- ["attributes"]
- ["href"]
- );
-
- $row["innerHTML"] =
- str_replace(
- $link["outerHTML"],
- "",
- $row["innerHTML"]
- );
-
- $this->fuckhtml->load($row);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- foreach($spans as $span){
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $span
- );
-
- if(
- preg_match(
- '/answers?$/',
- $text
- )
- ){
-
- $sublink["description"] =
- $text;
-
- continue;
- }
-
- $time = strtotime($text);
-
- if($time !== false){
-
- $sublink["date"] = $time;
- }
- }
-
- $web["sublink"][] = $sublink;
- }
-
- // reset
- $this->fuckhtml->load($cat);
- continue;
- }
-
- // check if its an answer header
- $answer_header =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "overflow" => "hidden",
- "text-overflow" => "ellipsis"
- ]
- ),
- "span"
- );
-
- if(count($answer_header) !== 0){
-
- $link =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- $cat["innerHTML"] =
- str_replace(
- $link[0]["outerHTML"],
- "",
- $cat["innerHTML"]
- );
-
- $web["sublink"][] = [
- "title" =>
- $this->fuckhtml
- ->getTextContent(
- $link[0]
- ),
- "description" =>
- $this->titledots(
- trim(
- str_replace(
- "\xc2\xa0",
- " ",
- html_entity_decode(
- $this->fuckhtml
- ->getTextContent(
- $cat
- )
- )
- ),
- " ·"
- )
- ),
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $link[0]
- ["attributes"]
- ["href"]
- ),
- "date" => null
- ];
-
- continue;
- }
-
- // check if its list of small sublinks
- $urls =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($urls) !== 0){
-
- // found small links
- foreach($urls as $url){
-
- $target =
- $this->fuckhtml
- ->getTextContent(
- $url
- ["attributes"]
- ["href"]
- );
-
- if(
- !preg_match(
- '/^http/',
- $target
- )
- ){
-
- continue;
- }
-
- $web["sublink"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $url
- )
- ),
- "description" => null,
- "url" => $target,
- "date" => null
- ];
- }
-
- continue;
- }
-
- // we probed everything, assume this is the description
- // if we didn't find one cleanly previously
- if($web["description"] === null){
- $web["description"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $cat
- )
- );
- }
- }
-
- // check if description contains date
- $description = explode("—", $web["description"], 2);
-
- if(
- count($description) === 2 &&
- strlen($description[0]) <= 20
- ){
-
- $date = strtotime($description[0]);
-
- if($date !== false){
-
- $web["date"] = $date;
- $web["description"] = ltrim($description[1]);
- }
- }
-
- // fetch youtube thumbnail
- $thumbnail =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "border-radius" => "8px",
- "height" => "fit-content",
- "justify-content" => "center",
- "margin-right" => "20px",
- "margin-top" => "4px",
- "position" => "relative",
- "width" => "fit-content"
- ]
- ),
- "div"
- );
-
- if(count($thumbnail) !== 0){
-
- // load thumbnail container
- $this->fuckhtml->load($thumbnail[0]);
-
- $image =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- count($image) !== 0 &&
- isset($image[0]["attributes"]["id"])
- ){
-
- $web["thumb"] = [
- "url" =>
- $this->unshit_thumb(
- $this->getdimg(
- $image[0]["attributes"]["id"]
- )
- ),
- "ratio" => "16:9"
- ];
- }
-
- // reset
- $this->fuckhtml->load($result);
- }
-
- $out["web"][] = $web;
- }
-
- // reset
- $this->fuckhtml->load($result_div);
-
- //
- // Get instant answers
- //
- $answer_containers =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "padding-left" => "0px",
- "padding-right" => "0px"
- ]
- ),
- "div"
- );
-
- $date_class =
- $this->getstyle(
- [
- "font-size" => "12px",
- "line-height" => "1.34",
- "display" => "inline-block",
- "font-family" => "google sans,arial,sans-serif",
- "padding-right" => "0",
- "white-space" => "nowrap"
- ]
- );
-
- foreach($answer_containers as $container){
-
- $this->fuckhtml->load($container);
-
- $web = [
- "title" => null,
- "description" => null,
- "url" => null,
- "date" => null,
- "type" => "web",
- "thumb" => [
- "url" => null,
- "ratio" => null
- ],
- "sublink" => [],
- "table" => []
- ];
-
- $answers =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "aria-controls",
- "div"
- );
-
- $item_insert_pos = 1;
- foreach($answers as $answer){
-
- $out["related"][] =
- $this->fuckhtml
- ->getTextContent(
- $answer
- );
-
- if(
- isset(
- $this->blobs[
- $answer
- ["attributes"]
- ["aria-controls"]
- ]
- )
- ){
-
- $this->fuckhtml->load(
- $this->blobs[
- $answer
- ["attributes"]
- ["aria-controls"]
- ]
- );
-
- $divs =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "id",
- "div"
- );
-
- foreach($divs as $div){
-
- if(
- !isset(
- $this->blobs[
- $div
- ["attributes"]
- ["id"]
- ]
- )
- ){
-
- continue;
- }
-
- $this->fuckhtml->load(
- $this->blobs[
- $div
- ["attributes"]
- ["id"]
- ]
- );
-
- // get url
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) !== 0){
-
- $web["url"] =
- $this->unshiturl(
- $as[0]["attributes"]["href"]
- );
-
- // skip entries that redirect to a search
- if(
- !preg_match(
- '/^http/',
- $web["url"]
- )
- ){
-
- continue 3;
- }
- }
-
- // get title
- $h3 =
- $this->fuckhtml
- ->getElementsByTagName(
- "h3"
- );
-
- if(count($h3) !== 0){
-
- $web["title"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $h3[0]
- )
- );
- }
-
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "wa:/description",
- "div"
- );
-
- if(count($description) !== 0){
-
- // check for date
- $this->fuckhtml->load($description[0]);
-
- $date =
- $this->fuckhtml
- ->getElementsByClassName(
- $date_class,
- "span"
- );
-
- if(count($date) !== 0){
-
- $description[0]["innerHTML"] =
- str_replace(
- $date[0]["outerHTML"],
- "",
- $description[0]["innerHTML"]
- );
-
- $web["date"] =
- strtotime(
- $this->fuckhtml
- ->getTextContent(
- $date[0]
- )
- );
- }
-
- $web["description"] =
- ltrim(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- ),
- ": "
- );
- }
- }
-
- foreach($out["web"] as $item){
-
- if($item["url"] == $web["url"]){
-
- continue 2;
- }
- }
-
- array_splice($out["web"], $item_insert_pos, 0, [$web]);
- $item_insert_pos++;
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($result_div);
-
- //
- // Scrape word definition
- //
- $definition_container =
- $this->fuckhtml
- ->getElementsByClassName(
- "lr_container",
- "div"
- );
-
- if(count($definition_container) !== 0){
-
- $this->fuckhtml->load($definition_container[0]);
-
- // get header
- $header =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "EntryHeader",
- "div"
- );
-
- if(count($header) !== 0){
-
- $description = [];
-
- $this->fuckhtml->load($header[0]);
-
- $title_div =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "font-family" => "google sans,arial,sans-serif",
- "font-size" => "28px",
- "line-height" => "36px"
- ]
- )
- );
-
- if(count($title_div) !== 0){
-
- $title =
- $this->fuckhtml
- ->getTextContent(
- $title_div[0]
- );
- }else{
-
- $title = "Word definition";
- }
-
- $subtext_div =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "font-family" => "arial,sans-serif",
- "font-size" => "14px",
- "line-height" => "22px"
- ]
- ),
- "span"
- );
-
- if(count($subtext_div) !== 0){
-
- $description[] = [
- "type" => "quote",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $subtext_div[0]
- )
- ];
- }
-
- // get audio
- $audio =
- $this->fuckhtml
- ->getElementsByTagName(
- "audio"
- );
-
- if(count($audio) !== 0){
-
- $this->fuckhtml->load($audio[0]);
-
- $source =
- $this->fuckhtml
- ->getElementsByTagName(
- "source"
- );
-
- if(count($source) !== 0){
-
- $description[] = [
- "type" => "audio",
- "url" =>
- preg_replace(
- '/^\/\//',
- "https://",
- $this->fuckhtml
- ->getTextContent(
- $source[0]
- ["attributes"]
- ["src"]
- )
- )
- ];
- }
-
- }
-
- // remove header to avoid confusion
- $definition_container[0]["innerHTML"] =
- str_replace(
- $header[0]["outerHTML"],
- "",
- $definition_container[0]["innerHTML"]
- );
-
- // reset
- $this->fuckhtml->load($definition_container[0]);
-
- $vmods =
- $this->fuckhtml
- ->getElementsByClassName(
- "vmod",
- "div"
- );
-
- foreach($vmods as $category){
-
- if(
- !isset(
- $category
- ["attributes"]
- ["data-topic"]
- ) ||
- $category
- ["attributes"]
- ["class"] != "vmod"
- ){
-
- continue;
- }
-
- $this->fuckhtml->load($category);
-
- // get category type
- $type =
- $this->fuckhtml
- ->getElementsByTagName(
- "i"
- );
-
- if(count($type) !== 0){
-
- $description[] = [
- "type" => "title",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $type[0]
- )
- ];
- }
-
- // get heading text
- $headings =
- $this->fuckhtml
- ->getElementsByClassName(
- "xpdxpnd",
- "div"
- );
-
- foreach($headings as $heading){
-
- $description[] = [
- "type" => "quote",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $heading
- )
- ];
- }
-
- $definitions =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "SenseDefinition",
- "div"
- );
-
- $i = 1;
- $text = [];
-
- foreach($definitions as $definition){
-
- $text[] =
- $i . ". " .
- $this->fuckhtml
- ->getTextContent(
- $definition
- );
-
- $i++;
- }
-
- if(count($text) !== 0){
-
- $description[] = [
- "type" => "text",
- "value" =>
- implode("\n", $text)
- ];
- }
- }
-
- $out["answer"][] = [
- "title" => $title,
- "description" => $description,
- "url" => null,
- "thumb" => null,
- "table" => [],
- "sublink" => []
- ];
- }
-
- // reset
- $this->fuckhtml->load($result_div);
- }
-
- //
- // scrape elements with a g-section-with-header
- // includes: images, news carousels
- //
-
- $g_sections =
- $this->fuckhtml
- ->getElementsByTagName(
- "g-section-with-header"
- );
-
- if(count($g_sections) !== 0){
- foreach($g_sections as $g_section){
-
- // parse elements with a g-section-with-header
- $this->fuckhtml->load($g_section);
-
- $div_title =
- $this->fuckhtml
- ->getElementsByClassName(
- "a-no-hover-decoration",
- "a"
- );
-
- if(count($div_title) !== 0){
-
- // title detected, skip
- continue;
- }
-
- // no title detected: detect news container
- $news =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "outline-offset" => "-1px",
- "outline-width" => "1px",
- "display" => "flex",
- "flex-direction" => "column",
- "flex-grow" => "1"
- ]
- )
- );
-
- foreach($news as $new){
-
- $this->fuckhtml->load($new);
-
- $image =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "id",
- "img"
- );
-
- if(
- count($image) !== 0 &&
- !(
- isset($image[0]["attributes"]["style"]) &&
- strpos(
- $image[0]["attributes"]["style"],
- "height:18px"
- ) !== false
- )
- ){
-
- $thumb = [
- "url" =>
- $this->getdimg(
- $image[0]
- ["attributes"]
- ["id"]
- ),
- "ratio" => "1:1"
- ];
- }
-
- $title =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "heading",
- "div"
- )[0]
- )
- );
-
- $date_div =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "style",
- "div"
- );
-
- if(count($date_div) !== 0){
-
- foreach($date_div as $div){
-
- if(
- strpos(
- $div["attributes"]["style"],
- "bottom:"
- ) !== false
- ){
- $date =
- strtotime(
- $this->fuckhtml
- ->getTextContent(
- $div
- )
- );
-
- break;
- }
- }
- }else{
-
- $date = null;
- }
-
- $out["news"][] = [
- "title" => $title,
- "description" => null,
- "date" => $date,
- "thumb" => $thumb,
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $new
- ["attributes"]
- ["href"]
- )
- ];
- }
- }
-
- // reset
- $this->fuckhtml->load($result_div);
- }
-
- //
- // Parse images (carousel, left hand-side)
- //
- $image_carousels =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "id",
- "media_result_group",
- "div"
- );
-
- if(count($image_carousels) !== 0){
-
- foreach($image_carousels as $image_carousel){
-
- $this->fuckhtml->load($image_carousel);
-
- // get related searches in image carousel
- $relateds =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "display" => "inline-block",
- "margin-right" => "6px",
- "outline" => "none",
- "padding" => "6px 0"
- ],
- "a"
- )
- );
-
- foreach($relateds as $related){
-
- if(!isset($related["innerHTML"])){
-
- // found an image
- continue;
- }
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $related
- );
-
- if($text != ""){
-
- $out["related"][] = $text;
- }
- }
-
- $div =
- $this->fuckhtml
- ->getElementsByTagName(
- "div"
- );
-
- // get loaded images
- $images =
- $this->fuckhtml
- ->getElementsByClassName(
- "ivg-i",
- $div
- );
-
- foreach($images as $image){
-
- $this->fuckhtml->load($image);
-
- $img_tags =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- !isset($image["attributes"]["data-docid"]) ||
- !isset($this->image_arr[$image["attributes"]["data-docid"]])
- ){
-
- continue;
- }
-
- // search for the right image tag
- $image_tag = false;
- foreach($img_tags as $img){
-
- if(
- isset(
- $img
- ["attributes"]
- ["alt"]
- ) &&
- trim(
- $img
- ["attributes"]
- ["alt"]
- ) != ""
- ){
-
- $image_tag = $img;
- break;
- }
- }
-
- if($image_tag === false){
-
- continue;
- }
-
- $out["image"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $image_tag
- ["attributes"]
- ["alt"]
- )
- ),
- "source" =>
- $this->image_arr[
- $image
- ["attributes"]
- ["data-docid"]
- ],
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $image
- ["attributes"]
- ["data-lpage"]
- )
- ];
- }
-
- // get unloaded javascript images
- $images_js_sel =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "id",
- $div
- );
-
- $loaded = [];
-
- foreach($images_js_sel as $sel){
-
- if(
- !isset($this->blobs[$sel["attributes"]["id"]]) ||
- in_array((string)$sel["attributes"]["id"], $loaded, true)
- ){
-
- // not an unloaded javascript image
- continue;
- }
-
- $loaded[] = $sel["attributes"]["id"];
-
- // get yet another javascript component
- $this->fuckhtml->load($this->blobs[$sel["attributes"]["id"]]);
-
- // get js node: contains title & url
- $js_node =
- $this->fuckhtml
- ->getElementsByTagName(
- "div"
- )[0];
-
- if(!isset($this->blobs[$js_node["attributes"]["id"]])){
-
- // did not find refer id
- continue;
- }
-
- // load second javascript component
- $this->fuckhtml->load($this->blobs[$js_node["attributes"]["id"]]);
-
- // get title from image alt text.
- // data-src from this image is cropped, ignore it..
- $img =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- )[0];
-
- $out["image"][] = [
- "title" =>
- $this->fuckhtml
- ->getTextContent(
- $img["attributes"]["alt"]
- ),
- "source" =>
- $this->image_arr[
- $js_node["attributes"]["data-docid"]
- ],
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $js_node["attributes"]["data-lpage"]
- )
- ];
- }
- }
-
- // reset
- $this->fuckhtml->load($result_div);
- }
-
- //
- // Parse videos
- //
- $this->fuckhtml->load($result_div);
-
- $videos =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "data-vid",
- "div"
- );
-
- foreach($videos as $video){
-
- $this->fuckhtml->load($video);
-
- // get url
- $url =
- $this->fuckhtml
- ->getTextContent(
- $video
- ["attributes"]
- ["data-surl"]
- );
-
- foreach($out["web"] as $link){
-
- if($link["url"] == $url){
-
- // ignore if we already have the video in $out["web"]
- continue 2;
- }
- }
-
- // get heading element
- $heading =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "heading",
- "div"
- );
-
- if(count($heading) === 0){
-
- // no heading, fuck this.
- continue;
- }
-
- // get thumbnail before loading heading object
- $image =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "id",
- "img"
- );
-
- if(count($image) !== 0){
-
- $thumb = [
- "url" => $this->getdimg($image[0]["attributes"]["id"]),
- "ratio" => "16:9"
- ];
- }else{
-
- $thumb = [
- "url" => null,
- "ratio" => null
- ];
- }
-
- // get duration
- $duration_div =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "border-radius" => "10px",
- "font-family" => "arial,sans-serif-medium,sans-serif",
- "font-size" => "12px",
- "line-height" => "16px",
- "padding-block" => "2px",
- "padding-inline" => "8px"
- ]
- ),
- "div"
- );
-
- if(count($duration_div) !== 0){
-
- $duration =
- $this->hms2int(
- $this->fuckhtml
- ->getTextContent(
- $duration_div[0]
- )
- );
- }else{
-
- // check if its a livestream
- $duration =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "background-color" => "#d93025",
- "border-radius" => "10px",
- "color" => "#fff",
- "font-family" => "arial,sans-serif-medium,sans-serif",
- "font-size" => "12px",
- "line-height" => "16px",
- "padding-block" => "2px",
- "padding-inline" => "8px"
- ]
- ),
- "span"
- );
-
- if(count($duration) !== 0){
-
- $duration = "_LIVE";
- }else{
-
- $duration = null;
- }
- }
-
- // load heading
- $this->fuckhtml->load($heading[0]);
-
- // get title
- $title =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "font-family" => "arial,sans-serif",
- "font-size" => "16px",
- "font-weight" => "400",
- "line-height" => "24px"
- ]
- ),
- "div"
- );
-
- if(count($title) === 0){
-
- // ?? no title
- continue;
- }
-
- $title =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $title[0]
- )
- );
-
- // get date
- $date_div =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "color" => "var(" . $this->getcolorvar("#70757a") . ")",
- "font-size" => "14px"
- ]
- ),
- "div"
- );
-
- if(count($date_div) !== 0){
-
- $date = strtotime(
- $this->fuckhtml
- ->getTextContent(
- $date_div[0]
- )
- );
-
- if($date === false){
-
- // failed to parse date
- $date = null;
- }
- }else{
-
- $date = null;
- }
-
- $out["video"][] = [
- "title" => $title,
- "description" => null,
- "date" => $date,
- "duration" => $duration,
- "views" => null,
- "thumb" => $thumb,
- "url" => $url
- ];
- }
-
- //
- // Parse featured results (which contain images, fuck the rest desu)
- //
- $this->fuckhtml->load($html);
- $top =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "aria-label",
- "Featured results",
- "div"
- );
-
- if(count($top) !== 0){
-
- $this->fuckhtml->load($top[0]);
-
- // get images
- $grid =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "border-radius" => "20px",
- "display" => "grid",
- "grid-gap" => "2px",
- "grid-template-rows" => "repeat(2,minmax(0,1fr))",
- "overflow" => "hidden",
- "bottom" => "0",
- "left" => "0",
- "right" => "0",
- "top" => "0",
- "position" => "absolute",
- ]
- ),
- "div"
- );
-
- if(count($grid) !== 0){
-
- // we found image grid
- $this->fuckhtml->load($grid[0]);
-
- $images_div =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "data-attrid",
- "div"
- );
-
- foreach($images_div as $image_div){
-
- $this->fuckhtml->load($image_div);
-
- $image =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- count($image) === 0 ||
- !isset($image_div["attributes"]["data-docid"]) ||
- !isset($this->image_arr[$image_div["attributes"]["data-docid"]])
- ){
-
- // ?? no image, continue
- continue;
- }
-
- $out["image"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $image[0]["attributes"]["alt"]
- )
- ),
- "source" =>
- $this->image_arr[
- $image_div["attributes"]["data-docid"]
- ],
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $image_div["attributes"]["data-lpage"]
- )
- ];
- }
- }
- }
-
-
- //
- // craft $npt token
- //
- if(
- $last_page === false &&
- count($out["web"]) !== 0
- ){
- if(!isset($params["start"])){
-
- $params["start"] = 20;
- }else{
-
- $params["start"] += 20;
- }
-
- $out["npt"] =
- $this->backend
- ->store(
- json_encode($params),
- $pagetype,
- $proxy
- );
- }
-
-
- //
- // Parse right handside
- //
- $this->fuckhtml->load($html);
-
- $rhs =
- $this->fuckhtml
- ->getElementById(
- "rhs"
- );
-
- if($rhs === null){
-
- return $out;
- }
-
- $this->fuckhtml->load($rhs);
-
- // get images gallery
- $image_gallery =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-rc",
- "ivg-i",
- "div"
- );
-
- if(count($image_gallery) !== 0){
-
- $this->fuckhtml->load($image_gallery[0]);
-
- // get images
- $images_div =
- $this->fuckhtml
- ->getElementsByClassName(
- "ivg-i",
- "div"
- );
-
- foreach($images_div as $image_div){
-
- $this->fuckhtml->load($image_div);
-
- $image =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- count($image) === 0 ||
- !isset(
- $this->image_arr[
- $image_div
- ["attributes"]
- ["data-docid"]
- ]
- )
- ){
-
- continue;
- }
-
- foreach($out["image"] as $existing_image){
-
- // might already exist
- if(
- $existing_image["source"][1]["url"] ==
- $this->image_arr[
- $image_div
- ["attributes"]
- ["data-docid"]
- ][1]["url"]
- ){
-
- continue 2;
- }
- }
-
- $out["image"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $image[0]
- ["attributes"]
- ["alt"]
- )
- ),
- "source" =>
- $this->image_arr[
- $image_div
- ["attributes"]
- ["data-docid"]
- ],
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $image_div
- ["attributes"]
- ["data-lpage"]
- )
- ];
- }
-
- // reset
- $this->fuckhtml->load($rhs);
- }
-
- // get header container
- $header =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "padding" => "0 0 16px 20px",
- "display" => "flex"
- ]
- ),
- "div"
- );
-
- // stop parsing wikipedia heads if there isn't a header
- $description = [];
- $title = "About";
-
- if(count($header) !== 0){
-
- $this->fuckhtml->load($header[0]);
-
- // g-snackbar-action present: we found a button instead
- if(
- count(
- $this->fuckhtml
- ->getElementsByTagName(
- "g-snackbar-action"
- )
- ) !== 0
- ){
-
- $title_tag =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "title",
- "div"
- );
-
- if(count($title_tag) !== 0){
- $title =
- $this->fuckhtml
- ->getTextContent(
- $title_tag[0]
- );
-
- $header[0]["innerHTML"] =
- str_replace(
- $title_tag[0]["outerHTML"],
- "",
- $header[0]["innerHTML"]
- );
-
- // if header still contains text, add it as a subtitle in description
- $subtitle =
- $this->fuckhtml
- ->getTextContent(
- $header[0]
- );
-
- if(strlen($subtitle) !== 0){
-
- $description[] = [
- "type" => "quote",
- "value" => $subtitle
- ];
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($rhs);
- }
-
- // get description elements
- $url = null;
-
- $text =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "description",
- "div"
- );
-
- if(count($text) !== 0){
-
- $this->fuckhtml->load($text[0]);
-
- $a =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($a) !== 0){
- // get link and remove it from description
-
- $a = $a[count($a) - 1];
-
- $text[0]["innerHTML"] =
- str_replace(
- $a["outerHTML"],
- "",
- $text[0]["innerHTML"]
- );
-
- $url =
- $this->fuckhtml
- ->getTextContent(
- $a
- ["attributes"]
- ["href"]
- );
- }
-
- $description[] = [
- "type" => "text",
- "value" =>
- html_entity_decode(
- preg_replace(
- '/^Description/',
- "",
- $this->fuckhtml
- ->getTextContent(
- $text[0]
- )
- )
- )
- ];
-
- // reset
- $this->fuckhtml->load($rhs);
- }
-
- // get reviews (google play, steam, etc)
- $review_container =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "align-items" => "start",
- "display" => "flex"
- ]
- ),
- "div"
- );
-
- if(count($review_container) !== 0){
-
- $this->fuckhtml->load($review_container[0]);
-
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) !== 0){
-
- $description[] = [
- "type" => "title",
- "value" => "Ratings"
- ];
-
- foreach($as as $a){
-
- $this->fuckhtml->load($a);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- if(count($spans) >= 2){
-
- $value =
- trim(
- $this->fuckhtml
- ->getTextContent(
- $spans[1]
- ),
- "· "
- );
-
- if(
- $value == "" &&
- isset($spans[2])
- ){
-
- $value =
- $this->fuckhtml
- ->getTextContent(
- $spans[2]
- );
- }
-
- $description[] = [
- "type" => "link",
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $a["attributes"]
- ["href"]
- ),
- "value" => $value
- ];
-
- $description[] = [
- "type" => "text",
- "value" =>
- ": " .
- $this->fuckhtml
- ->getTextContent(
- $spans[0]
- ) . "\n"
- ];
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($rhs);
- }
-
- // initialize sublinks
- $sublinks = [];
-
- // get description from business
- if(count($description) === 0){
-
- $data_attrid =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "data-attrid"
- );
-
- $summary =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "kc:/local:one line summary",
- $data_attrid
- );
-
- if(count($summary) !== 0){
-
- $description[] = [
- "type" => "quote",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $summary[0]
- )
- ];
-
- // remove summary so it doesnt get parsed as a table
- $rhs["innerHTML"] =
- str_replace(
- $summary[0]["outerHTML"],
- "",
- $rhs["innerHTML"]
- );
-
- $this->fuckhtml->load($rhs);
- }
-
- $address =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "kc:/location/location:address",
- $data_attrid
- );
-
- if(count($address) !== 0){
-
- $description[] = [
- "type" => "text",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $address[0]
- )
- ];
- }
-
- // get title
- $title_div =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "title",
- $data_attrid
- );
-
- if(count($title_div) !== 0){
-
- $title =
- $this->fuckhtml
- ->getTextContent(
- $title_div[0]
- );
- }
-
- // get phone number
- $phone =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "kc:/local:alt phone",
- $data_attrid
- );
-
- if(count($phone) !== 0){
-
- $this->fuckhtml->load($phone[0]);
-
- $sublinks["Call"] =
- "tel:" .
- $this->fuckhtml
- ->getTextContent(
- $this->fuckhtml
- ->getElementsByAttributeName(
- "aria-label",
- "span"
- )[0]
- );
-
- $this->fuckhtml->load($rhs);
- }
- }
-
- if(count($description) === 0){
-
- // still no description? abort
- return $out;
- }
-
- // get table elements
- $table = [];
- $table_elems =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "margin-top" => "7px"
- ]
- ),
- "div"
- );
-
- foreach($table_elems as $elem){
-
- $this->fuckhtml->load($elem);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- if(count($spans) === 0){
-
- // ?? invalid
- continue;
- }
-
- $elem["innerHTML"] =
- str_replace(
- $spans[0]["outerHTML"],
- "",
- $elem["innerHTML"]
- );
-
- $key =
- rtrim(
- $this->fuckhtml
- ->getTextContent(
- $spans[0]
- ),
- ": "
- );
-
- if(
- $key == "" ||
- $key == "Phone"
- ){
-
- continue;
- }
-
- if($key == "Hours"){
-
- $hours = [];
-
- $this->fuckhtml->load($elem);
-
- $trs =
- $this->fuckhtml
- ->getElementsByTagName(
- "tr"
- );
-
- foreach($trs as $tr){
-
- $this->fuckhtml->load($tr);
-
- $tds =
- $this->fuckhtml
- ->getElementsByTagName(
- "td"
- );
-
- if(count($tds) === 2){
-
- $hours[] =
- $this->fuckhtml
- ->getTextContent(
- $tds[0]
- ) . ": " .
- $this->fuckhtml
- ->getTextContent(
- $tds[1]
- );
- }
- }
-
- if(count($hours) !== 0){
-
- $hours = implode("\n", $hours);
- $table["Hours"] = $hours;
- }
-
- continue;
- }
-
- $table[$key] =
- preg_replace(
- '/ +/',
- " ",
- $this->fuckhtml
- ->getTextContent(
- $elem
- )
- );
- }
-
- // reset
- $this->fuckhtml->load($rhs);
-
- // get the website div
- $as =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "visit_official_site",
- "a"
- );
-
- if(count($as) !== 0){
-
- $sublinks["Website"] =
- str_replace(
- "http://",
- "https://",
- $this->fuckhtml
- ->getTextContent(
- $as[0]
- ["attributes"]
- ["href"]
- )
- );
- }else{
-
- // get website through button
- $button =
- $this->fuckhtml
- ->getElementsByClassName(
- "ab_button",
- "a"
- );
-
- if(count($button) !== 0){
-
- $sublinks["Website"] =
- $this->unshiturl(
- $this->fuckhtml
- ->getTextContent(
- $button[0]
- ["attributes"]
- ["href"]
- )
- );
- }
- }
-
- // get social media links
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "g-link"
- );
-
- foreach($as as $a){
-
- $this->fuckhtml->load($a);
-
- $link =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($link) === 0){
-
- continue;
- }
-
- $sublink_title =
- $this->fuckhtml
- ->getTextContent(
- $a
- );
-
- if($sublink_title == "X (Twitter)"){
-
- $sublink_title = "Twitter";
- }
-
- $sublinks[$sublink_title] =
- $this->fuckhtml
- ->getTextContent(
- $link[0]
- ["attributes"]
- ["href"]
- );
- }
-
- // reset
- $this->fuckhtml->load($rhs);
-
- // get those round containers
- $containers =
- $this->fuckhtml
- ->getElementsByClassName(
- "tpa-ci"
- );
-
- foreach($containers as $container){
-
- $this->fuckhtml->load($container);
-
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) === 0){
-
- continue;
- }
-
- $sublinks[
- $this->fuckhtml
- ->getTextContent(
- $as[0]
- )
- ] =
- $this->fuckhtml
- ->getTextContent(
- $as[0]
- ["attributes"]
- ["href"]
- );
- }
-
- $out["answer"][] = [
- "title" => $title,
- "description" => $description,
- "url" => $url,
- "thumb" => null,
- "table" => $table,
- "sublink" => $sublinks
- ];
-
- return $out;
- }
-
-
/**
 * Collect the images google ships through inline javascript into $this->dimg.
 *
 * Two script shapes are scanned:
 *  - `google.ldi={...}` objects: every key/value pair is stored, with the
 *    value passed through unshit_thumb();
 *  - `var s='data:image/...';var ii=['id',...];` pairs: every id listed in
 *    `ii` is mapped to the same parsed `s` string.
 *
 * $this->dimg is reset on every call.
 *
 * @param string $html Raw HTML of the page
 * @return void
 */
private function scrape_dimg($html){

    // get images loaded through javascript
    $this->dimg = [];

    preg_match_all(
        '/function\(\){google\.ldi=({.*?});/',
        $html,
        $ldi
    );

    if(isset($ldi[1])){

        foreach($ldi[1] as $json){

            $images = json_decode($json, true);

            if(!is_array($images)){

                // the lazy match can cut the object short and google can
                // change the payload: skip instead of iterating over null
                continue;
            }

            foreach($images as $id => $image_url){

                if(!is_string($image_url)){

                    // unshit_thumb() hands the value to parse_url()
                    continue;
                }

                $this->dimg[$id] =
                    $this->unshit_thumb(
                        $image_url
                    );
            }
        }
    }

    // get additional javascript base64 images
    preg_match_all(
        '/var s=\'(data:image\/[^\']+)\';var ii=\[((?:\'[^\']+\',?)+)\];/',
        $html,
        $inline
    );

    if(isset($inline[1])){

        foreach($inline[1] as $index => $data_uri){

            $ids = explode(",", $inline[2][$index]);
            $string =
                $this->fuckhtml
                ->parseJsString(
                    $data_uri
                );

            // every id in the list points to the same image
            foreach($ids as $id){

                $this->dimg[trim($id, "'")] = $string;
            }
        }
    }
}
-
-
/**
 * Index the image arrays embedded in the page into $this->image_arr.
 *
 * Every regex hit is stored under its first captured string (the value other
 * methods look up with the `data-docid` attribute) as a two element list:
 *  [0] url/width/height built from capture groups 5, 7 and 6
 *  [1] url/width/height built from capture groups 2, 4 and 3, with the url
 *      passed through unshit_thumb()
 *
 * $this->image_arr is reset on every call.
 *
 * @param string $html Raw HTML of the page
 * @return void
 */
private function scrape_imagearr($html){

    $this->image_arr = [];

    // get image links arrays
    preg_match_all(
        '/\[0,"([^"]+)",\["([^"]+)\",([0-9]+),([0-9]+)\],\["([^"]+)",([0-9]+),([0-9]+)\]/',
        $html,
        $hits
    );

    if(!isset($hits[1])){

        return;
    }

    foreach($hits[1] as $index => $docid){

        // the page lists the second number as width and the first as height
        $first = [
            "url" => $this->fuckhtml->parseJsString($hits[5][$index]),
            "width" => (int)$hits[7][$index],
            "height" => (int)$hits[6][$index]
        ];

        $second_url = $this->fuckhtml->parseJsString($hits[2][$index]);

        $second = [
            "url" => $this->unshit_thumb($second_url),
            "width" => (int)$hits[4][$index],
            "height" => (int)$hits[3][$index]
        ];

        $this->image_arr[$docid] = [$first, $second];
    }
}
-
-
/**
 * Look up an image collected by scrape_dimg().
 *
 * @param string $dimg Key to look up in $this->dimg
 * @return mixed The stored value, or null when the key is unknown
 */
private function getdimg($dimg){

    return $this->dimg[$dimg] ?? null;
}
-
-
/**
 * Reduce a gstatic thumbnail URL to its `q` parameter only.
 *
 * URLs whose host matches tbn*.gstatic.com are rebuilt as
 * https://<host>/images?q=<q>. Anything else, including gstatic URLs that
 * carry no usable `q` parameter, is returned untouched.
 *
 * @param string $url Thumbnail URL
 * @return string Cleaned URL, or $url unchanged
 */
private function unshit_thumb($url){
    // https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQINE2vbnNLHXqoZr3RVsaEJFyOsj1_BiBnJch-e1nyz3oia7Aj5xVj
    // https://i.ytimg.com/vi/PZVIyA5ER3Y/mqdefault.jpg?sqp=-oaymwEFCJQBEFM&rs=AMzJL3nXeaCpdIar-ltNwl82Y82cIJfphA

    $parts = parse_url($url);

    if(
        !isset($parts["host"], $parts["query"]) ||
        !preg_match(
            '/tbn.*\.gstatic\.com/',
            $parts["host"]
        )
    ){

        // not a gstatic thumbnail, or nothing to extract: leave it alone
        // (a missing query used to be read unconditionally)
        return $url;
    }

    parse_str($parts["query"], $params);

    if(
        isset($params["q"]) &&
        is_string($params["q"])
    ){

        return "https://" . $parts["host"] . "/images?q=" . $params["q"];
    }

    return $url;
}
-
-
/**
 * Parse every <style> tag of the currently loaded document.
 *
 * Fills two properties:
 *  - $this->styles: selector => [property => lowercased value, ...], plus a
 *    "_c" entry holding the number of properties of that selector. A rule
 *    with several comma separated selectors is stored once per selector.
 *  - $this->css_colors: built from the ":root" entry, flipped so the value
 *    is the key and the lowercased property name is the value.
 *
 * @return void
 */
private function parsestyles(){

    // glue all stylesheets together
    $css = "";

    foreach($this->fuckhtml->getElementsByTagName("style") as $style_tag){

        $css .= $style_tag["innerHTML"];
    }

    // filter out media/keyframe queries
    $css =
        preg_replace(
            '/@\s*(?!font-face)[^{]+\s*{[\S\s]+?}\s*}/',
            "",
            $css
        );

    // split into selector / declaration block pairs
    preg_match_all(
        '/(.+?){([\S\s]*?)}/',
        $css,
        $rules
    );

    $parsed = [];

    foreach($rules[1] as $rule_index => $selector_list){

        // get style values
        preg_match_all(
            '/([^:;]+):([^;]*?(?:\([^)]+\)[^;]*?)?)(?:;|$)/',
            $rules[2][$rule_index],
            $declarations
        );

        $properties = [];

        foreach($declarations[1] as $decl_index => $property){

            $properties[trim($property)] =
                strtolower(trim($declarations[2][$decl_index]));
        }

        // h1,h2,h3 will each get their own array index
        foreach(explode(",", $selector_list) as $selector){

            $selector = trim($selector, "}\t\n\r\0\x0B");

            foreach($properties as $property => $value){

                $parsed[$selector][$property] = $value;
            }
        }
    }

    // remember how many properties each selector has
    foreach($parsed as $selector => $properties){

        $parsed[$selector]["_c"] = count($properties);
    }

    $this->styles = $parsed;

    // get CSS colors
    $this->css_colors = [];

    if(!isset($this->styles[":root"])){

        return;
    }

    foreach($this->styles[":root"] as $variable => $color){

        $this->css_colors[$color] = strtolower($variable);
    }
}
-
-
-
/**
 * Find the class name whose CSS rule has exactly the given properties.
 *
 * The number of requested properties is added to the needle under "_c", the
 * same key parsestyles() uses for each rule's property count, so a rule only
 * matches when every requested pair is present and the counts are equal.
 *
 * @param array $styles property => value pairs to look for
 * @return string|false Last space separated part of the first matching
 *                      selector with "." and "#" turned into spaces and
 *                      left-trimmed, or false when nothing matches
 */
private function getstyle($styles){

    $styles["_c"] = count($styles);

    // every requested property, plus the "_c" entry itself
    $needed = $styles["_c"] + 1;

    foreach($this->styles as $selector => $properties){

        $shared = array_intersect_assoc($properties, $styles);

        if(count($shared) !== $needed){

            continue;
        }

        // only keep the last part of a descendant selector
        $chunks = explode(" ", $selector);
        $last = array_pop($chunks);

        return ltrim(str_replace([".", "#"], " ", $last));
    }

    return false;
}
-
-
-
/**
 * Get the name stored for a color value in $this->css_colors.
 *
 * @param string $color Color value to look up
 * @return string|null Stored name, or null when the color is unknown
 */
private function getcolorvar($color){

    return $this->css_colors[$color] ?? null;
}
-
-
-
/**
 * Run a google web search and return the parsed page.
 *
 * When $get["npt"] is set, the request parameters and proxy are fetched back
 * from the backend. Otherwise the request is built from the filters in $get.
 *
 * @param array $get Request options. Keys read here: npt, s, country, nsfw,
 *                   lang, older, newer, spellcheck
 * @return array Result of parsepage() for the "web" page type
 * @throws Exception "Failed to get HTML" when the request fails (the original
 *                   exception is attached as the previous one)
 */
public function web($get){

    if($get["npt"]){

        [$params, $proxy] = $this->backend->get($get["npt"], "web");
        $params = json_decode($params, true);

        $search = $params["q"];

    }else{

        $search = $get["s"];
        $country = $get["country"];
        $nsfw = $get["nsfw"];
        $lang = $get["lang"];
        $older = $get["older"];
        $newer = $get["newer"];
        $spellcheck = $get["spellcheck"];
        $proxy = $this->backend->get_ip();

        $params = [
            "q" => $search,
            "hl" => "en",
            "num" => 20 // get 20 results
        ];

        // country
        if($country != "any"){

            $params["gl"] = $country;
        }

        // nsfw
        $params["safe"] = $nsfw == "yes" ? "off" : "active";

        // language
        if($lang != "any"){

            $params["lr"] = "lang_" . $lang;
        }

        // generate tbs
        $tbs = [];

        // get date
        $older = $older === false ? null : date("m/d/Y", $older);
        $newer = $newer === false ? null : date("m/d/Y", $newer);

        if(
            $older !== null ||
            $newer !== null
        ){

            // a missing bound is sent as an empty value
            $tbs["cdr"] = "1";
            $tbs["cd_min"] = $newer;
            $tbs["cd_max"] = $older;
        }

        // spellcheck filter
        if($spellcheck == "no"){

            $params["nfpr"] = "1";
        }

        if(count($tbs) !== 0){

            // key:value pairs, comma separated
            $params["tbs"] = "";

            foreach($tbs as $key => $value){

                $params["tbs"] .= $key . ":" . $value . ",";
            }

            $params["tbs"] = rtrim($params["tbs"], ",");
        }
    }

    try{
        $html =
            $this->get(
                $proxy,
                "https://www.google.com/search",
                $params
            );
    }catch(Exception $error){

        // keep the cause around for whoever logs this
        throw new Exception("Failed to get HTML", 0, $error);
    }

    return $this->parsepage($html, "web", $search, $proxy, $params);
}
-
-
-
/**
 * Run a google video search and convert the parsed page to video results.
 *
 * When $get["npt"] is set, the request parameters and proxy are fetched back
 * from the backend. Otherwise the request is built from the filters in $get.
 * The page is parsed by parsepage() and its "web" entries are converted into
 * "video" entries.
 *
 * @param array $get Request options. Keys read here: npt, s, country, nsfw,
 *                   older, newer, duration, quality, captions
 * @return array Keys: status, npt, video, author, livestream, playlist, reel
 * @throws Exception "Failed to get HTML" when the request fails (the original
 *                   exception is attached as the previous one)
 */
public function video($get){

    if($get["npt"]){

        // NOTE(review): fetched as "video" while parsepage() below is given
        // "videos" as page type; confirm the backend treats both the same
        [$params, $proxy] = $this->backend->get($get["npt"], "video");
        $params = json_decode($params, true);

        $search = $params["q"];

    }else{

        $search = $get["s"];
        $country = $get["country"];
        $nsfw = $get["nsfw"];
        $older = $get["older"];
        $newer = $get["newer"];
        $duration = $get["duration"];
        $quality = $get["quality"];
        $captions = $get["captions"];
        $proxy = $this->backend->get_ip();

        $params = [
            "q" => $search,
            "tbm" => "vid",
            "hl" => "en",
            "num" => "20"
        ];

        // country
        if($country != "any"){

            $params["gl"] = $country;
        }

        // nsfw
        $params["safe"] = $nsfw == "yes" ? "off" : "active";

        // list of ready-made "key:value" strings
        $tbs = [];

        // get date
        $older = $older === false ? null : date("m/d/Y", $older);
        $newer = $newer === false ? null : date("m/d/Y", $newer);

        if(
            $older !== null ||
            $newer !== null
        ){

            // these used to be stored as key => value and lost their keys
            // in the implode() below, unlike in web() and news()
            $tbs[] = "cdr:1";
            $tbs[] = "cd_min:" . $newer;
            $tbs[] = "cd_max:" . $older;
        }

        // duration
        if($duration != "any"){

            $tbs[] = "dur:" . $duration;
        }

        // quality
        if($quality != "any"){

            $tbs[] = "hq:" . $quality;
        }

        // captions
        if($captions != "any"){

            $tbs[] = "cc:" . $captions;
        }

        // append tbs
        if(count($tbs) !== 0){

            $params["tbs"] =
                implode(",", $tbs);
        }
    }

    try{
        $html =
            $this->get(
                $proxy,
                "https://www.google.com/search",
                $params
            );
    }catch(Exception $error){

        // keep the cause around for whoever logs this
        throw new Exception("Failed to get HTML", 0, $error);
    }

    $response = $this->parsepage($html, "videos", $search, $proxy, $params);

    $out = [
        "status" => "ok",
        "npt" => $response["npt"] ?? null,
        "video" => [],
        "author" => [],
        "livestream" => [],
        "playlist" => [],
        "reel" => []
    ];

    foreach($response["web"] as $result){

        // author and duration come from the result's table, when present
        $out["video"][] = [
            "title" => $result["title"],
            "description" => $result["description"],
            "author" => [
                "name" => $result["table"]["Author"] ?? null,
                "url" => null,
                "avatar" => null
            ],
            "date" => $result["date"],
            "duration" =>
                isset($result["table"]["Duration"]) ?
                $this->hms2int($result["table"]["Duration"]) :
                null,
            "views" => null,
            "thumb" => $result["thumb"],
            "url" => $result["url"]
        ];
    }

    return $out;
}
-
-
-
/**
 * Run a google news search and scrape the articles from the page.
 *
 * When $get["npt"] is set, the token holds the href of the previous page's
 * "pnnext" link, which is requested as-is. Otherwise the request is built
 * from the filters in $get.
 *
 * @param array $get Request options. Keys read here: npt, s, country, nsfw,
 *                   older, newer, sort
 * @return array Keys: status, npt, news. Every news entry has title, author,
 *               description, date, thumb and url
 * @throws Exception "Failed to get HTML" when the request fails, "Could not
 *                   grep result div" when #center_col is missing, plus
 *                   whatever detect_sorry() throws
 */
public function news($get){

    if($get["npt"]){

        [$req, $proxy] = $this->backend->get($get["npt"], "news");

        try{

            $html =
                $this->get(
                    $proxy,
                    "https://www.google.com" . $req,
                    []
                );
        }catch(Exception $error){

            throw new Exception("Failed to get HTML", 0, $error);
        }

    }else{

        $search = $get["s"];
        $country = $get["country"];
        $nsfw = $get["nsfw"];
        $older = $get["older"];
        $newer = $get["newer"];
        $sort = $get["sort"];
        $proxy = $this->backend->get_ip();

        $params = [
            "q" => $search,
            "tbm" => "nws",
            "hl" => "en",
            "num" => "20"
        ];

        // country
        if($country != "any"){

            $params["gl"] = $country;
        }

        // nsfw
        $params["safe"] = $nsfw == "yes" ? "off" : "active";

        $tbs = [];

        // get date
        $older = $older === false ? null : date("m/d/Y", $older);
        $newer = $newer === false ? null : date("m/d/Y", $newer);

        if(
            $older !== null ||
            $newer !== null
        ){

            $tbs["cdr"] = "1";
            $tbs["cd_min"] = $newer;
            $tbs["cd_max"] = $older;
        }

        // relevance
        if($sort == "date"){

            $tbs["sbd"] = "1";
        }

        // append tbs
        if(count($tbs) !== 0){

            $params["tbs"] = "";

            foreach($tbs as $key => $value){

                $params["tbs"] .= $key . ":" . $value . ",";
            }

            $params["tbs"] = rtrim($params["tbs"], ",");
        }

        // same error handling as the next page request above
        try{

            $html =
                $this->get(
                    $proxy,
                    "https://www.google.com/search",
                    $params
                );
        }catch(Exception $error){

            throw new Exception("Failed to get HTML", 0, $error);
        }
    }

    $out = [
        "status" => "ok",
        "npt" => null,
        "news" => []
    ];

    $this->fuckhtml->load($html);

    $this->detect_sorry();

    // get images
    $this->scrape_dimg($html);

    // parse styles
    $this->parsestyles();

    $center_col =
        $this->fuckhtml
        ->getElementById(
            "center_col",
            "div"
        );

    // NOTE(review): this file checks getElementById() misses against both
    // null and false in different places, so accept either here
    if(
        $center_col === null ||
        $center_col === false
    ){

        throw new Exception("Could not grep result div");
    }

    $this->fuckhtml->load($center_col);

    // get next page
    $npt =
        $this->fuckhtml
        ->getElementById(
            "pnnext",
            "a"
        );

    if(
        $npt !== false &&
        $npt !== null &&
        isset($npt["attributes"]["href"])
    ){

        $out["npt"] =
            $this->backend->store(
                $this->fuckhtml
                ->getTextContent(
                    $npt["attributes"]
                    ["href"]
                ),
                "news",
                $proxy
            );
    }

    $as =
        $this->fuckhtml
        ->getElementsByAttributeName(
            "jsname",
            "a"
        );

    foreach($as as $a){

        $this->fuckhtml->load($a);

        // get title
        $title =
            $this->fuckhtml
            ->getElementsByAttributeValue(
                "role",
                "heading",
                "div"
            );

        if(count($title) === 0){

            // link without a heading is not an article
            continue;
        }

        $title =
            $this->titledots(
                $this->fuckhtml
                ->getTextContent(
                    $title[0]
                )
            );

        // get thumbnail
        $image =
            $this->fuckhtml
            ->getElementsByAttributeName(
                "id",
                "img"
            );

        // check for padded title node, if found, we're inside a carousel
        $probe =
            count(
                $this->fuckhtml
                ->getElementsByClassName(
                    $this->getstyle(
                        [
                            "padding" => "16px 16px 40px 16px"
                        ]
                    ),
                    "div"
                )
            ) !== 0;

        if(
            count($image) !== 0 &&
            !isset($image[0]["attributes"]["width"])
        ){

            $thumb = [
                "url" =>
                    $this->getdimg(
                        $image[0]["attributes"]["id"]
                    ),
                "ratio" => $probe === true ? "16:9" : "1:1"
            ];
        }else{

            $thumb = [
                "url" => null,
                "ratio" => null
            ];
        }

        // inline styled divs are used for both description and date
        $styled_divs =
            $this->fuckhtml
            ->getElementsByAttributeName(
                "style",
                "div"
            );

        $description = null;

        if($probe === false){

            // first div with a margin-top holds the description
            foreach($styled_divs as $desc){

                if(
                    strpos(
                        $desc["attributes"]["style"],
                        "margin-top:"
                    ) !== false
                ){

                    $description =
                        $this->titledots(
                            $this->fuckhtml
                            ->getTextContent(
                                $desc
                            )
                        );
                    break;
                }
            }
        }

        // get author
        $author =
            $this->fuckhtml
            ->getElementsByClassName(
                $this->getstyle(
                    [
                        "overflow" => "hidden",
                        "text-align" => "left",
                        "text-overflow" => "ellipsis",
                        "white-space" => "nowrap",
                        "margin-bottom" => "8px"
                    ]
                ),
                "div"
            );

        if(count($author) !== 0){

            $author =
                $this->fuckhtml
                ->getTextContent(
                    $author[0]
                );
        }else{

            $author = null;
        }

        // get date: last span of the first div styled with "bottom:"
        $date = null;

        foreach($styled_divs as $d){

            if(
                strpos(
                    $d["attributes"]["style"],
                    "bottom:"
                ) === false
            ){

                continue;
            }

            $this->fuckhtml->load($d);

            $span =
                $this->fuckhtml
                ->getElementsByTagName(
                    "span"
                );

            if(count($span) !== 0){

                $date =
                    strtotime(
                        $this->fuckhtml
                        ->getTextContent(
                            $span[count($span) - 1]
                        )
                    );

                if($date === false){

                    // failed to parse date
                    $date = null;
                }
            }

            break;
        }

        $out["news"][] = [
            "title" => $title,
            "author" => $author,
            "description" => $description,
            "date" => $date,
            "thumb" => $thumb,
            "url" =>
                $this->unshiturl(
                    $a["attributes"]
                    ["href"]
                )
        ];
    }

    return $out;
}
-
-
-
-
/**
 * Run a google image search and scrape the images from the page.
 *
 * When $get["npt"] is set, the request parameters and proxy are fetched back
 * from the backend. Otherwise the request is built from the filters in $get.
 *
 * @param array $get Request options. Keys read here: npt, s, country, nsfw,
 *                   time, size, ratio, color, type, format, rights
 * @return array Keys: status, npt, image. Every image entry has title,
 *               source (taken from $this->image_arr) and url
 * @throws Exception "Search term is empty!", "Failed to get search page",
 *                   plus whatever detect_sorry() throws
 */
public function image($get){

    // generate parameters
    if($get["npt"]){

        // NOTE(review): fetched as "images" but stored as "image" at the end
        // of this method; confirm the backend treats both the same
        [$params, $proxy] =
            $this->backend->get(
                $get["npt"],
                "images"
            );

        $params = json_decode($params, true);
    }else{

        $search = $get["s"];
        if(strlen($search) === 0){

            throw new Exception("Search term is empty!");
        }

        $proxy = $this->backend->get_ip();
        $country = $get["country"];
        $nsfw = $get["nsfw"];
        $time = $get["time"];
        $size = $get["size"];
        $ratio = $get["ratio"];
        $color = $get["color"];
        $type = $get["type"];
        $format = $get["format"];
        $rights = $get["rights"];

        $params = [
            "q" => $search,
            "udm" => "2" // get images
        ];

        // country (image search uses cr instead of gl)
        if($country != "any"){

            $params["cr"] = "country" . strtoupper($country);
        }

        // nsfw
        $params["safe"] = $nsfw == "yes" ? "off" : "active";

        // generate tbs
        $tbs = [];

        // time
        if($time != "any"){

            $tbs["qdr"] = $time;
        }

        // size
        if($size != "any"){

            $params["imgsz"] = $size;
        }

        // ratio
        if($ratio != "any"){

            $params["imgar"] = $ratio;
        }

        // color
        if($color != "any"){

            if(
                $color == "color" ||
                $color == "trans"
            ){

                $params["imgc"] = $color;
            }elseif($color == "bnw"){

                $params["imgc"] = "gray";
            }else{

                // anything else is sent as a specific color filter
                $tbs["ic"] = "specific";
                $tbs["isc"] = $color;
            }
        }

        // type
        if($type != "any"){

            $tbs["itp"] = $type;
        }

        // format
        if($format != "any"){

            $params["as_filetype"] = $format;
        }

        // rights (tbs)
        if($rights != "any"){

            $tbs["sur"] = $rights;
        }

        // append tbs
        if(count($tbs) !== 0){

            $params["tbs"] = "";

            foreach($tbs as $key => $value){

                $params["tbs"] .= $key . ":" . $value . ",";
            }

            $params["tbs"] = rtrim($params["tbs"], ",");
        }
    }

    try{
        $html =
            $this->get(
                $proxy,
                "https://www.google.com/search",
                $params
            );
    }catch(Exception $error){

        // keep the cause around for whoever logs this
        throw new Exception("Failed to get search page", 0, $error);
    }

    $this->fuckhtml->load($html);

    $this->detect_sorry();

    // get javascript images
    $this->scrape_imagearr($html);

    $out = [
        "status" => "ok",
        "npt" => null,
        "image" => []
    ];

    $images =
        $this->fuckhtml
        ->getElementsByClassName(
            "ivg-i",
            "div"
        );

    foreach($images as $div){

        $this->fuckhtml->load($div);

        $image =
            $this->fuckhtml
            ->getElementsByTagName("img");

        if(
            count($image) === 0 ||
            !isset($div["attributes"]["data-docid"]) ||
            !isset($this->image_arr[$div["attributes"]["data-docid"]])
        ){

            // no <img> or no scraped source for this tile: skip it, same
            // guard the image parsers of the web page use
            continue;
        }

        $out["image"][] = [
            "title" =>
                $this->titledots(
                    $this->fuckhtml
                    ->getTextContent(
                        $image[0]["attributes"]["alt"] ?? ""
                    )
                ),
            "source" =>
                $this->image_arr[
                    $div["attributes"]["data-docid"]
                ],
            "url" =>
                $this->fuckhtml
                ->getTextContent(
                    $div["attributes"]["data-lpage"] ?? ""
                )
        ];
    }

    // as usual, no way to check if there is a next page reliably
    if(count($out["image"]) > 50){

        if(!isset($params["start"])){

            $params["start"] = 10;
        }else{

            $params["start"] += 10;
        }

        $out["npt"] =
            $this->backend
            ->store(
                json_encode($params),
                "image",
                $proxy
            );
    }

    return $out;
}
-
/**
 * Turn a google result link into a clean destination URL.
 *
 * Steps:
 *  1. the input goes through fuckhtml's getTextContent();
 *  2. a URL without host is treated as a tracking URL and replaced by its
 *     `imgurl` (or else `q`) parameter;
 *  3. per-domain cleanup: wikipedia ".m." hosts, imdb/youtube "m." hosts,
 *     play.google referrer/hl/gl parameters, twitter ref_src parameter and
 *     google maps "maps?" links reduced to their daddr parameter.
 *
 * @param string $url Link as found in the page
 * @param bool $return_size When true, also return the image metadata found in
 *                          the tracking URL
 * @return string|array The cleaned URL, or with $return_size an array with
 *                      keys url, ref, thumb_width, thumb_height, image_width
 *                      and image_height (null where unknown)
 */
private function unshiturl($url, $return_size = false){

    // decode
    $url =
        $this->fuckhtml
        ->getTextContent($url);

    $url_parts = parse_url($url);

    // parameters of the tracking url. kept in their own variable so the
    // per-domain query handling below cannot overwrite them
    $tracking = [];

    if(
        !isset(
            $url_parts["host"]
        )
    ){

        // no host, we have a tracking url
        if(isset($url_parts["query"])){

            parse_str($url_parts["query"], $tracking);
        }

        if(isset($tracking["imgurl"])){

            $url = $tracking["imgurl"];
        }
        elseif(isset($tracking["q"])){

            $url = $tracking["q"];
        }
    }

    // rewrite URLs to remove extra tracking parameters
    // (cast: parse_url gives null/false when there is no usable host)
    $domain = (string)parse_url($url, PHP_URL_HOST);

    if(
        preg_match(
            '/wikipedia\.org$/',
            $domain
        )
    ){

        // rewrite wikipedia mobile URLs to desktop
        $url =
            $this->replacedomain(
                $url,
                preg_replace(
                    '/([a-z0-9]+)(\.m\.)/',
                    '$1.',
                    $domain
                )
            );
    }

    elseif(
        preg_match(
            '/imdb\.com$|youtube\.[^.]+$/',
            $domain
        )
    ){

        // rewrite imdb and youtube mobile URLs too
        $url =
            $this->replacedomain(
                $url,
                preg_replace(
                    '/^m\./',
                    "",
                    $domain
                )
            );
    }

    elseif(
        preg_match(
            '/play\.google\.[^.]+$/',
            $domain
        )
    ){

        // remove referrers from play.google.com
        $oldquery = parse_url($url, PHP_URL_QUERY);

        if($oldquery !== null){

            parse_str($oldquery, $params);
            unset(
                $params["referrer"],
                $params["hl"],
                $params["gl"]
            );

            $url =
                str_replace(
                    $oldquery,
                    http_build_query($params),
                    $url
                );
        }
    }

    elseif(
        preg_match(
            '/twitter\.com$/',
            $domain
        )
    ){

        // remove more referrers from twitter.com
        $oldquery = parse_url($url, PHP_URL_QUERY);

        if($oldquery !== null){

            parse_str($oldquery, $params);
            unset($params["ref_src"]);

            $url =
                str_replace(
                    $oldquery,
                    http_build_query($params),
                    $url
                );
        }
    }

    elseif(
        preg_match(
            '/maps\.google\.[^.]+/',
            $domain
        )
    ){

        if(stripos($url, "maps?") !== false){

            //https://maps.google.com/maps?daddr=Johnny,+603+Rue+St+Georges,+Saint-J%C3%A9r%C3%B4me,+Quebec+J7Z+5B7
            $mapsquery = parse_url($url, PHP_URL_QUERY);

            if($mapsquery !== null){

                parse_str($mapsquery, $params);

                if(isset($params["daddr"])){

                    // only keep the destination address
                    $url =
                        "https://maps.google.com/maps?daddr=" .
                        urlencode($params["daddr"]);
                }
            }
        }
    }

    if($return_size){

        return [
            "url" => $url,
            "ref" => isset($tracking["imgrefurl"]) ? $tracking["imgrefurl"] : null,
            "thumb_width" => isset($tracking["tbnw"]) ? (int)$tracking["tbnw"] : null,
            "thumb_height" => isset($tracking["tbnh"]) ? (int)$tracking["tbnh"] : null,
            "image_width" => isset($tracking["w"]) ? (int)$tracking["w"] : null,
            "image_height" => isset($tracking["h"]) ? (int)$tracking["h"] : null
        ];
    }

    return $url;
}
-
/**
 * Swap the host of an http(s) URL.
 *
 * Only the first "http(s)://host" occurrence is rewritten, so URLs embedded
 * further down (eg. in the query string) are left alone. The new domain is
 * inserted literally: a domain starting with a digit or containing "$" is
 * not mistaken for a regex backreference.
 *
 * @param string $url URL to rewrite
 * @param string $domain Replacement host
 * @return string URL with the host replaced
 */
private function replacedomain($url, $domain){

    return
        preg_replace_callback(
            '/(https?:\/\/)([^\/]+)/',
            function($match) use ($domain){

                // scheme as matched, host replaced
                return $match[1] . $domain;
            },
            $url,
            1
        );
}
-
/**
 * Strip whitespace, dots and ellipsis characters from both ends of a title.
 *
 * The stripping is done per UTF-8 character. A byte-wise trim() with "…" in
 * its mask removes the individual bytes of that character, which damages
 * titles starting with another character sharing them (curly quotes, dashes).
 *
 * @param string $title Title to clean up
 * @return string Trimmed title
 */
private function titledots($title){

    $title = (string)$title;

    $trimmed =
        preg_replace(
            '/^[ .\t\n\r\0\x0B\x{2026}]+|[ .\t\n\r\0\x0B\x{2026}]+\z/u',
            "",
            $title
        );

    if($trimmed === null){

        // preg_replace failed (eg. the title is not valid UTF-8):
        // only strip the single byte characters
        return trim($title, " .\t\n\r\0\x0B");
    }

    return $trimmed;
}
-
/**
 * Convert a "h:m:s", "m:s" or "s" duration into seconds.
 *
 * The string is split on ":" into at most three parts; each part is cast to
 * int, so non-numeric parts count as 0.
 *
 * @param string $time Duration string
 * @return int Duration in seconds
 */
private function hms2int($time){

    $seconds = 0;

    // every part is worth 60 times the part that follows it
    foreach(explode(":", $time, 3) as $part){

        $seconds = ($seconds * 60) + (int)$part;
    }

    return $seconds;
}
-
/**
 * Abort when the currently loaded page contains a captcha.
 *
 * Looks for a div with the id "recaptcha" in the document loaded in fuckhtml.
 *
 * @return void
 * @throws Exception "Google returned a captcha" when the div is present
 */
private function detect_sorry(){

    $recaptcha =
        $this->fuckhtml
        ->getElementById(
            "recaptcha",
            "div"
        );

    // NOTE(review): this file checks getElementById() misses against both
    // null and false in different places, so neither counts as a captcha
    if(
        $recaptcha !== false &&
        $recaptcha !== null
    ){

        throw new Exception("Google returned a captcha");
    }
}
- }
|