12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
747784779478047814782478347844785478647874788478947904791479247934794479547964797479847994800480148024803480448054806480748084809481048114812481348144815481648174818481948204821482248234824482548264827482848294830483148324833483448354836483748384839484048414842484348444845484648474848484948504851485248534854485548564857485848594860486148624863486448654866486748684869487048714872487348744875487648774878487948804881488248834884488548864887488848894890489148924893489448954896489748984899490049014902490349044905490649074908490949104911491249134914491549164917491849194920492149224923492449254926492749284929493049314932493349344935493649374938493949404941494249434944494549464947494849494950495149524953495449554956495749584959496049614962496349644965496649674968496949704971497249734974497549764977497849794980498149824983498449854986498749884989499049914992499349944995499649974998499950005001500250035004500550065007500850095010501150125013501450155016501750185019502050215022502350245025502650275028502950305031503250335034503550365037503850395040504150425043504450455046504750485049 |
- <?php
- // @TODO check for consent.google.com page, if need be
- class google{
-
/**
 * Load the helper libraries this scraper relies on and instantiate them.
 *
 * Sets $this->fuckhtml to a new fuckhtml instance and $this->backend to
 * a backend instance constructed with the identifier "google".
 */
public function __construct(){

	// include_once instead of include: a second instantiation (or another
	// module that already loaded the same library) must not re-run the
	// file and trigger a fatal "cannot redeclare class" error.
	include_once "lib/fuckhtml.php";
	$this->fuckhtml = new fuckhtml();

	include_once "lib/backend.php";
	$this->backend = new backend("google");
}
-
/**
 * Describe the search filters available for a given result page.
 *
 * Every filter entry is an array with a "display" label and an "option"
 * value. "option" is either a map of option value => human readable label,
 * or the literal string "_DATE" (presumably a marker the caller renders as
 * a date input -- confirm against the frontend).
 *
 * The "country" and "nsfw" filters are common to all pages; the remaining
 * filters depend on $page. The trailing comments next to entries record the
 * Google query parameter each filter is meant to map to.
 *
 * @param string $page One of "web", "images", "videos" or "news".
 * @return array|null Filter definitions keyed by filter name, or null when
 *                    $page is not one of the supported values.
 */
public function getfilters($page){

	// filters shared by every page type
	$base = [
		"country" => [ // gl=<country> (image: cr=countryAF)
			"display" => "Country",
			"option" => [
				"any" => "Instance's country",
				"af" => "Afghanistan",
				"al" => "Albania",
				"dz" => "Algeria",
				"as" => "American Samoa",
				"ad" => "Andorra",
				"ao" => "Angola",
				"ai" => "Anguilla",
				"aq" => "Antarctica",
				"ag" => "Antigua and Barbuda",
				"ar" => "Argentina",
				"am" => "Armenia",
				"aw" => "Aruba",
				"au" => "Australia",
				"at" => "Austria",
				"az" => "Azerbaijan",
				"bs" => "Bahamas",
				"bh" => "Bahrain",
				"bd" => "Bangladesh",
				"bb" => "Barbados",
				"by" => "Belarus",
				"be" => "Belgium",
				"bz" => "Belize",
				"bj" => "Benin",
				"bm" => "Bermuda",
				"bt" => "Bhutan",
				"bo" => "Bolivia",
				"ba" => "Bosnia and Herzegovina",
				"bw" => "Botswana",
				"bv" => "Bouvet Island",
				"br" => "Brazil",
				"io" => "British Indian Ocean Territory",
				"bn" => "Brunei Darussalam",
				"bg" => "Bulgaria",
				"bf" => "Burkina Faso",
				"bi" => "Burundi",
				"kh" => "Cambodia",
				"cm" => "Cameroon",
				"ca" => "Canada",
				"cv" => "Cape Verde",
				"ky" => "Cayman Islands",
				"cf" => "Central African Republic",
				"td" => "Chad",
				"cl" => "Chile",
				"cn" => "China",
				"cx" => "Christmas Island",
				"cc" => "Cocos (Keeling) Islands",
				"co" => "Colombia",
				"km" => "Comoros",
				"cg" => "Congo",
				"cd" => "Congo, the Democratic Republic",
				"ck" => "Cook Islands",
				"cr" => "Costa Rica",
				"ci" => "Cote D'ivoire",
				"hr" => "Croatia",
				"cu" => "Cuba",
				"cy" => "Cyprus",
				"cz" => "Czech Republic",
				"dk" => "Denmark",
				"dj" => "Djibouti",
				"dm" => "Dominica",
				"do" => "Dominican Republic",
				"ec" => "Ecuador",
				"eg" => "Egypt",
				"sv" => "El Salvador",
				"gq" => "Equatorial Guinea",
				"er" => "Eritrea",
				"ee" => "Estonia",
				"et" => "Ethiopia",
				"fk" => "Falkland Islands (Malvinas)",
				"fo" => "Faroe Islands",
				"fj" => "Fiji",
				"fi" => "Finland",
				"fr" => "France",
				"gf" => "French Guiana",
				"pf" => "French Polynesia",
				"tf" => "French Southern Territories",
				"ga" => "Gabon",
				"gm" => "Gambia",
				"ge" => "Georgia",
				"de" => "Germany",
				"gh" => "Ghana",
				"gi" => "Gibraltar",
				"gr" => "Greece",
				"gl" => "Greenland",
				"gd" => "Grenada",
				"gp" => "Guadeloupe",
				"gu" => "Guam",
				"gt" => "Guatemala",
				"gn" => "Guinea",
				"gw" => "Guinea-Bissau",
				"gy" => "Guyana",
				"ht" => "Haiti",
				"hm" => "Heard Island and Mcdonald Islands",
				"va" => "Holy See (Vatican City State)",
				"hn" => "Honduras",
				"hk" => "Hong Kong",
				"hu" => "Hungary",
				"is" => "Iceland",
				"in" => "India",
				"id" => "Indonesia",
				"ir" => "Iran, Islamic Republic",
				"iq" => "Iraq",
				"ie" => "Ireland",
				"il" => "Israel",
				"it" => "Italy",
				"jm" => "Jamaica",
				"jp" => "Japan",
				"jo" => "Jordan",
				"kz" => "Kazakhstan",
				"ke" => "Kenya",
				"ki" => "Kiribati",
				"kp" => "Korea, Democratic People's Republic",
				"kr" => "Korea, Republic",
				"kw" => "Kuwait",
				"kg" => "Kyrgyzstan",
				"la" => "Lao People's Democratic Republic",
				"lv" => "Latvia",
				"lb" => "Lebanon",
				"ls" => "Lesotho",
				"lr" => "Liberia",
				"ly" => "Libyan Arab Jamahiriya",
				"li" => "Liechtenstein",
				"lt" => "Lithuania",
				"lu" => "Luxembourg",
				"mo" => "Macao",
				"mk" => "Macedonia, the Former Yugoslav Republic",
				"mg" => "Madagascar",
				"mw" => "Malawi",
				"my" => "Malaysia",
				"mv" => "Maldives",
				"ml" => "Mali",
				"mt" => "Malta",
				"mh" => "Marshall Islands",
				"mq" => "Martinique",
				"mr" => "Mauritania",
				"mu" => "Mauritius",
				"yt" => "Mayotte",
				"mx" => "Mexico",
				"fm" => "Micronesia, Federated States",
				"md" => "Moldova, Republic",
				"mc" => "Monaco",
				"mn" => "Mongolia",
				"ms" => "Montserrat",
				"ma" => "Morocco",
				"mz" => "Mozambique",
				"mm" => "Myanmar",
				"na" => "Namibia",
				"nr" => "Nauru",
				"np" => "Nepal",
				"nl" => "Netherlands",
				"an" => "Netherlands Antilles",
				"nc" => "New Caledonia",
				"nz" => "New Zealand",
				"ni" => "Nicaragua",
				"ne" => "Niger",
				"ng" => "Nigeria",
				"nu" => "Niue",
				"nf" => "Norfolk Island",
				"mp" => "Northern Mariana Islands",
				"no" => "Norway",
				"om" => "Oman",
				"pk" => "Pakistan",
				"pw" => "Palau",
				"ps" => "Palestinian Territory, Occupied",
				"pa" => "Panama",
				"pg" => "Papua New Guinea",
				"py" => "Paraguay",
				"pe" => "Peru",
				"ph" => "Philippines",
				"pn" => "Pitcairn",
				"pl" => "Poland",
				"pt" => "Portugal",
				"pr" => "Puerto Rico",
				"qa" => "Qatar",
				"re" => "Reunion",
				"ro" => "Romania",
				"ru" => "Russian Federation",
				"rw" => "Rwanda",
				"sh" => "Saint Helena",
				"kn" => "Saint Kitts and Nevis",
				"lc" => "Saint Lucia",
				"pm" => "Saint Pierre and Miquelon",
				"vc" => "Saint Vincent and the Grenadines",
				"ws" => "Samoa",
				"sm" => "San Marino",
				"st" => "Sao Tome and Principe",
				"sa" => "Saudi Arabia",
				"sn" => "Senegal",
				"cs" => "Serbia and Montenegro",
				"sc" => "Seychelles",
				"sl" => "Sierra Leone",
				"sg" => "Singapore",
				"sk" => "Slovakia",
				"si" => "Slovenia",
				"sb" => "Solomon Islands",
				"so" => "Somalia",
				"za" => "South Africa",
				"gs" => "South Georgia and the South Sandwich Islands",
				"es" => "Spain",
				"lk" => "Sri Lanka",
				"sd" => "Sudan",
				"sr" => "Suriname",
				"sj" => "Svalbard and Jan Mayen",
				"sz" => "Swaziland",
				"se" => "Sweden",
				"ch" => "Switzerland",
				"sy" => "Syrian Arab Republic",
				"tw" => "Taiwan, Province of China",
				"tj" => "Tajikistan",
				"tz" => "Tanzania, United Republic",
				"th" => "Thailand",
				"tl" => "Timor-Leste",
				"tg" => "Togo",
				"tk" => "Tokelau",
				"to" => "Tonga",
				"tt" => "Trinidad and Tobago",
				"tn" => "Tunisia",
				"tr" => "Turkey",
				"tm" => "Turkmenistan",
				"tc" => "Turks and Caicos Islands",
				"tv" => "Tuvalu",
				"ug" => "Uganda",
				"ua" => "Ukraine",
				"ae" => "United Arab Emirates",
				"uk" => "United Kingdom",
				"us" => "United States",
				"um" => "United States Minor Outlying Islands",
				"uy" => "Uruguay",
				"uz" => "Uzbekistan",
				"vu" => "Vanuatu",
				"ve" => "Venezuela",
				"vn" => "Viet Nam",
				"vg" => "Virgin Islands, British",
				"vi" => "Virgin Islands, U.S.",
				"wf" => "Wallis and Futuna",
				"eh" => "Western Sahara",
				"ye" => "Yemen",
				"zm" => "Zambia",
				"zw" => "Zimbabwe"
			]
		],
		"nsfw" => [
			"display" => "NSFW",
			"option" => [
				"yes" => "Yes", // safe=active
				"no" => "No" // safe=off
			]
		]
	];

	// "Newer than" / "Older than" pair, shared by web, videos and news.
	// Defined once so the three pages cannot drift apart.
	$date_range = [
		"newer" => [ // tbs
			"display" => "Newer than",
			"option" => "_DATE"
		],
		"older" => [
			"display" => "Older than",
			"option" => "_DATE"
		]
	];

	// All keys are strings, so array_merge keeps first-seen key order:
	// the argument order below is the order the filters are returned in.
	switch($page){

		case "web":
			return array_merge(
				$base,
				[
					"lang" => [ // lr=<lang> (prefix lang with "lang_")
						"display" => "Language",
						"option" => [
							"any" => "Any language",
							"ar" => "Arabic",
							"bg" => "Bulgarian",
							"ca" => "Catalan",
							"cs" => "Czech",
							"da" => "Danish",
							"de" => "German",
							"el" => "Greek",
							"en" => "English",
							"es" => "Spanish",
							"et" => "Estonian",
							"fi" => "Finnish",
							"fr" => "French",
							"hr" => "Croatian",
							"hu" => "Hungarian",
							"id" => "Indonesian",
							"is" => "Icelandic",
							"it" => "Italian",
							"iw" => "Hebrew",
							"ja" => "Japanese",
							"ko" => "Korean",
							"lt" => "Lithuanian",
							"lv" => "Latvian",
							"nl" => "Dutch",
							"no" => "Norwegian",
							"pl" => "Polish",
							"pt" => "Portuguese",
							"ro" => "Romanian",
							"ru" => "Russian",
							"sk" => "Slovak",
							"sl" => "Slovenian",
							"sr" => "Serbian",
							"sv" => "Swedish",
							"tr" => "Turkish",
							"zh-CN" => "Chinese (Simplified)",
							"zh-TW" => "Chinese (Traditional)"
						]
					]
				],
				$date_range,
				[
					"spellcheck" => [
						"display" => "Spellcheck",
						"option" => [
							"yes" => "Yes",
							"no" => "No"
						]
					]
				]
			);

		case "images":
			return array_merge(
				$base,
				[
					"time" => [ // tbs=qdr:<time>
						"display" => "Time posted",
						"option" => [
							"any" => "Any time",
							"d" => "Past 24 hours",
							"w" => "Past week",
							"m" => "Past month",
							"y" => "Past year"
						]
					],
					"size" => [ // imgsz
						"display" => "Size",
						"option" => [
							"any" => "Any size",
							"l" => "Large",
							"m" => "Medium",
							"i" => "Icon",
							"qsvga" => "Larger than 400x300",
							"vga" => "Larger than 640x480",
							"svga" => "Larger than 800x600",
							"xga" => "Larger than 1024x768",
							"2mp" => "Larger than 2MP",
							"4mp" => "Larger than 4MP",
							"6mp" => "Larger than 6MP",
							"8mp" => "Larger than 8MP",
							"10mp" => "Larger than 10MP",
							"12mp" => "Larger than 12MP",
							"15mp" => "Larger than 15MP",
							"20mp" => "Larger than 20MP",
							"40mp" => "Larger than 40MP",
							"70mp" => "Larger than 70MP"
						]
					],
					"ratio" => [ // imgar
						"display" => "Aspect ratio",
						"option" => [
							"any" => "Any ratio",
							"t|xt" => "Tall",
							"s" => "Square",
							"w" => "Wide",
							"xw" => "Panoramic"
						]
					],
					"color" => [ // imgc
						"display" => "Color",
						"option" => [
							"any" => "Any color",
							"color" => "Full color",
							"bnw" => "Black & white",
							"trans" => "Transparent",
							// from here, imgcolor
							"red" => "Red",
							"orange" => "Orange",
							"yellow" => "Yellow",
							"green" => "Green",
							"teal" => "Teal",
							"blue" => "Blue",
							"purple" => "Purple",
							"pink" => "Pink",
							"white" => "White",
							"gray" => "Gray",
							"black" => "Black",
							"brown" => "Brown"
						]
					],
					"type" => [ // tbs=itp:<type>
						"display" => "Type",
						"option" => [
							"any" => "Any type",
							"clipart" => "Clip Art",
							"lineart" => "Line Drawing",
							"animated" => "Animated"
						]
					],
					"format" => [ // as_filetype
						"display" => "Format",
						"option" => [
							"any" => "Any format",
							"jpg" => "JPG",
							"gif" => "GIF",
							"png" => "PNG",
							"bmp" => "BMP",
							"svg" => "SVG",
							"webp" => "WEBP",
							"ico" => "ICO",
							"craw" => "RAW"
						]
					],
					"rights" => [ // tbs=sur:<rights>
						"display" => "Usage rights",
						"option" => [
							"any" => "Any license",
							"cl" => "Creative Commons licenses",
							"ol" => "Commercial & other licenses"
						]
					]
				]
			);

		case "videos":
			return array_merge(
				$base,
				$date_range,
				[
					"duration" => [
						"display" => "Duration",
						"option" => [
							"any" => "Any duration",
							"s" => "Short (0-4min)", // tbs=dur:s
							"m" => "Medium (4-20min)", // tbs=dur:m
							"l" => "Long (20+ min)" // tbs=dur:l
						]
					],
					"quality" => [
						"display" => "Quality",
						"option" => [
							"any" => "Any quality",
							"h" => "High quality" // tbs=hq:h
						]
					],
					"captions" => [
						"display" => "Captions",
						"option" => [
							"any" => "No preference",
							"yes" => "Closed captioned" // tbs=cc:1
						]
					]
				]
			);

		case "news":
			return array_merge(
				$base,
				$date_range,
				[
					"sort" => [
						"display" => "Sort",
						"option" => [
							"relevance" => "Relevance",
							"date" => "Date" // sbd:1
						]
					]
				]
			);

		default:
			// unknown page type: no filters (previously an implicit null)
			return null;
	}
}
-
/**
 * Fetch a URL over HTTP GET with browser-like request headers.
 *
 * The request prefers HTTP/2, follows redirects, verifies the TLS peer and
 * host, and uses a 30 second connect timeout and a 30 second total timeout.
 *
 * @param mixed  $proxy Handed unchanged to $this->backend->assign_proxy()
 *                      together with the cURL handle.
 * @param string $url   Target URL.
 * @param array  $get   Optional query parameters; encoded with
 *                      http_build_query() and appended to $url.
 * @return string Response body (CURLOPT_RETURNTRANSFER is enabled).
 * @throws Exception Carrying the cURL error message when the transfer fails.
 */
private function get($proxy, $url, $get = []){

	$headers = [
		"User-Agent: " . config::USER_AGENT,
		"Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
		"Accept-Language: en-US,en;q=0.5",
		"Accept-Encoding: gzip",
		"DNT: 1",
		//"Cookie: SOCS=CAESNQgCEitib3FfaWRlbnRpdHlmcm9udGVuZHVpc2VydmVyXzIwMjQwMzE3LjA4X3AwGgJlbiAEGgYIgM7orwY",
		"Connection: keep-alive",
		"Upgrade-Insecure-Requests: 1",
		"Sec-Fetch-Dest: document",
		"Sec-Fetch-Mode: navigate",
		"Sec-Fetch-Site: none",
		"Sec-Fetch-User: ?1",
		"Priority: u=1",
		"TE: trailers"
	];

	if($get !== []){

		// Use "&" when the URL already carries a query string so the
		// parameters are appended instead of producing a second "?".
		$separator = strpos($url, "?") === false ? "?" : "&";
		$url .= $separator . http_build_query($get);
	}

	$curlproc = curl_init();

	// try/finally: the handle is released on every exit path, including
	// the exception thrown below and anything assign_proxy() may throw.
	try{

		curl_setopt($curlproc, CURLOPT_URL, $url);

		curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding
		curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);

		// use http2
		curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);

		curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
		curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
		curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
		curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

		// follow redirects
		curl_setopt($curlproc, CURLOPT_FOLLOWLOCATION, true);
		$this->backend->assign_proxy($curlproc, $proxy);

		$data = curl_exec($curlproc);

		if(curl_errno($curlproc)){

			// read the message while the handle is still open
			throw new Exception(curl_error($curlproc));
		}

		return $data;

	}finally{

		curl_close($curlproc);
	}
}
-
-
-
-
- private function parsepage($html, $pagetype, $search, $proxy, $params){
-
- $out = [
- "status" => "ok",
- "spelling" => [
- "type" => "no_correction",
- "using" => null,
- "correction" => null
- ],
- "npt" => null,
- "answer" => [],
- "web" => [],
- "image" => [],
- "video" => [],
- "news" => [],
- "related" => []
- ];
-
- $this->fuckhtml->load($html);
-
- $this->detect_sorry();
-
- // parse all <style> tags
- $this->parsestyles();
-
- // get javascript images
- $this->scrape_dimg($html);
-
- // get html blobs
- preg_match_all(
- '/function\(\){window\.jsl\.dh\(\'([^\']+?)\',\'(.+?[^\'])\'\);/',
- $html,
- $blobs
- );
-
- $this->blobs = [];
- if(isset($blobs[1])){
-
- for($i=0; $i<count($blobs[1]); $i++){
-
- $this->blobs[$blobs[1][$i]] =
- $this->fuckhtml
- ->parseJsString(
- $blobs[2][$i]
- );
- }
- }
-
- $this->scrape_imagearr($html);
-
- //
- // load result column
- //
- $result_div =
- $this->fuckhtml
- ->getElementById(
- "center_col",
- "div"
- );
-
- if($result_div === false){
-
- throw new Exception("Failed to grep result div");
- }
-
- $this->fuckhtml->load($result_div);
-
- //
- // Get word corrections
- //
- $correction =
- $this->fuckhtml
- ->getElementById(
- "fprs",
- "p"
- );
-
- if($correction){
-
- $this->fuckhtml->load($correction);
-
- $a =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- $using =
- $this->fuckhtml
- ->getElementById(
- "fprsl",
- $a
- );
-
- if($using){
-
- $using =
- $this->fuckhtml
- ->getTextContent(
- $using
- );
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- $type_span =
- $this->fuckhtml
- ->getTextContent(
- $spans[0]
- );
-
- $type = "not_many";
-
- if(
- stripos(
- $type_span,
- "Showing results for"
- ) !== false
- ){
-
- $type = "including";
- }
-
- $correction =
- $this->fuckhtml
- ->getTextContent(
- $a[count($a) - 1]
- );
-
- $out["spelling"] = [
- "type" => $type,
- "using" => $using,
- "correction" => $correction
- ];
- }
-
- // reset
- $this->fuckhtml->load($result_div);
- }else{
-
- // get the "Did you mean?" prompt
- $taw =
- $this->fuckhtml
- ->getElementById(
- "taw"
- );
-
- if($taw){
-
- $this->fuckhtml->load($taw);
-
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) !== 0){
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $as[0]
- );
-
- // @TODO implement did_you_mean
- $out["spelling"] = [
- "type" => "including",
- "using" => $search,
- "correction" => $text
- ];
- }
- }
-
- $this->fuckhtml->load($result_div);
- }
-
- //
- // get notices
- //
- $botstuff =
- $this->fuckhtml
- ->getElementById(
- "botstuff"
- );
-
- // important for later
- $last_page = false;
-
- if($botstuff){
-
- $this->fuckhtml->load($botstuff);
-
- $cards =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "line-height" => "normal"
- ]
- ),
- "div"
- );
-
- foreach($cards as $card){
-
- $this->fuckhtml->load($card);
-
- $h2 =
- $this->fuckhtml
- ->getElementsByTagName(
- "h2"
- );
-
- if(count($h2) !== 0){
-
- $title =
- $this->fuckhtml
- ->getTextContent(
- $h2[0]
- );
-
- $card["innerHTML"] =
- str_replace(
- $h2[0]["outerHTML"],
- "",
- $card["innerHTML"]
- );
- }else{
-
- $title = "Notice";
- }
-
- $div =
- $this->fuckhtml
- ->getElementsByTagName(
- "div"
- );
-
- // probe for related searches div, if found, ignore it cause its shit
- $probe =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "list",
- $div
- );
-
- // also probe for children
- if(count($probe) === 0){
-
- $probe =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "flex-shrink" => "0",
- "-moz-box-flex" => "0",
- "flex-grow" => "0",
- "overflow" => "hidden"
- ]
- ),
- $div
- );
- }
-
- if(count($probe) === 0){
-
- $description = [];
-
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) !== 0){
-
- $first = true;
-
- foreach($as as $a){
-
- $text_link =
- $this->fuckhtml
- ->getTextContent(
- $a
- );
-
- if(stripos($text_link, "repeat the search") !== false){
-
- $last_page = true;
- break 2;
- }
-
- $parts =
- explode(
- $a["outerHTML"],
- $card["innerHTML"],
- 2
- );
-
- $card["innerHTML"] = $parts[1];
-
- $value =
- preg_replace(
- '/ +/',
- " ",
- $this->fuckhtml
- ->getTextContent(
- $parts[0],
- false,
- false
- )
- );
-
- if(strlen(trim($value)) !== 0){
-
- $description[] = [
- "type" => "text",
- "value" => $value
- ];
-
- if($first){
-
- $description[0]["value"] =
- ltrim($description[0]["value"]);
- }
- }
-
- $first = false;
-
- $description[] = [
- "type" => "link",
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $a["attributes"]
- ["href"]
- ),
- "value" => $text_link
- ];
- }
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $card["innerHTML"],
- false,
- false
- );
-
- if(strlen(trim($text)) !== 0){
-
- $description[] = [
- "type" => "text",
- "value" =>
- rtrim(
- $text
- )
- ];
- }
- }
-
- if(count($description) !== 0){
-
- $out["answer"][] = [
- "title" => $title,
- "description" => $description,
- "url" => null,
- "thumb" => null,
- "table" => [],
- "sublink" => []
- ];
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($html);
- }
-
- //
- // get "Related Searches" and "People also search for"
- //
- $relateds =
- $this->fuckhtml
- ->getElementsByClassName(
- "wyccme",
- "div"
- );
-
- foreach($relateds as $related){
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $related
- );
-
- if($text == "More results"){ continue; }
-
- $out["related"][] = $text;
- }
-
- //
- // Get text results
- //
- $results =
- $this->fuckhtml
- ->getElementsByClassName(
- "g",
- "div"
- );
-
- $this->skip_next = false;
-
- foreach($results as $result){
-
- if($this->skip_next){
-
- $this->skip_next = false;
- continue;
- }
-
- $this->fuckhtml->load($result);
-
- $web = [
- "title" => null,
- "description" => null,
- "url" => null,
- "date" => null,
- "type" => "web",
- "thumb" => [
- "url" => null,
- "ratio" => null
- ],
- "sublink" => [],
- "table" => []
- ];
-
- // Detect presence of sublinks
- $g =
- $this->fuckhtml
- ->getElementsByClassName(
- "g",
- "div"
- );
-
- $sublinks = [];
- if(count($g) > 0){
-
- $table =
- $this->fuckhtml
- ->getElementsByTagName(
- "table"
- );
-
- if(count($table) !== 0){
-
- // found some sublinks!
-
- $this->fuckhtml->load($table[0]);
-
- $tds =
- $this->fuckhtml
- ->getElementsByTagName(
- "td"
- );
-
- foreach($tds as $td){
-
- $this->fuckhtml->load($td);
-
- $a =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(
- count($a) === 0 ||
- (
- isset($a[0]["attributes"]["class"]) &&
- $a[0]["attributes"]["class"] == "fl"
- )
- ){
-
- continue;
- }
-
- $td["innerHTML"] =
- str_replace(
- $a[0]["outerHTML"],
- "",
- $td["innerHTML"]
- );
-
- $web["sublink"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $a[0]
- )
- ),
- "description" =>
- html_entity_decode(
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $td
- )
- )
- ),
- "url" =>
- $this->unshiturl(
- $a[0]
- ["attributes"]
- ["href"]
- ),
- "date" => null
- ];
- }
-
- // reset
- $this->fuckhtml->load($result);
- }
-
- // skip on next iteration
- $this->skip_next = true;
- }
-
- // get title
- $h3 =
- $this->fuckhtml
- ->getElementsByTagName(
- "h3"
- );
-
- if(count($h3) === 0){
-
- continue;
- }
-
- $web["title"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $h3[0]
- )
- );
-
- // get url
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- $web["url"] =
- $this->unshiturl(
- $as[0]
- ["attributes"]
- ["href"]
- );
-
- if(
- !preg_match(
- '/^http/',
- $web["url"]
- )
- ){
-
- // skip if invalid url is found
- continue;
- }
-
- //
- // probe for twitter carousel
- //
- $carousel =
- $this->fuckhtml
- ->getElementsByTagName(
- "g-scrolling-carousel"
- );
-
- if(count($carousel) !== 0){
-
- $this->fuckhtml->load($carousel[0]);
-
- $items =
- $this->fuckhtml
- ->getElementsByTagName(
- "g-inner-card"
- );
-
- $has_thumbnail = false;
-
- foreach($items as $item){
-
- $this->fuckhtml->load($item);
-
- if($has_thumbnail === false){
-
- // get thumbnail
- $thumb =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- count($thumb) !== 0 &&
- isset($thumb[0]["attributes"]["id"])
- ){
-
- $web["thumb"] = [
- "url" =>
- $this->getdimg(
- $thumb[0]["attributes"]["id"]
- ),
- "ratio" => "16:9"
- ];
-
- $has_thumbnail = true;
- }
-
- // or else, try getting a thumbnail from next container
- }
-
- // cache div
- $div =
- $this->fuckhtml
- ->getElementsByTagName(
- "div"
- );
-
- // get link
- $links =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- // get description of carousel sublink
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "heading",
- $div
- );
-
- if(count($description) !== 0){
-
- $description =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- );
- }else{
-
- $description = null;
- }
-
- $bottom =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "style",
- "z-index:2",
- $div
- );
-
- $title = null;
- $date = null;
- if(count($bottom) !== 0){
-
- $this->fuckhtml->load($bottom[0]);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- $title =
- $this->fuckhtml
- ->getTextContent(
- $spans[0]
- );
-
- $date =
- strtotime(
- $this->fuckhtml
- ->getTextContent(
- $spans[count($spans) - 1]
- )
- );
- }
-
- $web["sublink"][] = [
- "title" => $title,
- "description" => $description,
- "url" =>
- $this->unshiturl(
- $links[0]
- ["attributes"]
- ["href"]
- ),
- "date" => $date
- ];
- }
-
- $out["web"][] = $web;
- continue;
- }
-
- //
- // get viewcount, time posted and follower count from <cite> tag
- //
- $cite =
- $this->fuckhtml
- ->getElementsByTagName(
- "cite"
- );
-
- if(count($cite) !== 0){
-
- $this->fuckhtml->load($cite[0]);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName("span");
-
- if(count($spans) === 0){
-
- $cites =
- explode(
- "·",
- $this->fuckhtml
- ->getTextContent(
- $cite[0]
- )
- );
-
- foreach($cites as $cite){
-
- $cite = trim($cite);
-
- if(
- preg_match(
- '/(.+) (views|followers|likes)$/',
- $cite,
- $match
- )
- ){
-
- $web["table"][ucfirst($match[2])] =
- $match[1];
- }elseif(
- preg_match(
- '/ago$/',
- $cite
- )
- ){
-
- $web["date"] =
- strtotime($cite);
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($result);
- }
-
- //
- // attempt to fetch description cleanly
- //
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "style",
- "-webkit-line-clamp:2"
- );
-
- if(count($description) !== 0){
-
- $web["description"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- );
- }else{
-
- // use ANOTHER method where the description is a header of the result
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "wa:/description"
- );
-
- if(count($description) !== 0){
-
- // get date off that shit
- $date =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "font-size" => "12px",
- "line-height" => "1.34",
- "display" => "inline-block",
- "font-family" => "google sans,arial,sans-serif",
- "padding-right" => "0",
- "white-space" => "nowrap"
- ]
- ),
- "span"
- );
-
- if(count($date) !== 0){
-
- $description[0]["innerHTML"] =
- str_replace(
- $date[0]["outerHTML"],
- "",
- $description[0]["innerHTML"]
- );
-
- $web["date"] =
- strtotime(
- $this->fuckhtml
- ->getTextContent(
- $date[0]
- )
- );
- }
-
- $web["description"] =
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- );
- }else{
-
- // Yes.. You guessed it, use ANOTHER method to get descriptions
- // off youtube containers
- $description =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "-webkit-box-orient" => "vertical",
- "display" => "-webkit-box",
- "font-size" => "14px",
- "-webkit-line-clamp" => "2",
- "line-height" => "22px",
- "overflow" => "hidden",
- "word-break" => "break-word",
- "color" => "#4d5156"
- ]
- ),
- "div"
- );
-
- if(count($description) !== 0){
-
- // check for video duration
- $duration =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "background-color" => "rgba(0,0,0,0.6)",
- "color" => "#fff",
- "fill" => "#fff"
- ]
- ),
- "div"
- );
-
- if(count($duration) !== 0){
-
- $web["table"]["Duration"] =
- $this->fuckhtml
- ->getTextContent(
- $duration[0]
- );
- }
-
- $web["description"] =
- $this->titledots(
- html_entity_decode(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- )
- )
- );
-
- // get author + time posted
- $info =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "color" => "var(" . $this->getcolorvar("#70757a") . ")",
- "font-size" => "14px",
- "line-height" => "20px",
- "margin-top" => "12px"
- ]
- ),
- "div"
- );
-
- if(count($info) !== 0){
-
- $info =
- explode(
- "·",
- $this->fuckhtml
- ->getTextContent(
- $info[0]
- )
- );
-
- switch(count($info)){
-
- case 3:
- $web["table"]["Author"] = trim($info[1]);
- $web["date"] = strtotime(trim($info[2]));
- break;
-
- case 2:
- $web["date"] = strtotime(trim($info[1]));
- break;
- }
- }
- }
- }
- }
-
- //
- // get categories of content within the search result
- //
- $cats =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "data-sncf",
- "div"
- );
-
- foreach($cats as $cat){
-
- $this->fuckhtml->load($cat);
-
- // detect image category
- $images =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(count($images) !== 0){
-
- foreach($images as $image){
-
- if(isset($image["attributes"]["id"])){
- // we found an image
-
- if(isset($image["attributes"]["width"])){
-
- $width = (int)$image["attributes"]["width"];
-
- if($width == 110){
-
- $ratio = "1:1";
- }elseif($width > 110){
-
- $ratio = "16:9";
- }else{
-
- $ratio = "9:16";
- }
- }else{
-
- $ratio = "1:1";
- }
-
- $web["thumb"] = [
- "url" => $this->getdimg($image["attributes"]["id"]),
- "ratio" => $ratio
- ];
-
- continue 2;
- }
- }
- }
-
- // Detect rating
- $spans_unfiltered =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- $spans =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "aria-label",
- $spans_unfiltered
- );
-
- foreach($spans as $span){
-
- if(
- preg_match(
- '/^Rated/',
- $span["attributes"]["aria-label"]
- )
- ){
-
- // found rating
- // scrape rating
- preg_match(
- '/([0-9.]+).*([0-9.]+)/',
- $span["attributes"]["aria-label"],
- $rating
- );
-
- if(isset($rating[1])){
-
- $web["table"]["Rating"] =
- $rating[1] . "/" . $rating[2];
- }
-
- $has_seen_reviews = 0;
- foreach($spans_unfiltered as $span_unfiltered){
-
- if(
- preg_match(
- '/([0-9,.]+) +([A-z]+)$/',
- $this->fuckhtml
- ->getTextContent(
- $span_unfiltered
- ),
- $votes
- )
- ){
-
- $has_seen_reviews++;
- $web["table"][ucfirst($votes[2])] = $votes[1];
- continue;
- }
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $span_unfiltered
- );
-
- if(
- $text == " " ||
- $text == ""
- ){
-
- break;
- }
-
- switch($has_seen_reviews){
-
- case 1:
- // scrape price
- $web["table"]["Price"] = $text;
- $has_seen_reviews++;
- break;
-
- case 2:
- // scrape platform
- $web["table"]["Platform"] = $text;
- $has_seen_reviews++;
- break;
-
- case 3:
- // Scrape type
- $web["table"]["Medium"] = $text;
- break;
- }
- }
-
- continue 2;
- }
- }
-
- // check if its a table of small sublinks
- $table =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "display" => "table",
- "white-space" => "nowrap",
- "margin" => "5px 0",
- "line-height" => "1.58",
- "color" => "var(" . $this->getcolorvar("#70757a") . ")"
- ]
- ),
- "div"
- );
-
- if(count($table) !== 0){
-
- $this->fuckhtml->load($table[0]);
-
- $rows =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "display" => "flex",
- "white-space" => "normal"
- ]
- ),
- "div"
- );
-
- foreach($rows as $row){
-
- $this->fuckhtml->load($row);
-
- $sublink = [
- "title" => null,
- "description" => null,
- "url" => null,
- "date" => null
- ];
-
- $link =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- )[0];
-
- $sublink["title"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $link
- )
- );
-
- $sublink["url"] =
- $this->unshiturl(
- $link
- ["attributes"]
- ["href"]
- );
-
- $row["innerHTML"] =
- str_replace(
- $link["outerHTML"],
- "",
- $row["innerHTML"]
- );
-
- $this->fuckhtml->load($row);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- foreach($spans as $span){
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $span
- );
-
- if(
- preg_match(
- '/answers?$/',
- $text
- )
- ){
-
- $sublink["description"] =
- $text;
-
- continue;
- }
-
- $time = strtotime($text);
-
- if($time !== false){
-
- $sublink["date"] = $time;
- }
- }
-
- $web["sublink"][] = $sublink;
- }
-
- // reset
- $this->fuckhtml->load($cat);
- continue;
- }
-
- // check if its an answer header
- $answer_header =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "overflow" => "hidden",
- "text-overflow" => "ellipsis"
- ]
- ),
- "span"
- );
-
- if(count($answer_header) !== 0){
-
- $link =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- $cat["innerHTML"] =
- str_replace(
- $link[0]["outerHTML"],
- "",
- $cat["innerHTML"]
- );
-
- $web["sublink"][] = [
- "title" =>
- $this->fuckhtml
- ->getTextContent(
- $link[0]
- ),
- "description" =>
- $this->titledots(
- trim(
- str_replace(
- "\xc2\xa0",
- " ",
- html_entity_decode(
- $this->fuckhtml
- ->getTextContent(
- $cat
- )
- )
- ),
- " ·"
- )
- ),
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $link[0]
- ["attributes"]
- ["href"]
- ),
- "date" => null
- ];
-
- continue;
- }
-
- // check if its list of small sublinks
- $urls =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($urls) !== 0){
-
- // found small links
- foreach($urls as $url){
-
- $target =
- $this->fuckhtml
- ->getTextContent(
- $url
- ["attributes"]
- ["href"]
- );
-
- if(
- !preg_match(
- '/^http/',
- $target
- )
- ){
-
- continue;
- }
-
- $web["sublink"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $url
- )
- ),
- "description" => null,
- "url" => $target,
- "date" => null
- ];
- }
-
- continue;
- }
-
- // we probed everything, assume this is the description
- // if we didn't find one cleanly previously
- if($web["description"] === null){
- $web["description"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $cat
- )
- );
- }
- }
-
- // check if description contains date
- $description = explode("—", $web["description"], 2);
-
- if(
- count($description) === 2 &&
- strlen($description[0]) <= 20
- ){
-
- $date = strtotime($description[0]);
-
- if($date !== false){
-
- $web["date"] = $date;
- $web["description"] = ltrim($description[1]);
- }
- }
-
- // fetch youtube thumbnail
- $thumbnail =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "border-radius" => "8px",
- "height" => "fit-content",
- "justify-content" => "center",
- "margin-right" => "20px",
- "margin-top" => "4px",
- "position" => "relative",
- "width" => "fit-content"
- ]
- ),
- "div"
- );
-
- if(count($thumbnail) !== 0){
-
- // load thumbnail container
- $this->fuckhtml->load($thumbnail[0]);
-
- $image =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- count($image) !== 0 &&
- isset($image[0]["attributes"]["id"])
- ){
-
- $web["thumb"] = [
- "url" =>
- $this->unshit_thumb(
- $this->getdimg(
- $image[0]["attributes"]["id"]
- )
- ),
- "ratio" => "16:9"
- ];
- }
-
- // reset
- $this->fuckhtml->load($result);
- }
-
- $out["web"][] = $web;
- }
-
- // reset
- $this->fuckhtml->load($result_div);
-
- //
- // Get instant answers
- //
- $answer_containers =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "padding-left" => "0px",
- "padding-right" => "0px"
- ]
- ),
- "div"
- );
-
- $date_class =
- $this->getstyle(
- [
- "font-size" => "12px",
- "line-height" => "1.34",
- "display" => "inline-block",
- "font-family" => "google sans,arial,sans-serif",
- "padding-right" => "0",
- "white-space" => "nowrap"
- ]
- );
-
- foreach($answer_containers as $container){
-
- $this->fuckhtml->load($container);
-
- $web = [
- "title" => null,
- "description" => null,
- "url" => null,
- "date" => null,
- "type" => "web",
- "thumb" => [
- "url" => null,
- "ratio" => null
- ],
- "sublink" => [],
- "table" => []
- ];
-
- $answers =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "aria-controls",
- "div"
- );
-
- $item_insert_pos = 1;
- foreach($answers as $answer){
-
- $out["related"][] =
- $this->fuckhtml
- ->getTextContent(
- $answer
- );
-
- if(
- isset(
- $this->blobs[
- $answer
- ["attributes"]
- ["aria-controls"]
- ]
- )
- ){
-
- $this->fuckhtml->load(
- $this->blobs[
- $answer
- ["attributes"]
- ["aria-controls"]
- ]
- );
-
- $divs =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "id",
- "div"
- );
-
- foreach($divs as $div){
-
- if(
- !isset(
- $this->blobs[
- $div
- ["attributes"]
- ["id"]
- ]
- )
- ){
-
- continue;
- }
-
- $this->fuckhtml->load(
- $this->blobs[
- $div
- ["attributes"]
- ["id"]
- ]
- );
-
- // get url
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) !== 0){
-
- $web["url"] =
- $this->unshiturl(
- $as[0]["attributes"]["href"]
- );
-
- // skip entries that redirect to a search
- if(
- !preg_match(
- '/^http/',
- $web["url"]
- )
- ){
-
- continue 3;
- }
- }
-
- // get title
- $h3 =
- $this->fuckhtml
- ->getElementsByTagName(
- "h3"
- );
-
- if(count($h3) !== 0){
-
- $web["title"] =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $h3[0]
- )
- );
- }
-
- $description =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "wa:/description",
- "div"
- );
-
- if(count($description) !== 0){
-
- // check for date
- $this->fuckhtml->load($description[0]);
-
- $date =
- $this->fuckhtml
- ->getElementsByClassName(
- $date_class,
- "span"
- );
-
- if(count($date) !== 0){
-
- $description[0]["innerHTML"] =
- str_replace(
- $date[0]["outerHTML"],
- "",
- $description[0]["innerHTML"]
- );
-
- $web["date"] =
- strtotime(
- $this->fuckhtml
- ->getTextContent(
- $date[0]
- )
- );
- }
-
- $web["description"] =
- ltrim(
- $this->fuckhtml
- ->getTextContent(
- $description[0]
- ),
- ": "
- );
- }
- }
-
- foreach($out["web"] as $item){
-
- if($item["url"] == $web["url"]){
-
- continue 2;
- }
- }
-
- array_splice($out["web"], $item_insert_pos, 0, [$web]);
- $item_insert_pos++;
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($result_div);
-
- //
- // Scrape word definition
- //
- $definition_container =
- $this->fuckhtml
- ->getElementsByClassName(
- "lr_container",
- "div"
- );
-
- if(count($definition_container) !== 0){
-
- $this->fuckhtml->load($definition_container[0]);
-
- // get header
- $header =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "EntryHeader",
- "div"
- );
-
- if(count($header) !== 0){
-
- $description = [];
-
- $this->fuckhtml->load($header[0]);
-
- $title_div =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "font-family" => "google sans,arial,sans-serif",
- "font-size" => "28px",
- "line-height" => "36px"
- ]
- )
- );
-
- if(count($title_div) !== 0){
-
- $title =
- $this->fuckhtml
- ->getTextContent(
- $title_div[0]
- );
- }else{
-
- $title = "Word definition";
- }
-
- $subtext_div =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "font-family" => "arial,sans-serif",
- "font-size" => "14px",
- "line-height" => "22px"
- ]
- ),
- "span"
- );
-
- if(count($subtext_div) !== 0){
-
- $description[] = [
- "type" => "quote",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $subtext_div[0]
- )
- ];
- }
-
- // get audio
- $audio =
- $this->fuckhtml
- ->getElementsByTagName(
- "audio"
- );
-
- if(count($audio) !== 0){
-
- $this->fuckhtml->load($audio[0]);
-
- $source =
- $this->fuckhtml
- ->getElementsByTagName(
- "source"
- );
-
- if(count($source) !== 0){
-
- $description[] = [
- "type" => "audio",
- "url" =>
- preg_replace(
- '/^\/\//',
- "https://",
- $this->fuckhtml
- ->getTextContent(
- $source[0]
- ["attributes"]
- ["src"]
- )
- )
- ];
- }
-
- }
-
- // remove header to avoid confusion
- $definition_container[0]["innerHTML"] =
- str_replace(
- $header[0]["outerHTML"],
- "",
- $definition_container[0]["innerHTML"]
- );
-
- // reset
- $this->fuckhtml->load($definition_container[0]);
-
- $vmods =
- $this->fuckhtml
- ->getElementsByClassName(
- "vmod",
- "div"
- );
-
- foreach($vmods as $category){
-
- if(
- !isset(
- $category
- ["attributes"]
- ["data-topic"]
- ) ||
- $category
- ["attributes"]
- ["class"] != "vmod"
- ){
-
- continue;
- }
-
- $this->fuckhtml->load($category);
-
- // get category type
- $type =
- $this->fuckhtml
- ->getElementsByTagName(
- "i"
- );
-
- if(count($type) !== 0){
-
- $description[] = [
- "type" => "title",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $type[0]
- )
- ];
- }
-
- // get heading text
- $headings =
- $this->fuckhtml
- ->getElementsByClassName(
- "xpdxpnd",
- "div"
- );
-
- foreach($headings as $heading){
-
- $description[] = [
- "type" => "quote",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $heading
- )
- ];
- }
-
- $definitions =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "SenseDefinition",
- "div"
- );
-
- $i = 1;
- $text = [];
-
- foreach($definitions as $definition){
-
- $text[] =
- $i . ". " .
- $this->fuckhtml
- ->getTextContent(
- $definition
- );
-
- $i++;
- }
-
- if(count($text) !== 0){
-
- $description[] = [
- "type" => "text",
- "value" =>
- implode("\n", $text)
- ];
- }
- }
-
- $out["answer"][] = [
- "title" => $title,
- "description" => $description,
- "url" => null,
- "thumb" => null,
- "table" => [],
- "sublink" => []
- ];
- }
-
- // reset
- $this->fuckhtml->load($result_div);
- }
-
- //
- // scrape elements with a g-section-with-header
- // includes: images, news carousels
- //
-
- $g_sections =
- $this->fuckhtml
- ->getElementsByTagName(
- "g-section-with-header"
- );
-
- if(count($g_sections) !== 0){
- foreach($g_sections as $g_section){
-
- // parse elements with a g-section-with-header
- $this->fuckhtml->load($g_section);
-
- $div_title =
- $this->fuckhtml
- ->getElementsByClassName(
- "a-no-hover-decoration",
- "a"
- );
-
- if(count($div_title) !== 0){
-
- // title detected, skip
- continue;
- }
-
- // no title detected: detect news container
- $news =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "outline-offset" => "-1px",
- "outline-width" => "1px",
- "display" => "flex",
- "flex-direction" => "column",
- "flex-grow" => "1"
- ]
- )
- );
-
- foreach($news as $new){
-
- $this->fuckhtml->load($new);
-
- $image =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "id",
- "img"
- );
-
- if(
- count($image) !== 0 &&
- !(
- isset($image[0]["attributes"]["style"]) &&
- strpos(
- $image[0]["attributes"]["style"],
- "height:18px"
- ) !== false
- )
- ){
-
- $thumb = [
- "url" =>
- $this->getdimg(
- $image[0]
- ["attributes"]
- ["id"]
- ),
- "ratio" => "1:1"
- ];
- }
-
- $title =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "heading",
- "div"
- )[0]
- )
- );
-
- $date_div =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "style",
- "div"
- );
-
- $date = null;
-
- if(count($date_div) !== 0){
-
- foreach($date_div as $div){
-
- if(
- strpos(
- $div["attributes"]["style"],
- "bottom:"
- ) !== false
- ){
-
- $date =
- strtotime(
- $this->fuckhtml
- ->getTextContent(
- $div
- )
- );
- break;
- }
- }
- }else{
-
- $date = null;
- }
-
- $out["news"][] = [
- "title" => $title,
- "description" => null,
- "date" => $date,
- "thumb" => $thumb,
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $new
- ["attributes"]
- ["href"]
- )
- ];
- }
- }
-
- // reset
- $this->fuckhtml->load($result_div);
- }
-
- //
- // Parse images (carousel, left hand-side)
- //
- $image_carousels =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "id",
- "media_result_group",
- "div"
- );
-
- if(count($image_carousels) !== 0){
-
- foreach($image_carousels as $image_carousel){
-
- $this->fuckhtml->load($image_carousel);
-
- // get related searches in image carousel
- $relateds =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "display" => "inline-block",
- "margin-right" => "6px",
- "outline" => "none",
- "padding" => "6px 0"
- ],
- "a"
- )
- );
-
- foreach($relateds as $related){
-
- if(!isset($related["innerHTML"])){
-
- // found an image
- continue;
- }
-
- $text =
- $this->fuckhtml
- ->getTextContent(
- $related
- );
-
- if($text != ""){
-
- $out["related"][] = $text;
- }
- }
-
- $div =
- $this->fuckhtml
- ->getElementsByTagName(
- "div"
- );
-
- // get loaded images
- $images =
- $this->fuckhtml
- ->getElementsByClassName(
- "ivg-i",
- $div
- );
-
- foreach($images as $image){
-
- $this->fuckhtml->load($image);
-
- $img_tags =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- !isset($image["attributes"]["data-docid"]) ||
- !isset($this->image_arr[$image["attributes"]["data-docid"]])
- ){
-
- continue;
- }
-
- // search for the right image tag
- $image_tag = false;
- foreach($img_tags as $img){
-
- if(
- isset(
- $img
- ["attributes"]
- ["alt"]
- ) &&
- trim(
- $img
- ["attributes"]
- ["alt"]
- ) != ""
- ){
-
- $image_tag = $img;
- break;
- }
- }
-
- if($image_tag === false){
-
- continue;
- }
-
- $out["image"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $image_tag
- ["attributes"]
- ["alt"]
- )
- ),
- "source" =>
- $this->image_arr[
- $image
- ["attributes"]
- ["data-docid"]
- ],
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $image
- ["attributes"]
- ["data-lpage"]
- )
- ];
- }
-
- // get unloaded javascript images
- $images_js_sel =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "id",
- $div
- );
-
- $loaded = [];
-
- foreach($images_js_sel as $sel){
-
- if(
- !isset($this->blobs[$sel["attributes"]["id"]]) ||
- in_array((string)$sel["attributes"]["id"], $loaded, true)
- ){
-
- // not an unloaded javascript image
- continue;
- }
-
- $loaded[] = $sel["attributes"]["id"];
-
- // get yet another javascript component
- $this->fuckhtml->load($this->blobs[$sel["attributes"]["id"]]);
-
- // get js node: contains title & url
- $js_node =
- $this->fuckhtml
- ->getElementsByTagName(
- "div"
- )[0];
-
- if(!isset($this->blobs[$js_node["attributes"]["id"]])){
-
- // did not find refer id
- continue;
- }
-
- // load second javascript component
- $this->fuckhtml->load($this->blobs[$js_node["attributes"]["id"]]);
-
- // get title from image alt text.
- // data-src from this image is cropped, ignore it..
- $img =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- )[0];
-
- $out["image"][] = [
- "title" =>
- $this->fuckhtml
- ->getTextContent(
- $img["attributes"]["alt"]
- ),
- "source" =>
- $this->image_arr[
- $js_node["attributes"]["data-docid"]
- ],
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $js_node["attributes"]["data-lpage"]
- )
- ];
- }
- }
-
- // reset
- $this->fuckhtml->load($result_div);
- }
-
- //
- // Parse videos
- //
- $this->fuckhtml->load($result_div);
-
- $videos =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "data-vid",
- "div"
- );
-
- foreach($videos as $video){
-
- $this->fuckhtml->load($video);
-
- // get url
- $url =
- $this->fuckhtml
- ->getTextContent(
- $video
- ["attributes"]
- ["data-surl"]
- );
-
- foreach($out["web"] as $link){
-
- if($link["url"] == $url){
-
- // ignore if we already have the video in $out["web"]
- continue 2;
- }
- }
-
- // get heading element
- $heading =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "role",
- "heading",
- "div"
- );
-
- if(count($heading) === 0){
-
- // no heading, fuck this.
- continue;
- }
-
- // get thumbnail before loading heading object
- $image =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "id",
- "img"
- );
-
- if(count($image) !== 0){
-
- $thumb = [
- "url" => $this->getdimg($image[0]["attributes"]["id"]),
- "ratio" => "16:9"
- ];
- }else{
-
- $thumb = [
- "url" => null,
- "ratio" => null
- ];
- }
-
- // get duration
- $duration_div =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "border-radius" => "10px",
- "font-family" => "arial,sans-serif-medium,sans-serif",
- "font-size" => "12px",
- "line-height" => "16px",
- "padding-block" => "2px",
- "padding-inline" => "8px"
- ]
- ),
- "div"
- );
-
- if(count($duration_div) !== 0){
-
- $duration =
- $this->hms2int(
- $this->fuckhtml
- ->getTextContent(
- $duration_div[0]
- )
- );
- }else{
-
- // check if its a livestream
- $duration =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "background-color" => "#d93025",
- "border-radius" => "10px",
- "color" => "#fff",
- "font-family" => "arial,sans-serif-medium,sans-serif",
- "font-size" => "12px",
- "line-height" => "16px",
- "padding-block" => "2px",
- "padding-inline" => "8px"
- ]
- ),
- "span"
- );
-
- if(count($duration) !== 0){
-
- $duration = "_LIVE";
- }else{
-
- $duration = null;
- }
- }
-
- // load heading
- $this->fuckhtml->load($heading[0]);
-
- // get title
- $title =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "font-family" => "arial,sans-serif",
- "font-size" => "16px",
- "font-weight" => "400",
- "line-height" => "24px"
- ]
- ),
- "div"
- );
-
- if(count($title) === 0){
-
- // ?? no title
- continue;
- }
-
- $title =
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $title[0]
- )
- );
-
- // get date
- $date_div =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "color" => "var(" . $this->getcolorvar("#70757a") . ")",
- "font-size" => "14px"
- ]
- ),
- "div"
- );
-
- if(count($date_div) !== 0){
-
- $date = strtotime(
- $this->fuckhtml
- ->getTextContent(
- $date_div[0]
- )
- );
-
- if($date === false){
-
- // failed to parse date
- $date = null;
- }
- }else{
-
- $date = null;
- }
-
- $out["video"][] = [
- "title" => $title,
- "description" => null,
- "date" => $date,
- "duration" => $duration,
- "views" => null,
- "thumb" => $thumb,
- "url" => $url
- ];
- }
-
- //
- // Parse featured results (which contain images, fuck the rest desu)
- //
- $this->fuckhtml->load($html);
- $top =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "aria-label",
- "Featured results",
- "div"
- );
-
- if(count($top) !== 0){
-
- $this->fuckhtml->load($top[0]);
-
- // get images
- $grid =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "border-radius" => "20px",
- "display" => "grid",
- "grid-gap" => "2px",
- "grid-template-rows" => "repeat(2,minmax(0,1fr))",
- "overflow" => "hidden",
- "bottom" => "0",
- "left" => "0",
- "right" => "0",
- "top" => "0",
- "position" => "absolute",
- ]
- ),
- "div"
- );
-
- if(count($grid) !== 0){
-
- // we found image grid
- $this->fuckhtml->load($grid[0]);
-
- $images_div =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "data-attrid",
- "div"
- );
-
- foreach($images_div as $image_div){
-
- $this->fuckhtml->load($image_div);
-
- $image =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- count($image) === 0 ||
- !isset($image_div["attributes"]["data-docid"]) ||
- !isset($this->image_arr[$image_div["attributes"]["data-docid"]])
- ){
-
- // ?? no image, continue
- continue;
- }
-
- $out["image"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $image[0]["attributes"]["alt"]
- )
- ),
- "source" =>
- $this->image_arr[
- $image_div["attributes"]["data-docid"]
- ],
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $image_div["attributes"]["data-lpage"]
- )
- ];
- }
- }
- }
-
-
- //
- // craft $npt token
- //
- if(
- $last_page === false &&
- count($out["web"]) !== 0
- ){
- if(!isset($params["start"])){
-
- $params["start"] = 20;
- }else{
-
- $params["start"] += 20;
- }
-
- $out["npt"] =
- $this->backend
- ->store(
- json_encode($params),
- $pagetype,
- $proxy
- );
- }
-
-
- //
- // Parse right handside
- //
- $this->fuckhtml->load($html);
-
- $rhs =
- $this->fuckhtml
- ->getElementById(
- "rhs"
- );
-
- if($rhs === null){
-
- return $out;
- }
-
- $this->fuckhtml->load($rhs);
-
- // get images gallery
- $image_gallery =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-rc",
- "ivg-i",
- "div"
- );
-
- if(count($image_gallery) !== 0){
-
- $this->fuckhtml->load($image_gallery[0]);
-
- // get images
- $images_div =
- $this->fuckhtml
- ->getElementsByClassName(
- "ivg-i",
- "div"
- );
-
- foreach($images_div as $image_div){
-
- $this->fuckhtml->load($image_div);
-
- $image =
- $this->fuckhtml
- ->getElementsByTagName(
- "img"
- );
-
- if(
- count($image) === 0 ||
- !isset(
- $this->image_arr[
- $image_div
- ["attributes"]
- ["data-docid"]
- ]
- )
- ){
-
- continue;
- }
-
- foreach($out["image"] as $existing_image){
-
- // might already exist
- if(
- $existing_image["source"][1]["url"] ==
- $this->image_arr[
- $image_div
- ["attributes"]
- ["data-docid"]
- ][1]["url"]
- ){
-
- continue 2;
- }
- }
-
- $out["image"][] = [
- "title" =>
- $this->titledots(
- $this->fuckhtml
- ->getTextContent(
- $image[0]
- ["attributes"]
- ["alt"]
- )
- ),
- "source" =>
- $this->image_arr[
- $image_div
- ["attributes"]
- ["data-docid"]
- ],
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $image_div
- ["attributes"]
- ["data-lpage"]
- )
- ];
- }
-
- // reset
- $this->fuckhtml->load($rhs);
- }
-
- // get header container
- $header =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "padding" => "0 0 16px 20px",
- "display" => "flex"
- ]
- ),
- "div"
- );
-
- // stop parsing wikipedia heads if there isn't a header
- $description = [];
- $title = "About";
-
- if(count($header) !== 0){
-
- $this->fuckhtml->load($header[0]);
-
- // g-snackbar-action present: we found a button instead
- if(
- count(
- $this->fuckhtml
- ->getElementsByTagName(
- "g-snackbar-action"
- )
- ) !== 0
- ){
-
- $title_tag =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "title",
- "div"
- );
-
- if(count($title_tag) !== 0){
- $title =
- $this->fuckhtml
- ->getTextContent(
- $title_tag[0]
- );
-
- $header[0]["innerHTML"] =
- str_replace(
- $title_tag[0]["outerHTML"],
- "",
- $header[0]["innerHTML"]
- );
-
- // if header still contains text, add it as a subtitle in description
- $subtitle =
- $this->fuckhtml
- ->getTextContent(
- $header[0]
- );
-
- if(strlen($subtitle) !== 0){
-
- $description[] = [
- "type" => "quote",
- "value" => $subtitle
- ];
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($rhs);
- }
-
- // get description elements
- $url = null;
-
- $text =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "description",
- "div"
- );
-
- if(count($text) !== 0){
-
- $this->fuckhtml->load($text[0]);
-
- $a =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($a) !== 0){
- // get link and remove it from description
-
- $a = $a[count($a) - 1];
-
- $text[0]["innerHTML"] =
- str_replace(
- $a["outerHTML"],
- "",
- $text[0]["innerHTML"]
- );
-
- $url =
- $this->fuckhtml
- ->getTextContent(
- $a
- ["attributes"]
- ["href"]
- );
- }
-
- $description[] = [
- "type" => "text",
- "value" =>
- html_entity_decode(
- preg_replace(
- '/^Description/',
- "",
- $this->fuckhtml
- ->getTextContent(
- $text[0]
- )
- )
- )
- ];
-
- // reset
- $this->fuckhtml->load($rhs);
- }
-
- // get reviews (google play, steam, etc)
- $review_container =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "align-items" => "start",
- "display" => "flex"
- ]
- ),
- "div"
- );
-
- if(count($review_container) !== 0){
-
- $this->fuckhtml->load($review_container[0]);
-
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) !== 0){
-
- $description[] = [
- "type" => "title",
- "value" => "Ratings"
- ];
-
- foreach($as as $a){
-
- $this->fuckhtml->load($a);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- if(count($spans) >= 2){
-
- $value =
- trim(
- $this->fuckhtml
- ->getTextContent(
- $spans[1]
- ),
- "· "
- );
-
- if(
- $value == "" &&
- isset($spans[2])
- ){
-
- $value =
- $this->fuckhtml
- ->getTextContent(
- $spans[2]
- );
- }
-
- $description[] = [
- "type" => "link",
- "url" =>
- $this->fuckhtml
- ->getTextContent(
- $a["attributes"]
- ["href"]
- ),
- "value" => $value
- ];
-
- $description[] = [
- "type" => "text",
- "value" =>
- ": " .
- $this->fuckhtml
- ->getTextContent(
- $spans[0]
- ) . "\n"
- ];
- }
- }
- }
-
- // reset
- $this->fuckhtml->load($rhs);
- }
-
- // initialize sublinks
- $sublinks = [];
-
- // get description from business
- if(count($description) === 0){
-
- $data_attrid =
- $this->fuckhtml
- ->getElementsByAttributeName(
- "data-attrid"
- );
-
- $summary =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "kc:/local:one line summary",
- $data_attrid
- );
-
- if(count($summary) !== 0){
-
- $description[] = [
- "type" => "quote",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $summary[0]
- )
- ];
-
- // remove summary so it doesnt get parsed as a table
- $rhs["innerHTML"] =
- str_replace(
- $summary[0]["outerHTML"],
- "",
- $rhs["innerHTML"]
- );
-
- $this->fuckhtml->load($rhs);
- }
-
- $address =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "kc:/location/location:address",
- $data_attrid
- );
-
- if(count($address) !== 0){
-
- $description[] = [
- "type" => "text",
- "value" =>
- $this->fuckhtml
- ->getTextContent(
- $address[0]
- )
- ];
- }
-
- // get title
- $title_div =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "title",
- $data_attrid
- );
-
- if(count($title_div) !== 0){
-
- $title =
- $this->fuckhtml
- ->getTextContent(
- $title_div[0]
- );
- }
-
- // get phone number
- $phone =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "kc:/local:alt phone",
- $data_attrid
- );
-
- if(count($phone) !== 0){
-
- $this->fuckhtml->load($phone[0]);
-
- $sublinks["Call"] =
- "tel:" .
- $this->fuckhtml
- ->getTextContent(
- $this->fuckhtml
- ->getElementsByAttributeName(
- "aria-label",
- "span"
- )[0]
- );
-
- $this->fuckhtml->load($rhs);
- }
- }
-
- if(count($description) === 0){
-
- // still no description? abort
- return $out;
- }
-
- // get table elements
- $table = [];
- $table_elems =
- $this->fuckhtml
- ->getElementsByClassName(
- $this->getstyle(
- [
- "margin-top" => "7px"
- ]
- ),
- "div"
- );
-
- foreach($table_elems as $elem){
-
- $this->fuckhtml->load($elem);
-
- $spans =
- $this->fuckhtml
- ->getElementsByTagName(
- "span"
- );
-
- if(count($spans) === 0){
-
- // ?? invalid
- continue;
- }
-
- $elem["innerHTML"] =
- str_replace(
- $spans[0]["outerHTML"],
- "",
- $elem["innerHTML"]
- );
-
- $key =
- rtrim(
- $this->fuckhtml
- ->getTextContent(
- $spans[0]
- ),
- ": "
- );
-
- if(
- $key == "" ||
- $key == "Phone"
- ){
-
- continue;
- }
-
- if($key == "Hours"){
-
- $hours = [];
-
- $this->fuckhtml->load($elem);
-
- $trs =
- $this->fuckhtml
- ->getElementsByTagName(
- "tr"
- );
-
- foreach($trs as $tr){
-
- $this->fuckhtml->load($tr);
-
- $tds =
- $this->fuckhtml
- ->getElementsByTagName(
- "td"
- );
-
- if(count($tds) === 2){
-
- $hours[] =
- $this->fuckhtml
- ->getTextContent(
- $tds[0]
- ) . ": " .
- $this->fuckhtml
- ->getTextContent(
- $tds[1]
- );
- }
- }
-
- if(count($hours) !== 0){
-
- $hours = implode("\n", $hours);
- $table["Hours"] = $hours;
- }
-
- continue;
- }
-
- $table[$key] =
- preg_replace(
- '/ +/',
- " ",
- $this->fuckhtml
- ->getTextContent(
- $elem
- )
- );
- }
-
- // reset
- $this->fuckhtml->load($rhs);
-
- // get the website div
- $as =
- $this->fuckhtml
- ->getElementsByAttributeValue(
- "data-attrid",
- "visit_official_site",
- "a"
- );
-
- if(count($as) !== 0){
-
- $sublinks["Website"] =
- str_replace(
- "http://",
- "https://",
- $this->fuckhtml
- ->getTextContent(
- $as[0]
- ["attributes"]
- ["href"]
- )
- );
- }else{
-
- // get website through button
- $button =
- $this->fuckhtml
- ->getElementsByClassName(
- "ab_button",
- "a"
- );
-
- if(count($button) !== 0){
-
- $sublinks["Website"] =
- $this->unshiturl(
- $this->fuckhtml
- ->getTextContent(
- $button[0]
- ["attributes"]
- ["href"]
- )
- );
- }
- }
-
- // get social media links
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "g-link"
- );
-
- foreach($as as $a){
-
- $this->fuckhtml->load($a);
-
- $link =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($link) === 0){
-
- continue;
- }
-
- $sublink_title =
- $this->fuckhtml
- ->getTextContent(
- $a
- );
-
- if($sublink_title == "X (Twitter)"){
-
- $sublink_title = "Twitter";
- }
-
- $sublinks[$sublink_title] =
- $this->fuckhtml
- ->getTextContent(
- $link[0]
- ["attributes"]
- ["href"]
- );
- }
-
- // reset
- $this->fuckhtml->load($rhs);
-
- // get those round containers
- $containers =
- $this->fuckhtml
- ->getElementsByClassName(
- "tpa-ci"
- );
-
- foreach($containers as $container){
-
- $this->fuckhtml->load($container);
-
- $as =
- $this->fuckhtml
- ->getElementsByTagName(
- "a"
- );
-
- if(count($as) === 0){
-
- continue;
- }
-
- $sublinks[
- $this->fuckhtml
- ->getTextContent(
- $as[0]
- )
- ] =
- $this->fuckhtml
- ->getTextContent(
- $as[0]
- ["attributes"]
- ["href"]
- );
- }
-
- $out["answer"][] = [
- "title" => $title,
- "description" => $description,
- "url" => $url,
- "thumb" => null,
- "table" => $table,
- "sublink" => $sublinks
- ];
-
- return $out;
- }
-
-
/**
 * Collect the images that the page loads through inline JavaScript.
 *
 * Resets $this->dimg and fills it with "image id => image" pairs from two
 * sources found in the markup:
 *   1. the JSON object assigned to google.ldi (values go through unshit_thumb())
 *   2. "var s='data:image/...';var ii=['id',...];" snippets, where every id
 *      listed in ii receives the same decoded data URI.
 *
 * @param string $html Raw HTML of the result page.
 * @return void
 */
private function scrape_dimg($html){

	$this->dimg = [];

	// get images loaded through javascript
	preg_match_all(
		'/function\(\){google\.ldi=({.*?});/',
		$html,
		$ldi
	);

	if(isset($ldi[1])){

		foreach($ldi[1] as $json){

			$decoded = json_decode($json, true);

			if(!is_array($decoded)){

				// the lazy regex can capture something that is not valid JSON
				continue;
			}

			foreach($decoded as $id => $image_url){

				if(!is_string($image_url)){

					// unshit_thumb() only understands URL strings
					continue;
				}

				$this->dimg[$id] =
					$this->unshit_thumb(
						$image_url
					);
			}
		}
	}

	// get additional javascript base64 images
	preg_match_all(
		'/var s=\'(data:image\/[^\']+)\';var ii=\[((?:\'[^\']+\',?)+)\];/',
		$html,
		$inline
	);

	if(isset($inline[1])){

		foreach($inline[1] as $i => $js_string){

			$data_uri =
				$this->fuckhtml
				->parseJsString(
					$js_string
				);

			// ii is a comma separated list of single-quoted ids
			foreach(explode(",", $inline[2][$i]) as $id){

				$this->dimg[trim($id, "'")] = $data_uri;
			}
		}
	}
}
-
-
/**
 * Index the image metadata arrays embedded in the page's JavaScript.
 *
 * Rebuilds $this->image_arr. Each entry is keyed by the first captured
 * string of a match (the value other code looks up through the data-docid
 * attribute) and holds two images: index 0 comes from the second bracketed
 * triple of the match, index 1 from the first triple with its URL passed
 * through unshit_thumb().
 *
 * @param string $html Raw HTML to scan.
 * @return void
 */
private function scrape_imagearr($html){

	preg_match_all(
		'/\[0,"([^"]+)",\["([^"]+)\",([0-9]+),([0-9]+)\],\["([^"]+)",([0-9]+),([0-9]+)\]/',
		$html,
		$found
	);

	$this->image_arr = [];

	if(!isset($found[1])){

		return;
	}

	foreach($found[1] as $n => $docid){

		// second triple of the match: url, then two numbers stored as height/width
		$first = [
			"url" => $this->fuckhtml->parseJsString($found[5][$n]),
			"width" => (int)$found[7][$n],
			"height" => (int)$found[6][$n]
		];

		// first triple of the match, cleaned up by unshit_thumb()
		$second = [
			"url" =>
				$this->unshit_thumb(
					$this->fuckhtml->parseJsString($found[2][$n])
				),
			"width" => (int)$found[4][$n],
			"height" => (int)$found[3][$n]
		];

		$this->image_arr[$docid] = [$first, $second];
	}
}
-
-
/**
 * Look up an image collected by scrape_dimg().
 *
 * @param string $dimg Image id (key of $this->dimg).
 * @return mixed The stored image, or null when the id is unknown.
 */
private function getdimg($dimg){

	if(isset($this->dimg[$dimg])){

		return $this->dimg[$dimg];
	}

	return null;
}
-
-
/**
 * Reduce a gstatic thumbnail URL to its bare "q" parameter.
 *
 * Example input:
 *   https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcQINE2vbnNLHXqoZr3RVsaEJFyOsj1_BiBnJch-e1nyz3oia7Aj5xVj
 * URLs on any other host are returned untouched, e.g.
 *   https://i.ytimg.com/vi/PZVIyA5ER3Y/mqdefault.jpg?sqp=-oaymwEFCJQBEFM&rs=AMzJL3nXeaCpdIar-ltNwl82Y82cIJfphA
 *
 * @param string $url Thumbnail URL.
 * @return string "https://<host>/images?q=<q>" for gstatic thumbnails that
 *                carry a q parameter, otherwise $url unchanged.
 */
private function unshit_thumb($url){

	$parts = parse_url($url);

	if(
		!is_array($parts) ||
		!isset($parts["host"], $parts["query"]) || // no query string: nothing to strip
		!preg_match(
			'/tbn.*\.gstatic\.com/',
			$parts["host"]
		)
	){

		return $url;
	}

	parse_str($parts["query"], $params);

	if(
		isset($params["q"]) &&
		is_string($params["q"]) // "q[]=..." would decode to an array
	){

		return "https://" . $parts["host"] . "/images?q=" . $params["q"];
	}

	return $url;
}
-
-
/**
 * Parse every <style> element of the loaded document into lookup tables.
 *
 * Fills $this->styles with "selector => [property => lowercased value]".
 * Comma separated selector lists are split so each selector gets its own
 * entry, and every entry receives a "_c" key holding its property count.
 * Also fills $this->css_colors from the ":root" entry, mapping each value
 * to its lowercased property name (see getcolorvar()).
 *
 * @return void
 */
private function parsestyles(){

	// glue all stylesheets together
	$css = "";

	foreach($this->fuckhtml->getElementsByTagName("style") as $style_node){

		$css .= $style_node["innerHTML"];
	}

	// strip nested at-rule blocks (media/keyframe queries), but keep @font-face
	$css =
		preg_replace(
			'/@\s*(?!font-face)[^{]+\s*{[\S\s]+?}\s*}/',
			"",
			$css
		);

	// split into "selector list" / "declaration body" pairs
	preg_match_all(
		'/(.+?){([\S\s]*?)}/',
		$css,
		$rules
	);

	$parsed = [];

	foreach($rules[1] as $index => $selector_list){

		// split the body into property/value pairs
		preg_match_all(
			'/([^:;]+):([^;]*?(?:\([^)]+\)[^;]*?)?)(?:;|$)/',
			$rules[2][$index],
			$declarations
		);

		$properties = [];

		foreach($declarations[1] as $n => $property){

			$properties[trim($property)] =
				strtolower(trim($declarations[2][$n]));
		}

		// "h1,h2,h3" => one entry per selector
		foreach(explode(",", $selector_list) as $selector){

			$selector = trim($selector, "}\t\n\r\0\x0B");

			foreach($properties as $property => $value){

				$parsed[$selector][$property] = $value;
			}
		}
	}

	// remember how many properties each selector has, getstyle() relies on it
	foreach($parsed as $selector => $properties){

		$parsed[$selector]["_c"] = count($properties);
	}

	$this->styles = $parsed;

	// reverse map of the :root entry: value => property name
	$this->css_colors = [];

	if(isset($this->styles[":root"])){

		foreach($this->styles[":root"] as $property => $value){

			$this->css_colors[$value] = strtolower($property);
		}
	}
}
-
-
-
/**
 * Find the class/id name whose CSS rule consists of exactly the given
 * declarations.
 *
 * A selector from $this->styles matches when all requested property/value
 * pairs are present and its stored property count ("_c") equals the number
 * of requested properties.
 *
 * @param array $styles Property => value pairs to look for.
 * @return string|false Last space separated part of the first matching
 *                      selector with "." and "#" turned into spaces and
 *                      leading whitespace removed, or false without a match.
 */
private function getstyle($styles){

	$wanted = count($styles);
	$styles["_c"] = $wanted;

	foreach($this->styles as $selector => $declarations){

		// every requested pair plus the "_c" counter has to intersect
		if(count(array_intersect_assoc($declarations, $styles)) !== $wanted + 1){

			continue;
		}

		// only the right-most compound of a descendant selector is of interest
		$tokens = explode(" ", $selector);
		$last = $tokens[count($tokens) - 1];

		return ltrim(str_replace([".", "#"], " ", $last));
	}

	return false;
}
-
-
-
/**
 * Resolve a CSS value to the name recorded for it by parsestyles().
 *
 * @param string $color Value to look up in $this->css_colors, e.g. "#70757a".
 * @return string|null The recorded name, or null when the value is unknown.
 */
private function getcolorvar($color){

	return isset($this->css_colors[$color]) ? $this->css_colors[$color] : null;
}
-
-
-
/**
 * Run a web search and return the parsed result page.
 *
 * When $get["npt"] is set, the request parameters and proxy stored for that
 * token are restored from the backend. Otherwise a fresh request is built
 * from the filters in $get.
 *
 * @param array $get Request options. Keys read here: npt, s, country, nsfw,
 *                   lang, older, newer, spellcheck.
 * @return mixed Result of parsepage() for the "web" page type.
 * @throws Exception "Failed to get HTML" when the HTTP request fails; the
 *                   original error is attached as the previous exception.
 */
public function web($get){

	if($get["npt"]){

		// continue a previous search
		[$params, $proxy] = $this->backend->get($get["npt"], "web");
		$params = json_decode($params, true);

		$search = $params["q"];

	}else{
		$search = $get["s"];
		$country = $get["country"];
		$nsfw = $get["nsfw"];
		$lang = $get["lang"];
		$older = $get["older"];
		$newer = $get["newer"];
		$spellcheck = $get["spellcheck"];
		$proxy = $this->backend->get_ip();

		$params = [
			"q" => $search,
			"hl" => "en",
			"num" => 20 // get 20 results
		];

		// country
		if($country != "any"){

			$params["gl"] = $country;
		}

		// nsfw
		$params["safe"] = $nsfw == "yes" ? "off" : "active";

		// language
		if($lang != "any"){

			$params["lr"] = "lang_" . $lang;
		}

		// generate tbs
		$tbs = [];

		// get date
		$older = $older === false ? null : date("m/d/Y", $older);
		$newer = $newer === false ? null : date("m/d/Y", $newer);

		if(
			$older !== null ||
			$newer !== null
		){

			$tbs["cdr"] = "1";
			$tbs["cd_min"] = $newer;
			$tbs["cd_max"] = $older;
		}

		// spellcheck filter
		if($spellcheck == "no"){

			$params["nfpr"] = "1";
		}

		// tbs is a comma separated list of key:value pairs
		if(count($tbs) !== 0){

			$pairs = [];

			foreach($tbs as $key => $value){

				$pairs[] = $key . ":" . $value;
			}

			$params["tbs"] = implode(",", $pairs);
		}
	}

	try{
		$html =
			$this->get(
				$proxy,
				"https://www.google.com/search",
				$params
			);
	}catch(Exception $error){

		// keep the cause attached so the failure can still be diagnosed
		throw new Exception("Failed to get HTML", 0, $error);
	}

	return $this->parsepage($html, "web", $search, $proxy, $params);
}
-
-
-
/**
 * Run a video search (tbm=vid) and return the results in video format.
 *
 * When $get["npt"] is set, the request parameters and proxy stored for that
 * token are restored from the backend. Otherwise a fresh request is built
 * from the filters in $get. The page is parsed by parsepage() and every
 * entry of its "web" list is converted into a video entry.
 *
 * @param array $get Request options. Keys read here: npt, s, country, nsfw,
 *                   older, newer, duration, quality, captions.
 * @return array Keys: status, npt, video, author, livestream, playlist, reel.
 * @throws Exception "Failed to get HTML" when the HTTP request fails; the
 *                   original error is attached as the previous exception.
 */
public function video($get){

	if($get["npt"]){

		// continue a previous search
		[$params, $proxy] = $this->backend->get($get["npt"], "video");
		$params = json_decode($params, true);

		$search = $params["q"];

	}else{
		$search = $get["s"];
		$country = $get["country"];
		$nsfw = $get["nsfw"];
		$older = $get["older"];
		$newer = $get["newer"];
		$duration = $get["duration"];
		$quality = $get["quality"];
		$captions = $get["captions"];
		$proxy = $this->backend->get_ip();

		$params = [
			"q" => $search,
			"tbm" => "vid",
			"hl" => "en",
			"num" => "20"
		];

		// country
		if($country != "any"){

			$params["gl"] = $country;
		}

		// nsfw
		$params["safe"] = $nsfw == "yes" ? "off" : "active";

		// every filter is stored as key => value so that the date keys
		// (cdr, cd_min, cd_max) are not lost when the list is serialized
		$tbs = [];

		// get date
		$older = $older === false ? null : date("m/d/Y", $older);
		$newer = $newer === false ? null : date("m/d/Y", $newer);

		if(
			$older !== null ||
			$newer !== null
		){

			$tbs["cdr"] = "1";
			$tbs["cd_min"] = $newer;
			$tbs["cd_max"] = $older;
		}

		// duration
		if($duration != "any"){

			$tbs["dur"] = $duration;
		}

		// quality
		if($quality != "any"){

			$tbs["hq"] = $quality;
		}

		// captions
		if($captions != "any"){

			$tbs["cc"] = $captions;
		}

		// append tbs as comma separated key:value pairs
		if(count($tbs) !== 0){

			$pairs = [];

			foreach($tbs as $key => $value){

				$pairs[] = $key . ":" . $value;
			}

			$params["tbs"] = implode(",", $pairs);
		}
	}

	try{
		$html =
			$this->get(
				$proxy,
				"https://www.google.com/search",
				$params
			);
	}catch(Exception $error){

		// keep the cause attached so the failure can still be diagnosed
		throw new Exception("Failed to get HTML", 0, $error);
	}

	$response = $this->parsepage($html, "videos", $search, $proxy, $params);

	$out = [
		"status" => "ok",
		"npt" => isset($response["npt"]) ? $response["npt"] : null,
		"video" => [],
		"author" => [],
		"livestream" => [],
		"playlist" => [],
		"reel" => []
	];

	// author and duration are taken from the result's table when present
	foreach($response["web"] as $result){

		$out["video"][] = [
			"title" => $result["title"],
			"description" => $result["description"],
			"author" => [
				"name" => isset($result["table"]["Author"]) ? $result["table"]["Author"] : null,
				"url" => null,
				"avatar" => null
			],
			"date" => $result["date"],
			"duration" => isset($result["table"]["Duration"]) ? $this->hms2int($result["table"]["Duration"]) : null,
			"views" => null,
			"thumb" => $result["thumb"],
			"url" => $result["url"]
		];
	}

	return $out;
}
-
-
-
/**
 * Run a news search (tbm=nws) and scrape the result page.
 *
 * When $get["npt"] is set, the stored value is used as the path and query
 * string of the next page on www.google.com. Otherwise a fresh request is
 * built from the filters in $get.
 *
 * @param array $get Request options. Keys read here: npt, s, country, nsfw,
 *                   older, newer, sort.
 * @return array Keys: status, npt (token for the next page or null) and
 *               news (list of entries with title, author, description,
 *               date, thumb, url).
 * @throws Exception "Failed to get HTML" when the HTTP request fails,
 *                   "Could not grep result div" when #center_col is
 *                   missing, plus whatever detect_sorry() throws.
 */
public function news($get){

	if($get["npt"]){

		// continue a previous search
		[$req, $proxy] = $this->backend->get($get["npt"], "news");

		$request_url = "https://www.google.com" . $req;
		$params = [];

	}else{
		$search = $get["s"];
		$country = $get["country"];
		$nsfw = $get["nsfw"];
		$older = $get["older"];
		$newer = $get["newer"];
		$sort = $get["sort"];
		$proxy = $this->backend->get_ip();

		$params = [
			"q" => $search,
			"tbm" => "nws",
			"hl" => "en",
			"num" => "20"
		];

		// country
		if($country != "any"){

			$params["gl"] = $country;
		}

		// nsfw
		$params["safe"] = $nsfw == "yes" ? "off" : "active";

		$tbs = [];

		// get date
		$older = $older === false ? null : date("m/d/Y", $older);
		$newer = $newer === false ? null : date("m/d/Y", $newer);

		if(
			$older !== null ||
			$newer !== null
		){

			$tbs["cdr"] = "1";
			$tbs["cd_min"] = $newer;
			$tbs["cd_max"] = $older;
		}

		// sort by date instead of relevance
		if($sort == "date"){

			$tbs["sbd"] = "1";
		}

		// append tbs as comma separated key:value pairs
		if(count($tbs) !== 0){

			$pairs = [];

			foreach($tbs as $key => $value){

				$pairs[] = $key . ":" . $value;
			}

			$params["tbs"] = implode(",", $pairs);
		}

		$request_url = "https://www.google.com/search";
	}

	// both branches report request failures the same way
	try{
		$html =
			$this->get(
				$proxy,
				$request_url,
				$params
			);
	}catch(Exception $error){

		throw new Exception("Failed to get HTML", 0, $error);
	}

	$out = [
		"status" => "ok",
		"npt" => null,
		"news" => []
	];

	$this->fuckhtml->load($html);

	$this->detect_sorry();

	// get images
	$this->scrape_dimg($html);

	// parse styles
	$this->parsestyles();

	$center_col =
		$this->fuckhtml
		->getElementById(
			"center_col",
			"div"
		);

	// the file checks for both null and false after getElementById(),
	// accept either as "not found"
	if(
		$center_col === null ||
		$center_col === false
	){

		throw new Exception("Could not grep result div");
	}

	$this->fuckhtml->load($center_col);

	// get next page
	$npt =
		$this->fuckhtml
		->getElementById(
			"pnnext",
			"a"
		);

	if(
		$npt !== null &&
		$npt !== false &&
		isset($npt["attributes"]["href"])
	){

		$out["npt"] =
			$this->backend->store(
				$this->fuckhtml
				->getTextContent(
					$npt["attributes"]["href"]
				),
				"news",
				$proxy
			);
	}

	// class names only depend on the parsed stylesheet, resolve them once
	$carousel_class =
		$this->getstyle(
			[
				"padding" => "16px 16px 40px 16px"
			]
		);

	$author_class =
		$this->getstyle(
			[
				"overflow" => "hidden",
				"text-align" => "left",
				"text-overflow" => "ellipsis",
				"white-space" => "nowrap",
				"margin-bottom" => "8px"
			]
		);

	$as =
		$this->fuckhtml
		->getElementsByAttributeName(
			"jsname",
			"a"
		);

	foreach($as as $a){

		if(!isset($a["attributes"]["href"])){

			// nothing to link to
			continue;
		}

		$this->fuckhtml->load($a);

		// get title
		$title =
			$this->fuckhtml
			->getElementsByAttributeValue(
				"role",
				"heading",
				"div"
			);

		if(count($title) === 0){

			continue;
		}

		$title =
			$this->titledots(
				$this->fuckhtml
				->getTextContent(
					$title[0]
				)
			);

		// get thumbnail
		$image =
			$this->fuckhtml
			->getElementsByAttributeName(
				"id",
				"img"
			);

		// check for padded title node, if found, we're inside a carousel
		$probe =
			count(
				$this->fuckhtml
				->getElementsByClassName(
					$carousel_class,
					"div"
				)
			) !== 0;

		if(
			count($image) !== 0 &&
			!isset($image[0]["attributes"]["width"])
		){

			$thumb = [
				"url" =>
					$this->getdimg(
						$image[0]["attributes"]["id"]
					),
				"ratio" => $probe === true ? "16:9" : "1:1"
			];
		}else{

			$thumb = [
				"url" => null,
				"ratio" => null
			];
		}

		// get author
		$author =
			$this->fuckhtml
			->getElementsByClassName(
				$author_class,
				"div"
			);

		if(count($author) !== 0){

			$author =
				$this->fuckhtml
				->getTextContent(
					$author[0]
				);
		}else{

			$author = null;
		}

		// description and date both live in divs with an inline style
		$styled_divs =
			$this->fuckhtml
			->getElementsByAttributeName(
				"style",
				"div"
			);

		// get description (carousel entries have none)
		$description = null;

		if($probe === false){

			foreach($styled_divs as $styled_div){

				if(
					strpos(
						$styled_div["attributes"]["style"],
						"margin-top:"
					) !== false
				){

					$description =
						$this->titledots(
							$this->fuckhtml
							->getTextContent(
								$styled_div
							)
						);
					break;
				}
			}
		}

		// get date: last span of the first div whose style contains "bottom:"
		$date = null;

		foreach($styled_divs as $styled_div){

			if(
				strpos(
					$styled_div["attributes"]["style"],
					"bottom:"
				) === false
			){

				continue;
			}

			$this->fuckhtml->load($styled_div);

			$spans =
				$this->fuckhtml
				->getElementsByTagName(
					"span"
				);

			if(count($spans) !== 0){

				$timestamp =
					strtotime(
						$this->fuckhtml
						->getTextContent(
							$spans[count($spans) - 1]
						)
					);

				// keep null when the date could not be parsed
				if($timestamp !== false){

					$date = $timestamp;
				}
			}

			break;
		}

		$out["news"][] = [
			"title" => $title,
			"author" => $author,
			"description" => $description,
			"date" => $date,
			"thumb" => $thumb,
			"url" =>
				$this->unshiturl(
					$a["attributes"]["href"]
				)
		];
	}

	return $out;
}
-
-
-
-
/**
 * Run an image search (udm=2) and scrape the result page.
 *
 * When $get["npt"] is set, the request parameters and proxy stored for that
 * token are restored from the backend. Otherwise a fresh request is built
 * from the filters in $get.
 *
 * @param array $get Request options. Keys read here: npt, s, country, nsfw,
 *                   time, size, ratio, color, type, format, rights.
 * @return array Keys: status, npt (token for the next page or null) and
 *               image (list of entries with title, source, url).
 * @throws Exception "Search term is empty!" for an empty query,
 *                   "Failed to get search page" when the HTTP request
 *                   fails, plus whatever detect_sorry() throws.
 */
public function image($get){

	// generate parameters
	if($get["npt"]){

		[$params, $proxy] =
			$this->backend->get(
				$get["npt"],
				"images"
			);

		$params = json_decode($params, true);
	}else{

		$search = $get["s"];
		if(strlen($search) === 0){

			throw new Exception("Search term is empty!");
		}

		$proxy = $this->backend->get_ip();
		$country = $get["country"];
		$nsfw = $get["nsfw"];
		$time = $get["time"];
		$size = $get["size"];
		$ratio = $get["ratio"];
		$color = $get["color"];
		$type = $get["type"];
		$format = $get["format"];
		$rights = $get["rights"];

		$params = [
			"q" => $search,
			"udm" => "2" // get images
		];

		// country (image search uses cr instead of gl)
		if($country != "any"){

			$params["cr"] = "country" . strtoupper($country);
		}

		// nsfw
		$params["safe"] = $nsfw == "yes" ? "off" : "active";

		// generate tbs
		$tbs = [];

		// time
		if($time != "any"){

			$tbs["qdr"] = $time;
		}

		// size
		if($size != "any"){

			$params["imgsz"] = $size;
		}

		// ratio
		if($ratio != "any"){

			$params["imgar"] = $ratio;
		}

		// color
		if($color != "any"){

			if(
				$color == "color" ||
				$color == "trans"
			){

				$params["imgc"] = $color;
			}elseif($color == "bnw"){

				$params["imgc"] = "gray";
			}else{

				$tbs["ic"] = "specific";
				$tbs["isc"] = $color;
			}
		}

		// type
		if($type != "any"){

			$tbs["itp"] = $type;
		}

		// format
		if($format != "any"){

			$params["as_filetype"] = $format;
		}

		// rights (tbs)
		if($rights != "any"){

			$tbs["sur"] = $rights;
		}

		// append tbs as comma separated key:value pairs
		if(count($tbs) !== 0){

			$pairs = [];

			foreach($tbs as $key => $value){

				$pairs[] = $key . ":" . $value;
			}

			$params["tbs"] = implode(",", $pairs);
		}
	}

	try{
		$html =
			$this->get(
				$proxy,
				"https://www.google.com/search",
				$params
			);
	}catch(Exception $error){

		// keep the cause attached so the failure can still be diagnosed
		throw new Exception("Failed to get search page", 0, $error);
	}

	$this->fuckhtml->load($html);

	$this->detect_sorry();

	// get javascript images
	$this->scrape_imagearr($html);

	$out = [
		"status" => "ok",
		"npt" => null,
		"image" => []
	];

	$images =
		$this->fuckhtml
		->getElementsByClassName(
			"ivg-i",
			"div"
		);

	foreach($images as $div){

		$this->fuckhtml->load($div);

		$image =
			$this->fuckhtml
			->getElementsByTagName(
				"img"
			);

		if(
			count($image) === 0 ||
			!isset($div["attributes"]["data-docid"]) ||
			!isset($this->image_arr[$div["attributes"]["data-docid"]])
		){

			// no <img> or no scraped source for this tile, skip it
			continue;
		}

		$out["image"][] = [
			"title" =>
				$this->titledots(
					$this->fuckhtml
					->getTextContent(
						$image[0]["attributes"]["alt"]
					)
				),
			"source" =>
				$this->image_arr[
					$div["attributes"]["data-docid"]
				],
			"url" =>
				$this->fuckhtml
				->getTextContent(
					$div["attributes"]["data-lpage"]
				)
		];
	}

	// as usual, no way to check if there is a next page reliably
	if(count($out["image"]) > 50){

		if(!isset($params["start"])){

			$params["start"] = 10;
		}else{

			$params["start"] += 10;
		}

		$out["npt"] =
			$this->backend
			->store(
				json_encode($params),
				"image",
				$proxy
			);
	}

	return $out;
}
-
/**
 * Turn a link taken from the result page into a clean destination URL.
 *
 * Host-less links are treated as tracking URLs and replaced by their
 * "imgurl" or "q" parameter. The result is then tidied per destination:
 * mobile Wikipedia/IMDb/YouTube hosts become desktop hosts, referrer
 * parameters are dropped from play.google.* and twitter.com links, and
 * maps.google.* "maps?" links are reduced to their daddr parameter.
 *
 * @param string $url         Link as found in the markup.
 * @param bool   $return_size When true, return an array with the url plus
 *                            the image metadata of the tracking URL.
 * @return string|array The cleaned URL, or with $return_size an array with
 *                      keys url, ref, thumb_width, thumb_height,
 *                      image_width, image_height (null when unknown).
 */
private function unshiturl($url, $return_size = false){

	// decode
	$url =
		$this->fuckhtml
		->getTextContent($url);

	$url_parts = parse_url($url);

	// parameters of the tracking URL, kept apart from the per-site
	// query handling below so $return_size always reads the right data
	$tracking = [];

	if(
		is_array($url_parts) &&
		!isset($url_parts["host"]) &&
		isset($url_parts["query"])
	){

		// no host, we have a tracking url
		parse_str($url_parts["query"], $tracking);

		if(isset($tracking["imgurl"])){

			$url = $tracking["imgurl"];
		}
		elseif(isset($tracking["q"])){

			$url = $tracking["q"];
		}
	}

	// removes the given parameters from the query string of a URL
	$strip_params = function($link, $names){

		$oldquery = parse_url($link, PHP_URL_QUERY);

		if(
			$oldquery === null ||
			$oldquery === false
		){

			return $link;
		}

		parse_str($oldquery, $query);

		foreach($names as $name){

			unset($query[$name]);
		}

		return
			str_replace(
				$oldquery,
				http_build_query($query),
				$link
			);
	};

	// rewrite URLs to remove extra tracking parameters
	// (parse_url gives null/false without a host, normalize to "")
	$domain = (string)parse_url($url, PHP_URL_HOST);

	if(
		preg_match(
			'/wikipedia\.org$/',
			$domain
		)
	){

		// rewrite wikipedia mobile URLs to desktop
		$url =
			$this->replacedomain(
				$url,
				preg_replace(
					'/([a-z0-9]+)(\.m\.)/',
					'$1.',
					$domain
				)
			);
	}

	elseif(
		preg_match(
			'/imdb\.com$|youtube\.[^.]+$/',
			$domain
		)
	){

		// rewrite imdb and youtube mobile URLs too
		$url =
			$this->replacedomain(
				$url,
				preg_replace(
					'/^m\./',
					"",
					$domain
				)
			);
	}

	elseif(
		preg_match(
			'/play\.google\.[^.]+$/',
			$domain
		)
	){

		// remove referrers from play.google.com
		$url = $strip_params($url, ["referrer", "hl", "gl"]);
	}

	elseif(
		preg_match(
			'/twitter\.com$/',
			$domain
		)
	){

		// remove more referrers from twitter.com
		$url = $strip_params($url, ["ref_src"]);
	}

	elseif(
		preg_match(
			'/maps\.google\.[^.]+/',
			$domain
		)
	){

		if(stripos($url, "maps?") !== false){

			//https://maps.google.com/maps?daddr=Johnny,+603+Rue+St+Georges,+Saint-J%C3%A9r%C3%B4me,+Quebec+J7Z+5B7
			$maps_query = parse_url($url, PHP_URL_QUERY);

			if(
				$maps_query !== null &&
				$maps_query !== false
			){

				parse_str($maps_query, $maps_params);

				if(isset($maps_params["daddr"])){

					$url =
						"https://maps.google.com/maps?daddr=" .
						urlencode($maps_params["daddr"]);
				}
			}
		}
	}

	if($return_size){

		return [
			"url" => $url,
			"ref" => isset($tracking["imgrefurl"]) ? $tracking["imgrefurl"] : null,
			"thumb_width" => isset($tracking["tbnw"]) ? (int)$tracking["tbnw"] : null,
			"thumb_height" => isset($tracking["tbnh"]) ? (int)$tracking["tbnh"] : null,
			"image_width" => isset($tracking["w"]) ? (int)$tracking["w"] : null,
			"image_height" => isset($tracking["h"]) ? (int)$tracking["h"] : null
		];
	}

	return $url;
}
-
/**
 * Swap the host of a URL.
 *
 * Only the first "http(s)://host" of the string is rewritten, so URLs
 * embedded in the query string are left alone.
 *
 * @param string $url    Absolute http or https URL.
 * @param string $domain New host.
 * @return string $url with its host replaced by $domain.
 */
private function replacedomain($url, $domain){

	return
		preg_replace(
			'/(https?:\/\/)([^\/]+)/',
			// ${1} instead of $1: a host starting with a digit would
			// otherwise be read as part of the backreference number
			'${1}' . $domain,
			$url,
			1
		);
}
-
/**
 * Strip whitespace, dots and ellipsis characters from both ends of a title.
 *
 * The trimming is done per UTF-8 character. A byte-wise trim() with "…" in
 * its character list would strip the single bytes of that character and
 * damage other multibyte characters at either end of the title.
 *
 * @param string $title Title text.
 * @return string The trimmed title.
 */
private function titledots($title){

	$title = (string)$title;

	$trimmed =
		preg_replace(
			'/^[ .\t\n\r\0\x0B…]+|[ .\t\n\r\0\x0B…]+\z/u',
			"",
			$title
		);

	if($trimmed === null){

		// not valid UTF-8: fall back to trimming the single-byte characters
		return trim($title, " .\t\n\r\0\x0B");
	}

	return $trimmed;
}
-
/**
 * Convert a "h:m:s", "m:s" or "s" duration into seconds.
 *
 * The string is split into at most three parts; each part is cast to int.
 *
 * @param string $time Duration, e.g. "1:02:03".
 * @return int Number of seconds.
 */
private function hms2int($time){

	$total = 0;
	$unit = 1;

	// walk from the seconds backwards: 1, 60, 3600
	foreach(array_reverse(explode(":", $time, 3)) as $segment){

		$total += (int)$segment * $unit;
		$unit *= 60;
	}

	return $total;
}
-
/**
 * Abort when the currently loaded document is a captcha page.
 *
 * Looks for a div with the id "recaptcha" in the document loaded into
 * $this->fuckhtml.
 *
 * @return void
 * @throws Exception "Google returned a captcha" when the lookup returns
 *                   anything other than false.
 */
private function detect_sorry(){

	$captcha_div = $this->fuckhtml->getElementById("recaptcha", "div");

	if($captcha_div === false){

		// no captcha, carry on
		return;
	}

	throw new Exception("Google returned a captcha");
}
- }
|