curlproxy.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660
  1. <?php
  2. class proxy{
  3. public const req_web = 0;
  4. public const req_image = 1;
  5. public function __construct($cache = true){
  6. $this->cache = $cache;
  7. }
  8. public function do404(){
  9. http_response_code(404);
  10. header("Content-Type: image/png");
  11. $handle = fopen("lib/img404.png", "r");
  12. echo fread($handle, filesize("lib/img404.png"));
  13. fclose($handle);
  14. die();
  15. return;
  16. }
  17. public function getabsoluteurl($path, $relative){
  18. if($this->validateurl($path)){
  19. return $path;
  20. }
  21. if(substr($path, 0, 2) == "//"){
  22. return "https:" . $path;
  23. }
  24. $url = null;
  25. $relative = parse_url($relative);
  26. $url = $relative["scheme"] . "://";
  27. if(
  28. isset($relative["user"]) &&
  29. isset($relative["pass"])
  30. ){
  31. $url .= $relative["user"] . ":" . $relative["pass"] . "@";
  32. }
  33. $url .= $relative["host"];
  34. if(isset($relative["path"])){
  35. $relative["path"] = explode(
  36. "/",
  37. $relative["path"]
  38. );
  39. unset($relative["path"][count($relative["path"]) - 1]);
  40. $relative["path"] = implode("/", $relative["path"]);
  41. $url .= $relative["path"];
  42. }
  43. if(
  44. strlen($path) !== 0 &&
  45. $path[0] !== "/"
  46. ){
  47. $url .= "/";
  48. }
  49. $url .= $path;
  50. return $url;
  51. }
  52. public function validateurl($url){
  53. $url_parts = parse_url($url);
  54. // check if required parts are there
  55. if(
  56. !isset($url_parts["scheme"]) ||
  57. !(
  58. $url_parts["scheme"] == "http" ||
  59. $url_parts["scheme"] == "https"
  60. ) ||
  61. !isset($url_parts["host"])
  62. ){
  63. return false;
  64. }
  65. $ip =
  66. str_replace(
  67. ["[", "]"], // handle ipv6
  68. "",
  69. $url_parts["host"]
  70. );
  71. // if its not an IP
  72. if(!filter_var($ip, FILTER_VALIDATE_IP)){
  73. // resolve domain's IP
  74. $ip = gethostbyname($url_parts["host"] . ".");
  75. }
  76. // check if its localhost
  77. if(
  78. filter_var(
  79. $ip,
  80. FILTER_VALIDATE_IP, FILTER_FLAG_NO_PRIV_RANGE | FILTER_FLAG_NO_RES_RANGE
  81. ) === false
  82. ){
  83. return false;
  84. }
  85. return true;
  86. }
  87. public function get($url, $reqtype = self::req_web, $acceptallcodes = false, $referer = null, $redirectcount = 0){
  88. if($redirectcount === 5){
  89. throw new Exception("Too many redirects");
  90. }
  91. if($url == "https://i.imgur.com/removed.png"){
  92. throw new Exception("Encountered imgur 404");
  93. }
  94. // sanitize URL
  95. if($this->validateurl($url) === false){
  96. throw new Exception("Invalid URL");
  97. }
  98. $this->clientcache();
  99. $curl = curl_init();
  100. curl_setopt($curl, CURLOPT_URL, $url);
  101. curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
  102. curl_setopt($curl, CURLOPT_HEADER, 1);
  103. switch($reqtype){
  104. case self::req_web:
  105. curl_setopt(
  106. $curl,
  107. CURLOPT_HTTPHEADER,
  108. [
  109. "User-Agent: " . config::USER_AGENT,
  110. "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
  111. "Accept-Language: en-US,en;q=0.5",
  112. "Accept-Encoding: gzip, deflate",
  113. "DNT: 1",
  114. "Connection: keep-alive",
  115. "Upgrade-Insecure-Requests: 1",
  116. "Sec-Fetch-Dest: document",
  117. "Sec-Fetch-Mode: navigate",
  118. "Sec-Fetch-Site: none",
  119. "Sec-Fetch-User: ?1"
  120. ]
  121. );
  122. break;
  123. case self::req_image:
  124. if($referer === null){
  125. $referer = explode("/", $url, 4);
  126. array_pop($referer);
  127. $referer = implode("/", $referer);
  128. }
  129. curl_setopt(
  130. $curl,
  131. CURLOPT_HTTPHEADER,
  132. [
  133. "User-Agent: " . config::USER_AGENT,
  134. "Accept: image/avif,image/webp,*/*",
  135. "Accept-Language: en-US,en;q=0.5",
  136. "Accept-Encoding: gzip, deflate",
  137. "DNT: 1",
  138. "Connection: keep-alive",
  139. "Referer: {$referer}"
  140. ]
  141. );
  142. break;
  143. }
  144. curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
  145. curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
  146. curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
  147. curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 30);
  148. curl_setopt($curl, CURLOPT_TIMEOUT, 30);
  149. // limit size of payloads
  150. curl_setopt($curl, CURLOPT_BUFFERSIZE, 1024);
  151. curl_setopt($curl, CURLOPT_NOPROGRESS, false);
  152. curl_setopt(
  153. $curl,
  154. CURLOPT_PROGRESSFUNCTION,
  155. function($downloadsize, $downloaded, $uploadsize, $uploaded
  156. ){
  157. // if $downloaded exceeds 100MB, fuck off
  158. return ($downloaded > 100000000) ? 1 : 0;
  159. });
  160. $body = curl_exec($curl);
  161. if(curl_errno($curl)){
  162. throw new Exception(curl_error($curl));
  163. }
  164. curl_close($curl);
  165. $headers = [];
  166. $http = null;
  167. while(true){
  168. $header = explode("\n", $body, 2);
  169. $body = $header[1];
  170. if($http === null){
  171. // http/1.1 200 ok
  172. $header = explode("/", $header[0], 2);
  173. $header = explode(" ", $header[1], 3);
  174. $http = [
  175. "version" => (float)$header[0],
  176. "code" => (int)$header[1]
  177. ];
  178. continue;
  179. }
  180. if(trim($header[0]) == ""){
  181. // reached end of headers
  182. break;
  183. }
  184. $header = explode(":", $header[0], 2);
  185. // malformed headers
  186. if(count($header) !== 2){ continue; }
  187. $headers[strtolower(trim($header[0]))] = trim($header[1]);
  188. }
  189. // check http code
  190. if(
  191. $http["code"] >= 300 &&
  192. $http["code"] <= 309
  193. ){
  194. // redirect
  195. if(!isset($headers["location"])){
  196. throw new Exception("Broken redirect");
  197. }
  198. $redirectcount++;
  199. return $this->get($this->getabsoluteurl($headers["location"], $url), $reqtype, $acceptallcodes, $referer, $redirectcount);
  200. }else{
  201. if(
  202. $acceptallcodes === false &&
  203. $http["code"] > 300
  204. ){
  205. throw new Exception("Remote server returned an error code! ({$http["code"]})");
  206. }
  207. }
  208. // check if data is okay
  209. switch($reqtype){
  210. case self::req_image:
  211. $format = false;
  212. if(isset($headers["content-type"])){
  213. if(stripos($headers["content-type"], "text/html") !== false){
  214. throw new Exception("Server returned html");
  215. }
  216. if(
  217. preg_match(
  218. '/image\/([^ ]+)/i',
  219. $headers["content-type"],
  220. $match
  221. )
  222. ){
  223. $format = strtolower($match[1]);
  224. if(substr(strtolower($format), 0, 2) == "x-"){
  225. $format = substr($format, 2);
  226. }
  227. }
  228. }
  229. return [
  230. "http" => $http,
  231. "format" => $format,
  232. "headers" => $headers,
  233. "body" => $body
  234. ];
  235. break;
  236. default:
  237. return [
  238. "http" => $http,
  239. "headers" => $headers,
  240. "body" => $body
  241. ];
  242. break;
  243. }
  244. return;
  245. }
  246. public function stream_linear_image($url, $referer = null){
  247. $this->stream($url, $referer, "image");
  248. }
  249. public function stream_linear_audio($url, $referer = null){
  250. $this->stream($url, $referer, "audio");
  251. }
  252. private function stream($url, $referer, $format){
  253. $this->clientcache();
  254. $this->url = $url;
  255. $this->format = $format;
  256. // sanitize URL
  257. if($this->validateurl($url) === false){
  258. throw new Exception("Invalid URL");
  259. }
  260. $curl = curl_init();
  261. // set headers
  262. if($referer === null){
  263. $referer = explode("/", $url, 4);
  264. array_pop($referer);
  265. $referer = implode("/", $referer);
  266. }
  267. switch($format){
  268. case "image":
  269. curl_setopt(
  270. $curl,
  271. CURLOPT_HTTPHEADER,
  272. [
  273. "User-Agent: " . config::USER_AGENT,
  274. "Accept: image/avif,image/webp,*/*",
  275. "Accept-Language: en-US,en;q=0.5",
  276. "Accept-Encoding: gzip, deflate, br",
  277. "DNT: 1",
  278. "Connection: keep-alive",
  279. "Referer: {$referer}"
  280. ]
  281. );
  282. break;
  283. case "audio":
  284. curl_setopt(
  285. $curl,
  286. CURLOPT_HTTPHEADER,
  287. [
  288. "User-Agent: " . config::USER_AGENT,
  289. "Accept: audio/webm,audio/ogg,audio/wav,audio/*;q=0.9,application/ogg;q=0.7,video/*;q=0.6,*/*;q=0.5",
  290. "Accept-Language: en-US,en;q=0.5",
  291. "Accept-Encoding: gzip, deflate, br",
  292. "DNT: 1",
  293. "Connection: keep-alive",
  294. "Referer: {$referer}"
  295. ]
  296. );
  297. break;
  298. }
  299. // follow redirects
  300. curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
  301. curl_setopt($curl, CURLOPT_MAXREDIRS, 5);
  302. curl_setopt($curl, CURLOPT_AUTOREFERER, 5);
  303. // set url
  304. curl_setopt($curl, CURLOPT_URL, $url);
  305. curl_setopt($curl, CURLOPT_ENCODING, ""); // default encoding
  306. // timeout + disable ssl
  307. curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
  308. curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
  309. curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 10);
  310. curl_setopt($curl, CURLOPT_TIMEOUT, 30);
  311. curl_setopt(
  312. $curl,
  313. CURLOPT_WRITEFUNCTION,
  314. function($c, $data){
  315. if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){
  316. throw new Exception("Serber returned a non-200 code");
  317. }
  318. echo $data;
  319. return strlen($data);
  320. }
  321. );
  322. $this->empty_header = false;
  323. $this->cont = false;
  324. $this->headers_tmp = [];
  325. $this->headers = [];
  326. curl_setopt(
  327. $curl,
  328. CURLOPT_HEADERFUNCTION,
  329. function($c, $header){
  330. $head = trim($header);
  331. $len = strlen($head);
  332. if($len === 0){
  333. $this->empty_header = true;
  334. $this->headers_tmp = [];
  335. }else{
  336. $this->empty_header = false;
  337. $this->headers_tmp[] = $head;
  338. }
  339. foreach($this->headers_tmp as $h){
  340. // parse headers
  341. $h = explode(":", $h, 2);
  342. if(count($h) !== 2){
  343. if(curl_getinfo($c, CURLINFO_HTTP_CODE) !== 200){
  344. // not HTTP 200, probably a redirect
  345. $this->cont = false;
  346. }else{
  347. $this->cont = true;
  348. }
  349. // is HTTP 200, just ignore that line
  350. continue;
  351. }
  352. $this->headers[strtolower(trim($h[0]))] = trim($h[1]);
  353. }
  354. if(
  355. $this->cont &&
  356. $this->empty_header
  357. ){
  358. // get content type
  359. if(isset($this->headers["content-type"])){
  360. $octet_check = stripos($this->headers["content-type"], "octet-stream");
  361. if(
  362. stripos($this->headers["content-type"], $this->format) === false &&
  363. $octet_check === false
  364. ){
  365. throw new Exception("Resource reported invalid Content-Type");
  366. }
  367. }else{
  368. throw new Exception("Resource is not an {$this->format} (no Content-Type)");
  369. }
  370. $filetype = explode("/", $this->headers["content-type"]);
  371. if(!isset($filetype[1])){
  372. throw new Exception("Malformed Content-Type header");
  373. }
  374. if($octet_check !== false){
  375. $filetype[1] = "jpeg";
  376. }
  377. header("Content-Type: {$this->format}/{$filetype[1]}");
  378. // give payload size
  379. if(isset($this->headers["content-length"])){
  380. header("Content-Length: {$this->headers["content-length"]}");
  381. }
  382. // give filename
  383. $this->getfilenameheader($this->headers, $this->url, $filetype[1]);
  384. }
  385. return strlen($header);
  386. }
  387. );
  388. curl_exec($curl);
  389. if(curl_errno($curl)){
  390. throw new Exception(curl_error($curl));
  391. }
  392. curl_close($curl);
  393. }
  394. public function getfilenameheader($headers, $url, $filetype = "jpg"){
  395. // get filename from content-disposition header
  396. if(isset($headers["content-disposition"])){
  397. preg_match(
  398. '/filename=([^;]+)/',
  399. $headers["content-disposition"],
  400. $filename
  401. );
  402. if(isset($filename[1])){
  403. header("Content-Disposition: filename=\"" . trim($filename[1], "\"'") . "." . $filetype . "\"");
  404. return;
  405. }
  406. }
  407. // get filename from URL
  408. $filename = parse_url($url, PHP_URL_PATH);
  409. if($filename === null){
  410. // everything failed! rename file to domain name
  411. header("Content-Disposition: filename=\"" . parse_url($url, PHP_URL_HOST) . "." . $filetype . "\"");
  412. return;
  413. }
  414. // remove extension from filename
  415. $filename =
  416. explode(
  417. ".",
  418. basename($filename)
  419. );
  420. if(count($filename) > 1){
  421. array_pop($filename);
  422. }
  423. $filename = implode(".", $filename);
  424. header("Content-Disposition: inline; filename=\"" . $filename . "." . $filetype . "\"");
  425. return;
  426. }
  427. public function getimageformat($payload, &$imagick){
  428. $finfo = new finfo(FILEINFO_MIME_TYPE);
  429. $format = $finfo->buffer($payload["body"]);
  430. if($format === false){
  431. if($payload["format"] === false){
  432. header("X-Error: Could not parse format");
  433. $this->favicon404();
  434. }
  435. $format = $payload["format"];
  436. }else{
  437. $format_tmp = explode("/", $format, 2);
  438. if($format_tmp[0] == "image"){
  439. $format_tmp = strtolower($format_tmp[1]);
  440. if(substr($format_tmp, 0, 2) == "x-"){
  441. $format_tmp = substr($format_tmp, 2);
  442. }
  443. $format = $format_tmp;
  444. }
  445. }
  446. switch($format){
  447. case "tiff": $format = "gif"; break;
  448. case "vnd.microsoft.icon": $format = "ico"; break;
  449. case "icon": $format = "ico"; break;
  450. case "svg+xml": $format = "svg"; break;
  451. }
  452. $imagick = new Imagick();
  453. if(
  454. !in_array(
  455. $format,
  456. array_map("strtolower", $imagick->queryFormats())
  457. )
  458. ){
  459. // format could not be found, but imagemagick can
  460. // sometimes detect it? shit's fucked
  461. $format = false;
  462. }
  463. return $format;
  464. }
  465. public function clientcache(){
  466. if($this->cache === false){
  467. return;
  468. }
  469. header("Last-Modified: Thu, 01 Oct 1970 00:00:00 GMT");
  470. $headers = getallheaders();
  471. if(
  472. isset($headers["If-Modified-Since"]) ||
  473. isset($headers["If-Unmodified-Since"])
  474. ){
  475. http_response_code(304); // 304: Not Modified
  476. die();
  477. }
  478. }
  479. }