backend.php 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. <?php
  /**
   * Per-scraper helper that hands out proxies from a rotating pool and keeps
   * encrypted "next page" payloads in APCu between requests.
   *
   * Proxy lines have the form "type:address:port:username:password"; the
   * special line "raw_ip::::" means "connect directly, without a proxy".
   */
  class backend{

      /**
       * Scraper name. Used to look up the config::PROXY_<NAME> constant and to
       * namespace APCu keys. Declared explicitly because assigning an undeclared
       * property is deprecated since PHP 8.2.
       *
       * @var string presumably always a string; verify against callers
       */
      public $scraper;

      /**
       * @param string $scraper Name of the scraper this backend serves.
       */
      public function __construct($scraper){

          $this->scraper = $scraper;
      }

      /*
          Proxy stuff
      */

      /**
       * Pick the next proxy line for this scraper, rotating through the pool.
       *
       * The pool name comes from config::PROXY_<SCRAPER>; the pool itself is
       * read from data/proxies/<pool>.txt, where blank lines and lines starting
       * with "#" are ignored. A shared APCu counter selects the entry.
       *
       * @return string Proxy line, or "raw_ip::::" when the pool constant is false.
       * @throws Exception When the pool file is unreadable or has no usable entry.
       */
      public function get_ip(){

          $pool = constant("config::PROXY_" . strtoupper($this->scraper));

          if($pool === false){

              // proxying is disabled for this scraper: use the server's own IP
              return 'raw_ip::::';
          }

          // bump the rotation counter first so every call advances the pool
          $counter = apcu_inc("p." . $this->scraper);

          $contents = file_get_contents("data/proxies/" . $pool . ".txt");

          $proxylist = [];

          if($contents !== false){

              // trim every line: a stray "\r" (CRLF files) or leading space
              // would otherwise end up inside the proxy type or password
              foreach(explode("\n", $contents) as $line){

                  $line = trim($line);

                  // ignore empty or commented lines
                  if($line === "" || $line[0] === "#"){

                      continue;
                  }

                  $proxylist[] = $line;
              }
          }

          if(count($proxylist) === 0){

              // without this guard the modulo below divides by zero
              throw new Exception("Proxy list \"" . $pool . "\" is empty or could not be read!");
          }

          // apcu_inc returns false on failure; fall back to the first entry
          return $proxylist[(int)$counter % count($proxylist)];
      }

      /**
       * Configure a cURL handle to use the given proxy line.
       *
       * This function is also called directly on nextpage.
       *
       * @param resource|CurlHandle $curlproc cURL handle to configure.
       * @param string $ip Proxy line ("type:address:port:username:password").
       *                   Trailing fields may be omitted; the password may itself
       *                   contain ":" because the line is split into at most 5 parts.
       * @return void
       */
      public function assign_proxy(&$curlproc, string $ip){

          // pad to 5 fields so short lines such as "socks5:host:port"
          // don't raise undefined-key warnings
          [
              $type,
              $address,
              $port,
              $username,
              $password
          ] = array_pad(explode(":", $ip, 5), 5, "");

          if($type === "raw_ip"){

              // direct connection requested, leave the handle untouched
              return;
          }

          $endpoint = $address . ":" . $port;

          switch($type){

              case "http":
              case "https":
                  // the scheme prefix tells cURL how to talk to the proxy
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
                  curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $endpoint);
                  break;

              case "socks4":
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4);
                  curl_setopt($curlproc, CURLOPT_PROXY, $endpoint);
                  break;

              case "socks5":
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
                  curl_setopt($curlproc, CURLOPT_PROXY, $endpoint);
                  break;

              case "socks4a":
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
                  curl_setopt($curlproc, CURLOPT_PROXY, $endpoint);
                  break;

              case "socks5_hostname":
              case "socks5a":
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
                  curl_setopt($curlproc, CURLOPT_PROXY, $endpoint);
                  break;

              // NOTE(review): an unrecognised type configures no proxy at all,
              // so the request goes out directly. Kept as-is for compatibility;
              // confirm whether this should be an error instead.
          }

          if($username !== ""){

              curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
          }
      }

      /*
          Next page stuff
      */

      /**
       * Cache a payload for a later "next page" request and return its token.
       *
       * The payload is deflated and encrypted with a freshly generated
       * secretbox key. Only the nonce, proxy line and ciphertext are stored in
       * APCu (for 15 minutes); the key is handed back inside the token, so the
       * cached entry alone is not enough to read the payload.
       *
       * @param string $payload Data to keep for the next page.
       * @param string $page Page name; only its first character is used in the cache key.
       * @param string $proxy Proxy line to reuse when the token is redeemed.
       * @return string Token of the form "<scraper><requestid>.<base64url key>".
       */
      public function store(string $payload, string $page, string $proxy){

          $key = sodium_crypto_secretbox_keygen();
          $nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES);

          $requestid = apcu_inc("requestid");

          // substr() instead of $page[0]: no warning on an empty page name
          $cache_key =
              substr($page, 0, 1) . "." . // first letter of page name
              $this->scraper . // scraper name
              $requestid;

          apcu_store(
              $cache_key,
              [
                  $nonce,
                  $proxy,
                  // compress and encrypt
                  sodium_crypto_secretbox(
                      gzdeflate($payload),
                      $nonce,
                      $key
                  )
              ],
              900 // cache information for 15 minutes
          );

          // URL-safe base64 without padding
          return
              $this->scraper . $requestid . "." .
              rtrim(strtr(base64_encode($key), '+/', '-_'), '=');
      }

      /**
       * Redeem a token produced by store() and return the cached payload.
       *
       * The cache entry is deleted once it has been decrypted successfully, so
       * each token can be redeemed only once.
       *
       * @param string $npt Next page token ("<scraper><requestid>.<base64url key>").
       * @param string $page Page name; only its first character is used in the cache key.
       * @return array Two elements: [0] the original payload, [1] the proxy line.
       * @throws Exception When the token is malformed, unknown, expired or
       *                   carries a key that does not decrypt the entry.
       */
      public function get(string $npt, string $page){

          $parts = explode(".", $npt, 2);

          if(count($parts) !== 2){

              throw new Exception("Malformed nextPageToken!");
          }

          $cache_key = substr($page, 0, 1) . "." . $parts[0];

          $payload = apcu_fetch($cache_key);

          if(
              !is_array($payload) ||
              count($payload) !== 3
          ){

              throw new Exception("The next page token is invalid or has expired!");
          }

          // undo the URL-safe alphabet and restore the stripped "=" padding
          $encoded = strtr($parts[1], '-_', '+/');
          $encoded .= str_repeat("=", (4 - strlen($encoded) % 4) % 4);

          $key = base64_decode($encoded);

          // sodium throws on a wrong-sized key; treat that as a bad token instead
          if(
              $key === false ||
              strlen($key) !== SODIUM_CRYPTO_SECRETBOX_KEYBYTES
          ){

              throw new Exception("The next page token is invalid or has expired!");
          }

          // decrypt and decompress data
          $compressed =
              sodium_crypto_secretbox_open(
                  $payload[2], // data
                  $payload[0], // nonce
                  $key
              );

          if($compressed === false){

              // wrong key for this entry
              throw new Exception("The next page token is invalid or has expired!");
          }

          $data = gzinflate($compressed);

          if($data === false){

              throw new Exception("The next page token is invalid or has expired!");
          }

          // remove the key after using successfully
          apcu_delete($cache_key);

          return [
              $data, // data
              $payload[1] // proxy
          ];
      }
  }