backend.php 3.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178
  1. <?php
  /**
   * Helper shared by scrapers. It does two unrelated jobs:
   *
   *  - proxy handling: pick a proxy line for the scraper (get_ip) and apply it
   *    to a cURL handle (assign_proxy);
   *  - "next page" handling: stash an encrypted, compressed payload in APCu
   *    (store) and read it back once, given the token handed to the client (get).
   */
  class backend{

      /**
       * Scraper name this instance works for. It is concatenated into the
       * config constant name (get_ip) and into the APCu keys (store/get).
       *
       * Declared explicitly because creating it on the fly in the constructor
       * is a dynamic property, which is deprecated since PHP 8.2. It stays
       * public: a dynamically created property is public, so outside code may
       * already be reading it.
       *
       * @var mixed
       */
      public $scraper;

      /**
       * @param mixed $scraper Scraper name (used as a string throughout the class).
       */
      public function __construct($scraper){

          $this->scraper = $scraper;
      }

      /*
          Proxy stuff
      */

      /**
       * Pick the proxy line to use for the next request of this scraper.
       *
       * The pool name is read from the constant config::PROXY_<SCRAPER>. When
       * that constant is false the marker line 'raw_ip::::' is returned, which
       * assign_proxy() treats as "no proxy". Otherwise the usable lines of
       * data/proxies/<pool>.txt are rotated through using a per-scraper APCu
       * counter.
       *
       * @return string A proxy line "type:address:port:username:password",
       *                or 'raw_ip::::'.
       * @throws Exception When the pool file is missing, unreadable, or holds
       *                   no usable line. (The original code ended in a
       *                   "modulo by zero" error in that situation.)
       */
      public function get_ip(){

          $pool = constant("config::PROXY_" . strtoupper($this->scraper));

          if($pool === false){

              // proxying is disabled for this scraper
              return 'raw_ip::::';
          }

          $path = "data/proxies/" . $pool . ".txt";
          $contents = false;

          // check first so a missing file does not emit a warning
          if(is_file($path) && is_readable($path)){

              $contents = file_get_contents($path);
          }

          $proxylist = [];

          if($contents !== false){

              foreach(explode("\n", $contents) as $entry){

                  // trim() also removes the "\r" left behind by CRLF files,
                  // which would otherwise end up inside the last field
                  $entry = trim($entry);

                  // ignore empty or commented lines
                  if($entry !== "" && $entry[0] !== "#"){

                      $proxylist[] = $entry;
                  }
              }
          }

          if(count($proxylist) === 0){

              throw new Exception("Proxy pool \"" . $pool . "\" is missing, unreadable or empty!");
          }

          // apcu_inc() yields false on failure; the cast makes that index 0
          $proxy_index_raw = (int)apcu_inc("p." . $this->scraper);

          return $proxylist[$proxy_index_raw % count($proxylist)];
      }

      /**
       * Configure a cURL handle to go through the given proxy line.
       *
       * This function is also called directly on nextpage.
       *
       * @param mixed  $curlproc cURL handle, passed to curl_setopt().
       * @param string $ip       Proxy line "type:address:port:username:password".
       *                         Trailing fields may be omitted. The type
       *                         "raw_ip" leaves the handle untouched.
       * @return void
       * @throws Exception When the proxy type is not one of the supported ones.
       *                   Previously such a line was ignored silently, so the
       *                   request went out without any proxy.
       */
      public function assign_proxy(&$curlproc, string $ip){

          // parse proxy line; pad it so short lines ("type:address:port")
          // do not trigger undefined-offset warnings
          [
              $type,
              $address,
              $port,
              $username,
              $password
          ] = array_pad(explode(":", $ip, 5), 5, "");

          switch($type){

              case "raw_ip":
                  // direct connection requested: nothing to set
                  return;

              case "http":
              case "https":
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
                  curl_setopt($curlproc, CURLOPT_PROXY, $type . "://" . $address . ":" . $port);
                  break;

              case "socks4":
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4);
                  curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
                  break;

              case "socks5":
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5);
                  curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
                  break;

              case "socks4a":
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4A);
                  curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
                  break;

              case "socks5_hostname":
              case "socks5h":
              case "socks5a":
                  curl_setopt($curlproc, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5_HOSTNAME);
                  curl_setopt($curlproc, CURLOPT_PROXY, $address . ":" . $port);
                  break;

              default:
                  // refuse to continue: falling through would send the
                  // request over a direct connection
                  throw new Exception("Unsupported proxy type \"" . $type . "\"!");
          }

          if($username !== ""){

              curl_setopt($curlproc, CURLOPT_PROXYUSERPWD, $username . ":" . $password);
          }
      }

      /*
          Next page stuff
      */

      /**
       * Store a next-page payload in APCu and return the token that unlocks it.
       *
       * The payload is deflated and encrypted with a freshly generated key.
       * APCu only receives the nonce, the proxy line and the ciphertext; the
       * key itself is returned inside the token and is not written to the cache.
       *
       * NOTE(review): the results of apcu_inc() and apcu_store() are not
       * checked, so a token is returned even when caching failed.
       *
       * @param string $payload Data to keep for the next page request.
       * @param string $page    Page name; only its first character is used in
       *                        the cache key.
       * @param string $proxy   Proxy line to reuse for the next page request.
       * @return string Token "<scraper><requestid>.<url-safe base64 key>".
       */
      public function store(string $payload, string $page, string $proxy){

          $key = sodium_crypto_secretbox_keygen();
          $nonce = random_bytes(SODIUM_CRYPTO_SECRETBOX_NONCEBYTES);

          $requestid = apcu_inc("requestid");

          // first letter of page name + "." + scraper name + request id
          // (substr() instead of $page[0]: same result, no warning on "")
          $cachekey = substr($page, 0, 1) . "." . $this->scraper . $requestid;

          // compress and encrypt
          $ciphertext =
              sodium_crypto_secretbox(
                  gzdeflate($payload),
                  $nonce,
                  $key
              );

          apcu_store(
              $cachekey,
              [
                  $nonce,
                  $proxy,
                  $ciphertext
              ],
              900 // cache information for 15 minutes
          );

          // url-safe base64 without padding
          return
              $this->scraper . $requestid . "." .
              rtrim(strtr(base64_encode($key), '+/', '-_'), '=');
      }

      /**
       * Resolve a token produced by store() back into its payload and proxy.
       *
       * The cache entry is deleted only after it has been decrypted and
       * inflated successfully, so a token works once and a wrong key does not
       * destroy the entry.
       *
       * @param string $npt  Token as returned by store(). It is split at the
       *                     first ".", into "<scraper><requestid>" and the key.
       * @param string $page Page name; only its first character is used.
       * @return array [0 => payload string, 1 => proxy line]
       * @throws Exception "Malformed nextPageToken!" when the token has no ".";
       *                   "The next page token is invalid or has expired!" when
       *                   the key is unusable, the entry is gone, or decryption
       *                   or decompression fails.
       */
      public function get(string $npt, string $page){

          $expired = "The next page token is invalid or has expired!";

          $explode = explode(".", $npt, 2);

          if(count($explode) !== 2){

              throw new Exception("Malformed nextPageToken!");
          }

          // substr() instead of $page[0]: same result, no warning on ""
          $apcu = substr($page, 0, 1) . "." . $explode[0];

          // restore standard base64: swap the alphabet back and re-add the
          // padding. The old str_pad() call passed the remainder as the
          // *total* length, so it never padded anything.
          $encoded = strtr($explode[1], '-_', '+/');
          $encoded .= str_repeat('=', (4 - strlen($encoded) % 4) % 4);

          $key = base64_decode($encoded);

          // a key of the wrong size makes libsodium throw instead of
          // returning false, so reject it here with the regular message
          if(
              $key === false ||
              strlen($key) !== SODIUM_CRYPTO_SECRETBOX_KEYBYTES
          ){

              throw new Exception($expired);
          }

          $payload = apcu_fetch($apcu);

          // expected shape: [nonce, proxy, ciphertext]
          if(
              !is_array($payload) ||
              !isset($payload[0], $payload[1], $payload[2])
          ){

              throw new Exception($expired);
          }

          // decrypt data
          $compressed =
              sodium_crypto_secretbox_open(
                  $payload[2], // data
                  $payload[0], // nonce
                  $key
              );

          if($compressed === false){

              // wrong key or tampered data
              throw new Exception($expired);
          }

          // decompress data
          $data = gzinflate($compressed);

          if($data === false){

              throw new Exception($expired);
          }

          // remove the key after using successfully
          apcu_delete($apcu);

          return [
              $data, // data
              $payload[1] // proxy
          ];
      }
  }