sc.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512
  1. <?php
  2. class sc{
  /**
   * Backend helper created in the constructor with the scraper name "sc".
   * Used in this class for proxy selection/assignment and for storing and
   * retrieving next-page payloads.
   *
   * @var backend
   */
  public $backend;

  /**
   * HTML parser used by get_token() to enumerate <script> tags.
   *
   * @var fuckhtml
   */
  public $fuckhtml;

  /**
   * Load the two helper libraries and instantiate them.
   *
   * The properties are declared above instead of being created dynamically
   * (dynamic properties are deprecated as of PHP 8.2). They stay public, which
   * is the visibility dynamic properties had.
   */
  public function __construct(){

    // include_once: both files define classes, so loading them a second
    // time within one request would be a fatal "cannot redeclare" error
    include_once "lib/backend.php";
    $this->backend = new backend("sc");

    include_once "lib/fuckhtml.php";
    $this->fuckhtml = new fuckhtml();
  }
  /**
   * Describe the search filters offered by this scraper.
   *
   * @param mixed $page Accepted for interface compatibility; not consulted,
   *                    the same filter set is returned for every value.
   * @return array Map of filter name => ["display" => label, "option" => choices].
   */
  public function getfilters($page){

    // value sent back in the "type" request parameter => label shown to the user
    $type_choices = [
      "any" => "Any type",
      "track" => "Tracks",
      "author" => "People",
      "album" => "Albums",
      "playlist" => "Playlists",
      "goplus" => "Go+ Tracks"
    ];

    $type_filter = [
      "display" => "Type",
      "option" => $type_choices
    ];

    return ["type" => $type_filter];
  }
  /**
   * Perform an HTTP GET through the given proxy and return the response body.
   *
   * @param mixed  $proxy   Proxy descriptor, passed unchanged to
   *                        backend::assign_proxy().
   * @param string $url     Target URL.
   * @param array  $get     Query parameters; when non-empty they are encoded
   *                        with http_build_query() and appended to $url.
   * @param bool   $web_req false (default) sends the header set of a
   *                        cross-origin fetch issued from soundcloud.com
   *                        (Referer/Origin, Sec-Fetch-Mode: cors); any other
   *                        value sends the header set of a top-level page
   *                        navigation.
   * @return string Response body as returned by curl_exec().
   * @throws Exception Carrying the curl error message when the transfer fails.
   */
  private function get($proxy, $url, $get = [], $web_req = false){

    if($get !== []){

      // do not emit a second "?" when the URL already carries a query string
      $separator = strpos($url, "?") === false ? "?" : "&";
      $url .= $separator . http_build_query($get);
    }

    // headers shared by both request flavours, in the order they are sent
    $headers = [
      "User-Agent: " . config::USER_AGENT,
      "Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
      "Accept-Language: en-US,en;q=0.5",
      "Accept-Encoding: gzip"
    ];

    if($web_req === false){

      // API-style request originating from the soundcloud.com page
      $headers = array_merge($headers, [
        "Referer: https://soundcloud.com/",
        "Origin: https://soundcloud.com",
        "DNT: 1",
        "Connection: keep-alive",
        "Sec-Fetch-Dest: empty",
        "Sec-Fetch-Mode: cors",
        "Sec-Fetch-Site: same-site",
        "Priority: u=1"
      ]);
    }else{

      // top-level document navigation
      $headers = array_merge($headers, [
        "DNT: 1",
        "Connection: keep-alive",
        "Upgrade-Insecure-Requests: 1",
        "Sec-Fetch-Dest: document",
        "Sec-Fetch-Mode: navigate",
        "Sec-Fetch-Site: cross-site",
        "Priority: u=1",
        "TE: trailers"
      ]);
    }

    $curlproc = curl_init();

    try{

      curl_setopt($curlproc, CURLOPT_URL, $url);
      curl_setopt($curlproc, CURLOPT_ENCODING, ""); // default encoding

      // use http2
      curl_setopt($curlproc, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_2_0);
      curl_setopt($curlproc, CURLOPT_HTTPHEADER, $headers);

      curl_setopt($curlproc, CURLOPT_RETURNTRANSFER, true);
      curl_setopt($curlproc, CURLOPT_SSL_VERIFYHOST, 2);
      curl_setopt($curlproc, CURLOPT_SSL_VERIFYPEER, true);
      curl_setopt($curlproc, CURLOPT_CONNECTTIMEOUT, 30);
      curl_setopt($curlproc, CURLOPT_TIMEOUT, 30);

      $this->backend->assign_proxy($curlproc, $proxy);

      $data = curl_exec($curlproc);

      if(curl_errno($curlproc)){

        throw new Exception(curl_error($curlproc));
      }

      return $data;

    }finally{

      // release the handle on every exit path, including the throw above
      curl_close($curlproc);
    }
  }
  /**
   * Search SoundCloud and convert the response into this scraper's result
   * format.
   *
   * @param array $get          Request parameters. Reads "npt" (next-page
   *                            token; when truthy the stored request is
   *                            replayed), otherwise "s" (search term) and
   *                            "type" (one of the keys from getfilters()).
   * @param bool  $last_attempt When true, a response that is not valid JSON is
   *                            reported immediately instead of being retried
   *                            once with a freshly scraped token.
   * @return array Keys "status", "npt", "song", "playlist", "album",
   *               "podcast", "author" and "user". Only "song", "playlist" and
   *               "author" are populated by this method.
   * @throws Exception On an empty search term, an unknown type, a transport
   *                   failure, or a response that is not valid JSON.
   */
  public function music($get, $last_attempt = false){

    if($get["npt"]){

      // continuation: the stored payload is the previous request's parameter
      // set plus the endpoint under the "url" key
      [$params, $proxy] = $this->backend->get($get["npt"], "music");

      $params = json_decode($params, true);

      $url = $params["url"];
      unset($params["url"]);

      // track stream URLs below need the token; it travels with the stored
      // parameters as "client_id"
      $token = $params["client_id"] ?? $this->get_token($proxy);

    }else{

      $search = $get["s"];

      if(strlen($search) === 0){

        throw new Exception("Search term is empty!");
      }

      $type = $get["type"];
      $proxy = $this->backend->get_ip();
      $token = $this->get_token($proxy);

      [$url, $params] = $this->build_search_request($type, $search, $token);
    }

    $json = $this->fetch_search_json($proxy, $url, $params);

    if(
      !is_array($json) &&
      $last_attempt !== true
    ){

      // token might've expired. get_token() hands back the APCu copy whenever
      // one exists, so drop it first to force a fresh scrape, then repeat the
      // same request once with the new token
      apcu_delete("sc_token");
      $token = $this->get_token($proxy);
      $params["client_id"] = $token;

      $json = $this->fetch_search_json($proxy, $url, $params);
    }

    if(!is_array($json)){

      throw new Exception("Fetched an invalid token (please report!!)");
    }

    $out = [
      "status" => "ok",
      "npt" => null,
      "song" => [],
      "playlist" => [],
      "album" => [],
      "podcast" => [],
      "author" => [],
      "user" => []
    ];

    /*
      Get next page
    */
    if(isset($json["next_href"])){

      if(isset($json["query_urn"])){

        $params["query_urn"] = $json["query_urn"];
      }

      $params["offset"] = $params["offset"] + 20;
      $params["url"] = $url; // removed again when the token is replayed

      $out["npt"] =
        $this->backend->store(
          json_encode($params),
          "music",
          $proxy
        );
    }

    /*
      Scrape items
    */
    foreach(($json["collection"] ?? []) as $item){

      switch($item["kind"] ?? null){

        case "user":
          $out["author"][] = $this->parse_user($item);
          break;

        case "playlist":
          $out["playlist"][] = $this->parse_playlist($item);
          break;

        case "track":
          $out["song"][] = $this->parse_track($item, $token);
          break;
      }
    }

    return $out;
  }

  /**
   * Build the endpoint URL and query parameters for a first-page search.
   *
   * Reference requests captured from the SoundCloud web client:
   * normal search:
   * https://api-v2.soundcloud.com/search?q=freddie%20dredd&variant_ids=&facet=model&user_id=351062-302234-707916-795081&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
   * soundcloud go+ search:
   * https://api-v2.soundcloud.com/search/tracks?q=freddie%20dredd&variant_ids=&filter.content_tier=SUB_HIGH_TIER&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
   * tracks search:
   * https://api-v2.soundcloud.com/search/tracks?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
   * users search:
   * https://api-v2.soundcloud.com/search/users?q=freddie%20dredd&variant_ids=&facet=place&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
   * albums search:
   * https://api-v2.soundcloud.com/search/albums?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
   * playlists search:
   * https://api-v2.soundcloud.com/search/playlists_without_albums?q=freddie%20dredd&variant_ids=&facet=genre&user_id=630591-269800-703400-765403&client_id=iMxZgT5mfGstBj8GWJbYMvpzelS8ne0E&limit=20&offset=0&linked_partitioning=1&app_version=1693487844&app_locale=en
   *
   * @param string $type   Search type key ("any", "track", "author", "album",
   *                       "playlist" or "goplus").
   * @param string $search Search term.
   * @param string $token  API client_id.
   * @return array Two-element list: [endpoint URL, query parameter map].
   * @throws Exception When $type is not one of the known keys.
   */
  private function build_search_request($type, $search, $token){

    // type => [endpoint path suffix, parameters specific to that type]
    // NOTE(review): "track" sends facet_genre="" while the captured request
    // above uses facet=genre -- kept as found, confirm which one is intended
    $variants = [
      "any" => ["", ["facet" => "model"]],
      "track" => ["/tracks", ["facet_genre" => ""]],
      "author" => ["/users", ["facet" => "place"]],
      "album" => ["/albums", ["facet" => "genre"]],
      "playlist" => ["/playlists_without_albums", ["facet" => "genre"]],
      "goplus" => [
        "/tracks",
        [
          "filter.content_tier" => "SUB_HIGH_TIER",
          "facet" => "genre"
        ]
      ]
    ];

    if(
      !is_string($type) ||
      !isset($variants[$type])
    ){

      throw new Exception("Invalid search type");
    }

    [$path, $specific] = $variants[$type];

    // array union keeps the left-to-right key order, so the query string is
    // laid out as: q, variant_ids, type-specific keys, then the common tail
    $params =
      [
        "q" => $search,
        "variant_ids" => ""
      ] +
      $specific +
      [
        "client_id" => $token,
        "limit" => 20,
        "offset" => 0,
        "linked_partitioning" => 1,
        "app_version" => 1713542117,
        "app_locale" => "en"
      ];

    return ["https://api-v2.soundcloud.com/search" . $path, $params];
  }

  /**
   * Request a search URL and decode the body as JSON.
   *
   * @return mixed Decoded value, or null when the body is not valid JSON.
   * @throws Exception "Failed to fetch JSON" when the transfer itself fails;
   *                   the original exception is attached as previous.
   */
  private function fetch_search_json($proxy, $url, $params){

    try{

      $body = $this->get($proxy, $url, $params);

    }catch(Exception $error){

      throw new Exception("Failed to fetch JSON", 0, $error);
    }

    return json_decode($body, true);
  }

  /**
   * Convert a "user" collection item into an author entry.
   */
  private function parse_user($item){

    return [
      "title" => $item["username"],
      "followers" => $item["followers_count"],
      "description" =>
        trim(
          $item["track_count"] . " songs. " .
          // description may be absent or null; pass a string either way
          $this->limitstrlen((string)($item["description"] ?? ""))
        ),
      "thumb" => [
        "url" => $item["avatar_url"],
        "ratio" => "1:1"
      ],
      "url" => $item["permalink_url"]
    ];
  }

  /**
   * Convert a "playlist" collection item into a playlist entry.
   *
   * The description lists the titles of the tracks that carry one, prefixed
   * with the total number of tracks in the item.
   */
  private function parse_playlist($item){

    $tracks = $item["tracks"] ?? [];
    $titles = [];

    foreach($tracks as $song){

      if(isset($song["title"])){

        $titles[] = $song["title"];
      }
    }

    if($titles !== []){

      // the count covers every track, including entries without a title
      $description = trim(count($tracks) . " songs. " . implode(", ", $titles));
    }else{

      $description = "";
    }

    // prefer the playlist artwork, fall back to the first track's artwork
    if(!empty($item["artwork_url"])){

      $artwork = $item["artwork_url"];

    }elseif(!empty($item["tracks"][0]["artwork_url"])){

      $artwork = $item["tracks"][0]["artwork_url"];
    }else{

      $artwork = null;
    }

    return [
      "title" => $item["title"],
      "description" => $this->limitstrlen($description),
      "author" => [
        "name" => $item["user"]["username"],
        "url" => $item["user"]["permalink_url"],
        "avatar" => $item["user"]["avatar_url"]
      ],
      "thumb" => [
        "ratio" => $artwork === null ? null : "1:1",
        "url" => $artwork
      ],
      "date" => strtotime($item["created_at"]),
      "duration" => $item["duration"] / 1000,
      "url" => $item["permalink_url"]
    ];
  }

  /**
   * Convert a "track" collection item into a song entry.
   *
   * @param array  $item  Track object from the API response.
   * @param string $token API client_id, embedded in the stream URL.
   */
  private function parse_track($item, $token){

    $transcoding = $item["media"]["transcodings"][0]["url"] ?? null;

    // tracks whose monetization model mentions a TIER get no stream
    $tiered =
      stripos(
        (string)($item["monetization_model"] ?? ""),
        "TIER"
      ) !== false;

    if(
      $tiered === false &&
      $transcoding !== null
    ){

      $stream = [
        "endpoint" => "sc",
        "url" =>
          $transcoding .
          "?client_id=" . $token .
          "&track_authorization=" .
          ($item["track_authorization"] ?? "")
      ];
    }else{

      $stream = [
        "endpoint" => null,
        "url" => null
      ];
    }

    return [
      "title" => $item["title"],
      "description" => $item["description"] == "" ? null : $this->limitstrlen($item["description"]),
      "url" => $item["permalink_url"],
      "views" => $item["playback_count"],
      "author" => [
        "name" => $item["user"]["username"],
        "url" => $item["user"]["permalink_url"],
        "avatar" => $item["user"]["avatar_url"]
      ],
      "thumb" => [
        "ratio" => "1:1",
        "url" => $item["artwork_url"]
      ],
      "date" => strtotime($item["created_at"]),
      "duration" => (int)$item["full_duration"] / 1000,
      "stream" => $stream
    ];
  }
  /**
   * Return the SoundCloud API client_id, scraping it when it is not cached.
   *
   * The token is looked up in APCu under "sc_token". On a miss, the
   * soundcloud.com front page is fetched and every <script> whose src
   * contains "sndcdn.com" is downloaded in page order until one contains a
   * "client_id=..." string; that value is cached and returned.
   *
   * @param mixed $proxy         Proxy used for the front page and script
   *                             requests.
   * @param bool  $force_refresh When true, skip the cached value and scrape a
   *                             new token (the new token overwrites the cached
   *                             one). Defaults to false.
   * @return string The client_id.
   * @throws Exception When the front page or a script cannot be fetched, or
   *                   when no script contains a client_id.
   */
  public function get_token($proxy, $force_refresh = false){

    if($force_refresh !== true){

      $cached = apcu_fetch("sc_token");

      if($cached !== false){

        return $cached;
      }
    }

    // search through all javascript components on the main page
    try{

      $html =
        $this->get(
          $proxy,
          "https://soundcloud.com",
          [],
          true
        );

    }catch(Exception $error){

      throw new Exception("Failed to fetch front page", 0, $error);
    }

    $this->fuckhtml->load($html);

    $scripts =
      $this->fuckhtml
      ->getElementsByTagName(
        "script"
      );

    foreach($scripts as $script){

      // only externally hosted bundles are candidates
      if(
        !isset($script["attributes"]["src"]) ||
        strpos($script["attributes"]["src"], "sndcdn.com") === false
      ){

        continue;
      }

      try{

        $js =
          $this->get(
            $proxy,
            $script["attributes"]["src"],
            []
          );

      }catch(Exception $error){

        throw new Exception("Failed to fetch search token", 0, $error);
      }

      // the capture runs up to the next double quote
      $found =
        preg_match(
          '/client_id=([^"]+)/',
          $js,
          $match
        );

      if($found === 1){

        apcu_store("sc_token", $match[1]);
        return $match[1];
      }
    }

    throw new Exception("Did not find a Soundcloud token in the Javascript blobs");
  }
  /**
   * Flatten a text onto one line and keep only its first ~300 bytes.
   *
   * Line breaks are replaced with spaces, the result is word-wrapped at a
   * width of 300 and only the first wrapped line is returned. wordwrap() is
   * called without its cut flag, so a single word longer than 300 bytes is
   * kept whole rather than split.
   *
   * @param string|null $text Text to shorten; null is treated as "".
   * @return string First wrapped line of the flattened text.
   */
  private function limitstrlen($text){

    // callers hand over API fields that may be null; passing null as the
    // str_replace() subject is deprecated since PHP 8.1
    $text = (string)$text;

    $single_line =
      str_replace(
        ["\n\r", "\r\n", "\n", "\r"],
        " ",
        $text
      );

    $wrapped = wordwrap($single_line, 300, "\n");

    // limit of 2: only the first piece is needed
    return explode("\n", $wrapped, 2)[0];
  }
  414. }