|
@@ -5,19 +5,17 @@ import requests
|
|
|
import html
|
|
|
import re
|
|
|
from bs4 import BeautifulSoup
|
|
|
-
|
|
|
+from urllib.parse import quote, unquote
|
|
|
|
|
|
def scrape(url):
|
|
|
data = requests.get(url)
|
|
|
|
|
|
our_path = re.sub(r".*://.*/", "/", request.url)
|
|
|
path = re.sub(r".*://.*/", "/", data.url)
|
|
|
- print()
|
|
|
- print(our_path)
|
|
|
- print(path)
|
|
|
- print()
|
|
|
- if our_path != path:
|
|
|
- return f"REDIRECT {path}"
|
|
|
+ if our_path != path and \
|
|
|
+ quote(unquote(re.sub("[?&=]", "", our_path))) != re.sub("[?&=]", "", path):
|
|
|
+
|
|
|
+ return f"REDIRECT {path}"
|
|
|
ret = []
|
|
|
soup = BeautifulSoup(data.text, "html.parser")
|
|
|
for div in soup.find_all("div"):
|