|
@@ -12,7 +12,7 @@ from selenium.common.exceptions import WebDriverException
|
|
|
import selenium
|
|
|
import traceback
|
|
|
from bs4 import BeautifulSoup
|
|
|
-
|
|
|
+import gzip
|
|
|
from utility import database_access as DA
|
|
|
from utility.parseutils import *
|
|
|
from utility.connect import *
|
|
@@ -309,7 +309,17 @@ def process_web_request_start(driver, fid):
|
|
|
front, _ = fid.split(':')
|
|
|
if request.url.find(front) != -1:
|
|
|
print(request.url)
|
|
|
- resp = brotli.decompress(request.response.body)
|
|
|
+# resp = brotli.decompress(request.response.body)
|
|
|
+ resp=request.response.body
|
|
|
+ if 'gzip' in request.response.headers.get('Content-Encoding'):
|
|
|
+ resp = gzip.decompress(request.response.body)
|
|
|
+
|
|
|
+ if 'br' in request.response.headers.get('Content-Encoding'):
|
|
|
+ resp = brotli.decompress(request.response.body)
|
|
|
+
|
|
|
+ # resp = brotli.decompress(request.response.body)
|
|
|
+
|
|
|
+
|
|
|
jstext = resp.decode('utf-8')
|
|
|
output = parsing_js(jstext)
|
|
|
time.sleep(1)
|
|
@@ -359,9 +369,25 @@ def process_web_request_reviews(driver, output, ludocid):
|
|
|
# print('parsing js:')
|
|
|
if request.url.find(ludocid) != -1:
|
|
|
print(request.url)
|
|
|
- resp = brotli.decompress(request.response.body)
|
|
|
+
|
|
|
+# resp = brotli.decompress(request.response.body)
|
|
|
+# jstext = resp.decode('utf-8')
|
|
|
+# result = reviews_parsing_js(jstext)
|
|
|
+# resp = brotli.decompress(request.response.body)
|
|
|
+ resp=request.response.body
|
|
|
+ if 'gzip' in request.response.headers.get('Content-Encoding'):
|
|
|
+ resp = gzip.decompress(request.response.body)
|
|
|
+
|
|
|
+ if 'br' in request.response.headers.get('Content-Encoding'):
|
|
|
+ resp = brotli.decompress(request.response.body)
|
|
|
+
|
|
|
+ # resp = brotli.decompress(request.response.body)
|
|
|
+
|
|
|
+
|
|
|
jstext = resp.decode('utf-8')
|
|
|
- result = reviews_parsing_js(jstext)
|
|
|
+ result = parsing_js(jstext)
|
|
|
+
|
|
|
+
|
|
|
output['reviews'] = str(result)
|
|
|
time.sleep(1)
|
|
|
return output
|