|
@@ -154,34 +154,31 @@ def save_js_to_db(jsobj, fid):
|
|
|
traceback.print_exc()
|
|
|
|
|
|
|
|
|
def process_web_request(driver, fid):
    """Parse the first captured review-listing response and store it under *fid*.

    Scans ``driver.requests`` for the first request that has a response and
    whose URL contains ``'listentitiesreviews?'``, decompresses the response
    body according to its ``Content-Encoding`` header, parses it with
    ``parsing_js`` and hands the result to ``save_js_to_db``.

    Args:
        driver: Browser driver exposing a deletable ``requests`` attribute
            (presumably a selenium-wire webdriver -- confirm against caller).
        fid: Identifier passed through unchanged to ``save_js_to_db``.

    Returns:
        1 if a matching response was parsed and saved, 0 if none was found.
    """
    # Presumably lets pending network requests finish before the captured
    # request list is inspected -- TODO confirm the delay is still needed.
    time.sleep(3)
    print("ppppppppp&**********************")

    try:
        for request in driver.requests:
            response = request.response
            if not response or 'listentitiesreviews?' not in request.url:
                continue

            print('parsing js:')
            print(request.url)

            # headers.get() returns None when the header is absent; fall back
            # to '' so the membership tests below cannot raise TypeError.
            encoding = response.headers.get('Content-Encoding') or ''

            body = response.body
            if 'gzip' in encoding:
                body = gzip.decompress(response.body)
            if 'br' in encoding:
                body = brotli.decompress(response.body)

            result = parsing_js(body.decode('utf-8'))
            save_js_to_db(result, fid)
            time.sleep(1)
            return 1

        return 0
    finally:
        # Always drop the captured requests, including when parsing or saving
        # raises, so a later call never re-reads responses from this page.
        del driver.requests
|
|
@@ -257,7 +254,6 @@ def main():
|
|
|
item_url = group['item_url']
|
|
|
reviews_cnt = group['user_ratings_total']
|
|
|
fid = group['fid']
|
|
|
- ludocid = group['ludocid']
|
|
|
|
|
|
print(reviews_cnt, item_url)
|
|
|
|
|
@@ -269,8 +265,7 @@ def main():
|
|
|
time.sleep(0.5)
|
|
|
|
|
|
get_reviews(driver, reviews_cnt)
|
|
|
- status = process_web_request(driver, fid, ludocid)
|
|
|
- print(driver.current_url)
|
|
|
+ status = process_web_request(driver, fid)
|
|
|
|
|
|
if status:
|
|
|
db['review_process'].insert({'fid':fid, 'dt':datetime.now()})
|