|
@@ -99,7 +99,7 @@ def get_next_job(db):
|
|
|
result = {}
|
|
|
sql = '''select t1.name, t1.ludocid, t1.fid, t1.user_ratings_total, t2.place_id from
|
|
|
(select * from shop_list3 where ludocid is NOT NULL and user_ratings_total is NOT NULL and
|
|
|
- fid not in (select fid from review_process ) ORDER BY RAND() limit 1 )
|
|
|
+ fid not in (select fid from review_process ) ORDER BY RAND() limit 5 )
|
|
|
as t1 join google_poi.swire_store_list as t2 on t1.fid = t2.fid'''
|
|
|
result = db.query(sql)
|
|
|
url_pd = pd.DataFrame([dict(i) for i in result])
|
|
@@ -161,12 +161,13 @@ def process_web_request(driver, fid, ludocid):
|
|
|
if request.response:
|
|
|
# print(request.url)
|
|
|
if 'listentitiesreviews?' in request.url :
|
|
|
+ print('-', request.url)
|
|
|
if request.url.find(ludocid) != -1:
|
|
|
|
|
|
print('parsing js:')
|
|
|
print(request.url)
|
|
|
- resp = brotli.decompress(request.response.body)
|
|
|
-
|
|
|
+ # resp = brotli.decompress(request.response.body)
|
|
|
+ resp=request.response.body
|
|
|
if 'gzip' in request.response.headers.get('Content-Encoding'):
|
|
|
resp = gzip.decompress(request.response.body)
|
|
|
|
|
@@ -254,7 +255,7 @@ def main():
|
|
|
for row, group in job.iterrows():
|
|
|
try:
|
|
|
item_url = group['item_url']
|
|
|
- reviews_cnt = group['reviews_cnt']
|
|
|
+ reviews_cnt = group['user_ratings_total']
|
|
|
fid = group['fid']
|
|
|
ludocid = group['ludocid']
|
|
|
|