noodles před 2 lety
rodič
revize
0b2734ab7b
1 změněný soubor: 5 přidání a 4 odebrání
  1. swire_shop_review.py (+5 −4)

+ 5 - 4
swire_shop_review.py

@@ -99,7 +99,7 @@ def get_next_job(db):
     result = {}
     sql = '''select t1.name, t1.ludocid, t1.fid, t1.user_ratings_total, t2.place_id from 
                 (select * from shop_list3 where ludocid is NOT NULL and user_ratings_total is NOT NULL and 
-                fid not in (select fid from review_process ) ORDER BY RAND() limit 1 ) 
+                fid not in (select fid from review_process ) ORDER BY RAND() limit 5 ) 
             as t1 join google_poi.swire_store_list as t2 on t1.fid = t2.fid'''
     result = db.query(sql)
     url_pd = pd.DataFrame([dict(i) for i in result])
@@ -161,12 +161,13 @@ def process_web_request(driver, fid, ludocid):
         if request.response:
             # print(request.url)
             if 'listentitiesreviews?' in request.url :
+                print('-',  request.url)
                 if request.url.find(ludocid) != -1:
 
                     print('parsing js:')
                     print(request.url)
-                    resp = brotli.decompress(request.response.body)
-
+                    # resp = brotli.decompress(request.response.body)
+                    resp=request.response.body
                     if 'gzip' in request.response.headers.get('Content-Encoding'):
                         resp = gzip.decompress(request.response.body)
 
@@ -254,7 +255,7 @@ def main():
     for row, group in job.iterrows():
         try:
             item_url = group['item_url']
-            reviews_cnt = group['reviews_cnt']
+            reviews_cnt = group['user_ratings_total']
             fid = group['fid']
             ludocid = group['ludocid']