jared 3 سال پیش
والد
کامیت
2ddfbf8761
1 فایل تغییر یافته به همراه 16 افزوده شده و 0 حذف شده
  1. 16 0
      swire_shop_item_list.py

+ 16 - 0
swire_shop_item_list.py

@@ -28,6 +28,17 @@ import urllib.parse
 chrome_window=False
 globalkw=None
 proxyport=8787
+
+
+def build_cache(db):
+    """Return a dict mapping each place_id in swire_store_list to 1.
+
+    Issues one SELECT through db.query(); every key maps to the constant 1.
+    """
+    rows = db.query('SELECT place_id FROM google_poi.swire_store_list;')
+    seen = {row['place_id']: 1 for row in rows}
+    return seen
+
 def brower_start(port):
     global proxyport
     global chrome_window
@@ -239,6 +250,7 @@ def save_js_to_db(jsobj,num,keyword):
 #        store_list_table.upsert(r,keys=['place_id'])
 
 def process_web_request(driver,area_num,keyword):
+    global iddict
 #    query = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, '//button[@vet="19128"]')))
     time.sleep(0.8)
     time.sleep(3)
@@ -256,6 +268,8 @@ def process_web_request(driver,area_num,keyword):
                 jstext=resp.decode('utf-8')
                 resultobj=parsing_js(jstext)
                 print("before",datetime.now())
+                if iddict.get(resultobj['place_id']) is not None:
+                    continue
                 save_js_to_db(resultobj,area_num,keyword)
                 print("after",datetime.now())
 
@@ -267,6 +281,7 @@ def main():
     global store_list_table
     global globalkw
     global proxyport
+    global iddict
 
     if len(sys.argv)>1:
         globalkw=sys.argv[1]
@@ -278,6 +293,7 @@ def main():
 #        chrome_window=False
 
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+    iddict=build_cache(db)
     store_list_table = db['swire_store_list']
 
 #    table2 = db['swire_progress_list']