|
@@ -28,6 +28,17 @@ import urllib.parse
|
|
|
chrome_window=False
|
|
|
globalkw=None
|
|
|
proxyport=8787
|
|
|
+
|
|
|
+
|
|
|
def build_cache(db):
    """Return a lookup of every place_id already stored in swire_store_list.

    Args:
        db: Database handle exposing ``query(sql)``; the call must return an
            iterable of rows that can be indexed with ``row['place_id']``.

    Returns:
        dict: Maps each ``place_id`` returned by the query to ``1``. The
        value is only a presence marker; callers test membership with
        ``.get(place_id) is not None``. Duplicate ids collapse into a
        single key, and an empty result set yields an empty dict.
    """
    rows = db.query('SELECT place_id FROM google_poi.swire_store_list;')
    # Stay a dict (rather than a set) so existing ``iddict.get(...)``
    # lookups keep working unchanged.
    return {row['place_id']: 1 for row in rows}
|
|
|
+
|
|
|
def brower_start(port):
|
|
|
global proxyport
|
|
|
global chrome_window
|
|
@@ -239,6 +250,7 @@ def save_js_to_db(jsobj,num,keyword):
|
|
|
# store_list_table.upsert(r,keys=['place_id'])
|
|
|
|
|
|
def process_web_request(driver,area_num,keyword):
|
|
|
+ global iddict
|
|
|
# query = WebDriverWait(driver, 30).until(EC.presence_of_element_located((By.XPATH, '//button[@vet="19128"]')))
|
|
|
time.sleep(0.8)
|
|
|
time.sleep(3)
|
|
@@ -256,6 +268,8 @@ def process_web_request(driver,area_num,keyword):
|
|
|
jstext=resp.decode('utf-8')
|
|
|
resultobj=parsing_js(jstext)
|
|
|
print("before",datetime.now())
|
|
|
+ if iddict.get(resultobj['place_id']) is not None:
|
|
|
+ continue
|
|
|
save_js_to_db(resultobj,area_num,keyword)
|
|
|
print("after",datetime.now())
|
|
|
|
|
@@ -267,6 +281,7 @@ def main():
|
|
|
global store_list_table
|
|
|
global globalkw
|
|
|
global proxyport
|
|
|
+ global iddict
|
|
|
|
|
|
if len(sys.argv)>1:
|
|
|
globalkw=sys.argv[1]
|
|
@@ -278,6 +293,7 @@ def main():
|
|
|
# chrome_window=False
|
|
|
|
|
|
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
|
|
|
+ iddict=build_cache(db)
|
|
|
store_list_table = db['swire_store_list']
|
|
|
|
|
|
# table2 = db['swire_progress_list']
|