noodles 3 سال پیش
والد
کامیت
428aa9c4f3
1 فایل تغییر یافته به همراه 13 افزوده شده و 4 حذف شده
  1. 13 4
      run.py

+ 13 - 4
run.py

@@ -381,20 +381,29 @@ def time_click(driver):
     return status
 
 
-def main():
-    # driver = serive_create('Profile 1')
-    keyword = '咖啡'
-    db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
+def get_not_cralwer_url(keyword):  # Returns a DataFrame of shop_item_list2 rows for `keyword` whose item_url is not already in shop_list.
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
     table = db['shop_item_list2']
     url_list = list(table.find(keyword=keyword))
     shop_item = [i['item_url'] for i in db.query('SELECT item_url FROM shop_list where keyword="{}"'.format(keyword))]
+    # Build a DataFrame, then keep rows whose item_url length is not 600 and whose item_url is absent from shop_list; url_list[0] raises IndexError when no rows matched.
     url_pd = pd.DataFrame(url_list, columns=url_list[0].keys())
     url_pd['item_url_length'] = url_pd.item_url.apply(lambda x: len(x))
     url_pd = url_pd[url_pd['item_url_length']!=600]
     url_pd = url_pd[~url_pd['item_url'].isin(shop_item)]
+    # The count printed below is of all fetched rows (url_list), not of the filtered url_pd.
     print('have {} URL list'.format(len(url_list)))
     # url_list = pd.read_csv('result/shop_item_list_20211210.csv', index_col=0)
+    # NOTE(review): the SELECT above interpolates keyword with str.format and the connection URL embeds credentials; consider bound parameters and external configuration.
+    return url_pd
+
+
+def main():
+    # driver = serive_create('Profile 1')
+    keyword = '咖啡'
+    db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
+    url_pd = get_not_cralwer_url(keyword)
+    
     print('drvier start...')
     driver = brower_start()