|
@@ -381,20 +381,29 @@ def time_click(driver):
|
|
|
return status
|
|
|
|
|
|
|
|
|
-def main():
|
|
|
- # driver = serive_create('Profile 1')
|
|
|
- keyword = '咖啡'
|
|
|
- db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
|
|
|
def get_not_cralwer_url(keyword):
    """Return the item URLs for *keyword* that have not been crawled yet.

    Rows are read from the ``shop_item_list2`` table for the given keyword
    and then filtered so that the result excludes:

    * rows whose ``item_url`` is exactly 600 characters long, and
    * rows whose ``item_url`` already appears in ``shop_list`` for the same
      keyword.

    Args:
        keyword: Search keyword used to select rows in both tables.

    Returns:
        A ``pandas.DataFrame`` holding the remaining ``shop_item_list2`` rows
        plus an ``item_url_length`` column. The frame is empty (zero rows)
        when no row matches the keyword.
    """
    # NOTE(review): credentials are hard-coded in the connection URL; they
    # should be moved to configuration/environment and rotated.
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
    table = db['shop_item_list2']
    url_list = list(table.find(keyword=keyword))

    # Bind the keyword as a parameter instead of formatting it into the SQL
    # text, so quotes inside the keyword cannot break or alter the statement.
    crawled_rows = db.query(
        'SELECT item_url FROM shop_list WHERE keyword = :keyword',
        keyword=keyword,
    )
    shop_item = [row['item_url'] for row in crawled_rows]

    print('have {} URL list'.format(len(url_list)))

    if not url_list:
        # Nothing matched: url_list[0] would raise IndexError, so hand back
        # an empty frame that still exposes the columns callers index on.
        columns = list(table.columns)
        for required in ('item_url', 'item_url_length'):
            if required not in columns:
                columns.append(required)
        return pd.DataFrame(columns=columns)

    url_pd = pd.DataFrame(url_list, columns=list(url_list[0].keys()))
    url_pd['item_url_length'] = url_pd['item_url'].map(len)

    # Length 600 is presumably the marker of a truncated/invalid URL
    # -- TODO confirm against the code that fills shop_item_list2.
    url_pd = url_pd[url_pd['item_url_length'] != 600]

    # Drop URLs that were already stored in shop_list for this keyword.
    url_pd = url_pd[~url_pd['item_url'].isin(shop_item)]

    return url_pd
|
|
|
+
|
|
|
+
|
|
|
+def main():
|
|
|
+ # driver = serive_create('Profile 1')
|
|
|
+ keyword = '咖啡'
|
|
|
+ db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
|
|
|
+ url_pd = get_not_cralwer_url(keyword)
|
|
|
+
|
|
|
print('drvier start...')
|
|
|
driver = brower_start()
|
|
|
|