|
@@ -14,6 +14,7 @@ from utility.parseutils import *
|
|
|
from utility.connect import *
|
|
|
|
|
|
from datetime import datetime
|
|
|
+import dataset
|
|
|
import pandas as pd
|
|
|
import time
|
|
|
import json
|
|
@@ -383,7 +384,16 @@ def time_click(driver):
|
|
|
def main():
|
|
|
# driver = serive_create('Profile 1')
|
|
|
db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
|
|
|
- url_list = pd.read_csv('result/shop_item_list_20211210.csv', index_col=0)
|
|
|
+ db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
|
|
|
+ table = db['shop_item_list2']
|
|
|
+ url_list = list(table.find(keyword='咖啡'))
|
|
|
+ shop_item = [i['item_url'] for i in db.query('SELECT item_url FROM shop_list where keyword="{}"'.format('咖啡'))]
|
|
|
+ url_pd = pd.DataFrame(url_list, columns=url_list[0].keys())
|
|
|
+ url_pd['item_url_length'] = url_pd.item_url.apply(lambda x: len(x))
|
|
|
+ url_pd = url_pd[url_pd['item_url_length']!=600]
|
|
|
+ url_list = url_pd[~url_pd['item_url'].isin(shop_item)]['item_url']
|
|
|
+
|
|
|
+ # url_list = pd.read_csv('result/shop_item_list_20211210.csv', index_col=0)
|
|
|
print('drvier start...')
|
|
|
driver = brower_start()
|
|
|
|
|
@@ -391,8 +401,8 @@ def main():
|
|
|
for key, row in url_list.iterrows():
|
|
|
name = row['name']
|
|
|
item_url = row['item_url']
|
|
|
- result = DA.mysql_select_data(db, 'select item_url from shop_list where item_url="{}"'.format(item_url))
|
|
|
- if len(result) != 0: continue
|
|
|
+ # result = DA.mysql_select_data(db, 'select item_url from shop_list where item_url="{}"'.format(item_url))
|
|
|
+ # if len(result) != 0: continue
|
|
|
print(key, name, ': ' ,item_url)
|
|
|
|
|
|
driver.get(item_url)
|