@@ -295,6 +295,9 @@ def find_photo_list(driver):
 def find_big_photo(output, driver):
     element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}的相片']".format(output['name']))
     ActionChains(driver).move_to_element(element).click(element).perform()
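+    # default both photo fields to empty JSON-array strings, so the DB insert
+    # below always has these keys even when no photos are scraped for a shop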
+    output['shop_photo'] = '[]'
+    output['menu_photo'] = '[]'
 
     photo_map = {
         '全部': 'shop_photo',
@@ -391,19 +394,24 @@ def main():
     print('driver start...')
     driver = brower_start()
     driver.get(url)
-    keyin_keyword(driver, '咖啡')
-    url_list = get_url_list(driver)
+    # keyin_keyword(driver, '咖啡')
+    # url_list = get_url_list(driver)
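+    # reuse the shop list exported by the earlier crawl instead of searching live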
+    url_list = pd.read_csv('result/shop_item_list_20211210.csv', index_col=0)
 
-    result = []
     # try:
-    for item_url, name in url_list:
-        print(name, ': ' ,item_url)
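+    # url_list is now a DataFrame, so iterate rows instead of unpacking (url, name) tuples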
+    for key, row in url_list.iterrows():
+        name = row['name']
+        item_url = row['item_url']
+        print(key, name, ':', item_url)
         driver.get(item_url)
         for i in range(4, 26, 2):
             element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[{}]'.format(i))
             actions = ActionChains(driver)
             actions.move_to_element(element).perform()
-
+        print('start...')
         time_status = time_click(driver)
         time.sleep(1)
         shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
@@ -428,13 +435,10 @@ def main():
         query_name = query_name.replace(' ','')
         output['query_name'] = query_name
         output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
-        time.sleep(2)
-        result += [output]
-        with open('result/20211207_{}.json'.format(name), 'w') as f:
-            json.dump(output, f)
-
+        time.sleep(1)
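+        # write each record straight to the DB instead of collecting per-shop JSON files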
         data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
-        break
+
     # except:
     #     shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
     #     print("error {}".format(id_))