|
@@ -46,6 +46,7 @@ def brower_start(port):
|
|
|
|
|
|
browser = webdriver.Remote(
|
|
|
command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
|
|
|
+ # command_executor='http://192.53.174.202:'+str(port)+'/wd/hub',
|
|
|
desired_capabilities=options.to_capabilities()
|
|
|
)
|
|
|
return browser
|
|
@@ -315,6 +316,10 @@ def find_photo_list(driver):
|
|
|
|
|
|
def find_big_photo(output, driver):
|
|
|
# element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}的相片']".format(output['name']))
|
|
|
+ wait = WebDriverWait(driver, 60)
|
|
|
+ wait.until(
|
|
|
+ EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[1]/div[1]/button'))
|
|
|
+ )
|
|
|
element = driver.find_element(By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[1]/div[1]/button')
|
|
|
ActionChains(driver).move_to_element(element).click(element).perform()
|
|
|
output['shop_photo'] = '[]'
|
|
@@ -324,7 +329,8 @@ def find_big_photo(output, driver):
|
|
|
'全部': 'shop_photo',
|
|
|
'菜單': 'menu_photo'
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
+ driver.find_element(By.CSS_SELECTOR, "button[data-tab-index='1']")
|
|
|
photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
tab_dict = {}
|
|
|
for tab_index in [0, 1, 2]:
|
|
@@ -402,9 +408,12 @@ def time_click(driver):
|
|
|
ActionChains(driver).move_to_element(element).click(element).perform()
|
|
|
status = '正常'
|
|
|
|
|
|
- elif len(shop_soup.select("img[aria-label='通知']")) != 0:
|
|
|
+ elif len(shop_soup_tmp.select("img[aria-label='通知']")) != 0:
|
|
|
status = shop_soup_tmp.find('span',class_='LJKBpe-Tswv1b-text aSftqf').text
|
|
|
# status = '永久停業' or '暫時關閉'
|
|
|
+
|
|
|
+ elif len(shop_soup_tmp.select('button[aria-label*="查看更詳細的營業時間"]')) != 0:
|
|
|
+ status = 'error'
|
|
|
|
|
|
return status
|
|
|
except:
|
|
@@ -491,7 +500,7 @@ def main():
|
|
|
#driver = serive_create_linux(profilepath)
|
|
|
|
|
|
for key, row in url_pd.iterrows():
|
|
|
- # try:
|
|
|
+ try:
|
|
|
name = row['name']
|
|
|
item_url = row['item_url']
|
|
|
print(key, name, ': ' ,item_url)
|
|
@@ -501,6 +510,10 @@ def main():
|
|
|
page_down_(driver, "//div[@class='x3AX1-LfntMc-header-title-ij8cu']", 3)
|
|
|
|
|
|
time_status = time_click(driver)
|
|
|
+ if time_status == 'error':
|
|
|
+ error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
|
|
|
+ data_select_insert(db, 'error_list', error_table_col, row)
|
|
|
+ continue
|
|
|
time.sleep(0.5)
|
|
|
shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
|
|
@@ -513,7 +526,7 @@ def main():
|
|
|
output = get_shop_info(driver, output, shop_soup)
|
|
|
|
|
|
print('get_intro_info')
|
|
|
- if len(shop_soup.select("div > [aria-label='{}簡介']".format(output['name']))) != 0:
|
|
|
+ if len(shop_soup.select("div[aria-label='{}簡介']".format(output['name']))) != 0:
|
|
|
output = get_intro_info(driver, output)
|
|
|
else:
|
|
|
for key in intro_list:
|
|
@@ -543,12 +556,12 @@ def main():
|
|
|
output['keyword'] = keyword
|
|
|
output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
|
|
|
data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
|
|
|
-
|
|
|
- # except Exception as e:
|
|
|
- # print(e)
|
|
|
- # error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
|
|
|
- # data_select_insert(db, 'error_list', error_table_col, row)
|
|
|
- # time.sleep(2)
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ print(e)
|
|
|
+ error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
|
|
|
+ data_select_insert(db, 'error_list', error_table_col, row)
|
|
|
+ time.sleep(1)
|
|
|
# driver.close()
|
|
|
# driver = brower_start(port)
|
|
|
# driver = serive_create_linux(profilepath)
|