@@ -4,6 +4,7 @@ from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
+from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By

 from bs4 import BeautifulSoup
@@ -135,61 +136,62 @@ def get_intro_info(driver, output):


 def get_time_list(shop_soup, output):
+    periods = []
+    weekday_text = []
+
     open_now = blank_check(shop_soup.find('span', {'class':'LJKBpe-Tswv1b-hour-text'}).text.split('\xa0')[0])
     if open_now == '永久停業' or open_now == '暫時關閉':
         output['open_now'] = 'False'
     else:
         output['open_now'] = 'True'
-        periods = []
-        weekday_text = []
-        for tr_ in shop_soup.find_all('tr'):
-            if tr_.find('div').text.replace(' ','') != '':
-                week = tr_.find('div').text
-                time_list = [blank_check(i.text) for i in tr_.find_all('li')]
-                for time_ in time_list:
-                    if time_ == '24 小時營業':
-                        periods += [{
-                            "open":{
-                                "day": week_list[week],
-                                "time": 0000
-                            },
-                            "close":{
-                                "day": week_list[week],
-                                "time": ''
-                            }
-                        }]
-                    elif time_ == '休息':
-                        periods += [{
-                            "open":{
-                                "day": week_list[week],
-                                "time": ''
-                            },
-                            "close":{
-                                "day": week_list[week],
-                                "time": ''
-                            }
-                        }]
-                    else:
-                        start, end = time_.split('–')
-                        end_hour, end_min = end.split(':')
-                        start_hour, start_min = start.split(':')
-
-                        if end_hour < start_hour:
-                            end_day = week_list[week] + 1
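+    # Each <tr> in the hours table is one weekday; each <li> is one time range,
+    # converted below into open/close period dicts.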
+    for tr_ in shop_soup.find_all('tr'):
+        if tr_.find('div').text.replace(' ','') != '':
+            week = tr_.find('div').text
+            time_list = [blank_check(i.text) for i in tr_.find_all('li')]
+            for time_ in time_list:
+                if time_ == '24 小時營業':
+                    periods += [{
+                        "open":{
+                            "day": week_list[week],
+                            "time": 0000
+                        },
+                        "close":{
+                            "day": week_list[week],
+                            "time": ''
+                        }
+                    }]
+                elif time_ == '休息':
+                    periods += [{
+                        "open":{
+                            "day": week_list[week],
+                            "time": ''
+                        },
+                        "close":{
+                            "day": week_list[week],
+                            "time": ''
+                        }
+                    }]
                 else:
-                            end_day = week_list[week]
-
-                        periods += [{
-                            "open":{
-                                "day": week_list[week],
-                                "time": start.replace(':','')
-                            },
-                            "close":{
-                                "day": end_day,
-                                "time": end.replace(':','')
-                            }
-                        }]
-                weekday_text += ["{}: {}".format(week, ', '.join(time_list))]
+                    start, end = time_.split('–')
+                    end_hour, end_min = end.split(':')
+                    start_hour, start_min = start.split(':')
+
+                    if end_hour < start_hour:
+                        end_day = week_list[week] + 1
+                    else:
+                        end_day = week_list[week]
+
+                    periods += [{
+                        "open":{
+                            "day": week_list[week],
+                            "time": start.replace(':','')
+                        },
+                        "close":{
+                            "day": end_day,
+                            "time": end.replace(':','')
+                        }
+                    }]
+            weekday_text += ["{}: {}".format(week, ', '.join(time_list))]
     output['periods'] = str(periods)
     output['weekday_text'] = str(weekday_text)

@@ -262,6 +264,7 @@ def get_reviews(driver, output):


 def find_photo_list(driver):
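+    # Give the photo pane a moment to render, then wait for the first thumbnail link to be clickable.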
+    time.sleep(2)
     wait = WebDriverWait(driver, 60)
     wait.until(
         EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[3]/div[1]/div[1]/div/a'))
@@ -275,7 +278,7 @@ def find_photo_list(driver):
             actions.move_to_element(element).perform()
         except:
             break
-
+
     photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
     photo_url = []
     for photo_id in count_list:
@@ -358,6 +361,23 @@ def data_select_insert(db, table_name, table_col, data):
     DA.mysql_insert_data(db, insert_sql)


+def time_click(driver):
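+    # Try to expand the weekly-hours panel; if the toggle is missing, look for a
+    # collapsed hours block and report the place as 暫時關閉 (temporarily closed).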
+    status = ''
+    try:
+        time_css = "span[aria-label='顯示本週營業時間']"
+        element = driver.find_element_by_css_selector(time_css)
+        driver.implicitly_wait(30)
+        ActionChains(driver).move_to_element(element).click(element).perform()
+        status = '正常'
+
+    except NoSuchElementException:
+        time_css = "div[aria-expanded='false']"
+        elem = driver.find_element_by_css_selector(time_css)
+        if elem:
+            status = '暫時關閉'
+    return status
+
+
 def main():
     data = pd.read_csv('lat_long_location.csv', index_col = 0)
     tmp = data.iloc[0]
@@ -379,16 +399,12 @@ def main():
     for item_url, name in url_list:
         print(name, ': ' ,item_url)
         driver.get(item_url)
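+        # Hover over successive blocks of the details pane so lazily rendered sections load before parsing.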
+        for i in range(4, 26, 2):
+            element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[{}]'.format(i))
+            actions = ActionChains(driver)
+            actions.move_to_element(element).perform()

-        wait = WebDriverWait(driver, 120)
-        time_css = "span[aria-label='顯示本週營業時間']"
-        wait.until(
-            EC.element_to_be_clickable((By.CSS_SELECTOR, time_css))
-        )
-        element = driver.find_element_by_css_selector(time_css)
-        driver.implicitly_wait(30)
-        ActionChains(driver).move_to_element(element).click(element).perform()
-
+        time_status = time_click(driver)
         time.sleep(1)
         shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
