|
@@ -28,7 +28,7 @@ def serive_create(profilepath):
|
|
|
|
|
|
option.add_argument('--disable-web-security')
|
|
|
option.add_argument('--allow-running-insecure-content')
|
|
|
- #option.add_argument("--user-data-dir=C:\\Users\\user\\AppData\\Local\\Google\\Chrome\\User Data")
|
|
|
+ option.add_argument("--user-data-dir=C:\\Users\\user\\AppData\\Local\\Google\\Chrome\\User Data")
|
|
|
option.add_argument("profile-directory="+profilepath)
|
|
|
|
|
|
driver = webdriver.Chrome('./utility/chromedriver_win32/chromedriver', options=option)
|
|
@@ -231,15 +231,20 @@ def get_reviews(driver, output):
|
|
|
|
|
|
# page_down_(driver, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]', 5)
|
|
|
page_down_(driver, '//div[@class="PPCwl"]',5)
|
|
|
- all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
|
|
|
- for ap in all_photo:
|
|
|
- ap.click()
|
|
|
|
|
|
- all_review = driver.find_elements_by_css_selector('button[aria-label="顯示更多"')
|
|
|
- for ap in all_review:
|
|
|
- ap.click()
|
|
|
+ comment_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
+ if comment_soup.find_all('div',class_='ODSEW-ShBeI-xJzy8c-bF1uUb') != 0:
|
|
|
+ all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
|
|
|
+ for ap in all_photo:
|
|
|
+ ap.click()
|
|
|
+
|
|
|
+ if comment_soup.select('button[aria-label="顯示更多"]') != 0:
|
|
|
+ all_review = driver.find_elements_by_css_selector('button[aria-label="顯示更多"]')
|
|
|
+ for ap in all_review:
|
|
|
+ ap.click()
|
|
|
|
|
|
comment_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
+
|
|
|
count = 0
|
|
|
reviews = []
|
|
|
for comment in comment_soup.find_all('div',{'class':'ODSEW-ShBeI'}):
|
|
@@ -320,16 +325,16 @@ def find_big_photo(output, driver):
|
|
|
'菜單': 'menu_photo'
|
|
|
}
|
|
|
|
|
|
+ photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
tab_dict = {}
|
|
|
for tab_index in [0, 1, 2]:
|
|
|
- try:
|
|
|
- photo_name = driver.find_element(By.CSS_SELECTOR, "button[data-tab-index='{}']".format(tab_index)).text
|
|
|
+ selector = photo_soup.select("button[data-tab-index='{}']".format(tab_index))
|
|
|
+ if len(selector) != 0:
|
|
|
+ photo_name = selector[0].text
|
|
|
if photo_name == '菜單':
|
|
|
tab_dict[photo_name] = tab_index
|
|
|
elif photo_name == '全部':
|
|
|
tab_dict[photo_name] = tab_index
|
|
|
- except:
|
|
|
- traceback.print_exc()
|
|
|
print(tab_dict)
|
|
|
for tab_ in tab_dict:
|
|
|
tab_index = tab_dict[tab_]
|
|
@@ -387,23 +392,23 @@ def data_select_insert(db, table_name, table_col, data):
|
|
|
|
|
|
|
|
|
def time_click(driver):
    """Expand the weekly opening-hours panel and report the shop's status.

    The current page source is parsed once. When the "show this week's
    opening hours" toggle is present it is clicked through Selenium;
    otherwise, when a notice icon is present, the text of the notice banner
    is used as the status.

    Args:
        driver: Selenium WebDriver whose current page is inspected.

    Returns:
        '正常' when the opening-hours toggle was found and clicked; the
        notice banner text when a notice icon is present instead (the
        original author's note lists '永久停業' or '暫時關閉' as the expected
        values); '' when neither element is found or any step fails.
    """
    shop_soup_tmp = BeautifulSoup(driver.page_source, 'html.parser')
    time_css = "span[aria-label='顯示本週營業時間']"
    status = ''
    try:
        if shop_soup_tmp.select(time_css):
            element = driver.find_element_by_css_selector(time_css)
            # Kept from the original: this changes the driver's implicit
            # wait for lookups made after this point.
            driver.implicitly_wait(10)
            ActionChains(driver).move_to_element(element).click(element).perform()
            status = '正常'
        # Query the soup parsed in this function. The previous code referenced
        # an undefined name here, and the resulting NameError was swallowed by
        # a bare except, so this branch could never produce a status.
        elif shop_soup_tmp.select("img[aria-label='通知']"):
            notice = shop_soup_tmp.find('span', class_='LJKBpe-Tswv1b-text aSftqf')
            # The banner span may be missing even when the icon is present.
            if notice is not None:
                status = notice.text
        return status
    except Exception:
        # Best-effort: any Selenium/parsing failure yields an empty status,
        # while KeyboardInterrupt/SystemExit are no longer trapped.
        return ''
|
|
|
|
|
|
|
|
|
def get_not_cralwer_url(keyword):
|
|
@@ -496,30 +501,38 @@ def main():
|
|
|
page_down_(driver, "//div[@class='x3AX1-LfntMc-header-title-ij8cu']", 3)
|
|
|
|
|
|
time_status = time_click(driver)
|
|
|
- time.sleep(1)
|
|
|
+ time.sleep(0.5)
|
|
|
shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
|
|
|
output = {
|
|
|
'name': blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text)
|
|
|
}
|
|
|
print(output['name'])
|
|
|
+
|
|
|
print('get_shop_info')
|
|
|
output = get_shop_info(driver, output, shop_soup)
|
|
|
+
|
|
|
print('get_intro_info')
|
|
|
- output = get_intro_info(driver, output)
|
|
|
+ if len(shop_soup.select("div > [aria-label='{}簡介']".format(output['name']))) != 0:
|
|
|
+ output = get_intro_info(driver, output)
|
|
|
+ else:
|
|
|
+ for key in intro_list:
|
|
|
+ output[intro_list[key][0]] = '[]'
|
|
|
+
|
|
|
print('get_time_list')
|
|
|
- if time_status == '':
|
|
|
+ if time_status == '正常':
|
|
|
+ output = get_time_list(shop_soup, output)
|
|
|
+ else:
|
|
|
output['open_now'] = False
|
|
|
output['periods'] = ''
|
|
|
output['weekday_text'] = ''
|
|
|
- else:
|
|
|
- output = get_time_list(shop_soup, output)
|
|
|
|
|
|
print('user_ratings_total')
|
|
|
if output['user_ratings_total'] == '':
|
|
|
output['reviews'] = ''
|
|
|
else:
|
|
|
output = get_reviews(driver, output)
|
|
|
+
|
|
|
print('find_big_photo')
|
|
|
output = find_big_photo(output, driver)
|
|
|
|