noodles 3 tahun lalu
induk
melakukan
2a35c93602
1 file diubah dengan 44 penambahan dan 31 penghapusan
  1. 44 31
      run.py

+ 44 - 31
run.py

@@ -28,7 +28,7 @@ def serive_create(profilepath):
 
     option.add_argument('--disable-web-security')
     option.add_argument('--allow-running-insecure-content') 
-    #option.add_argument("--user-data-dir=C:\\Users\\user\\AppData\\Local\\Google\\Chrome\\User Data")
+    option.add_argument("--user-data-dir=C:\\Users\\user\\AppData\\Local\\Google\\Chrome\\User Data")
     option.add_argument("profile-directory="+profilepath)
 
     driver = webdriver.Chrome('./utility/chromedriver_win32/chromedriver', options=option)
@@ -231,15 +231,20 @@ def get_reviews(driver, output):
 
     # page_down_(driver, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]', 5)
     page_down_(driver, '//div[@class="PPCwl"]',5)
-    all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
-    for ap in all_photo:
-        ap.click()
 
-    all_review = driver.find_elements_by_css_selector('button[aria-label="顯示更多"')
-    for ap in all_review:
-        ap.click()
+    comment_soup = BeautifulSoup(driver.page_source, 'html.parser')
+    if comment_soup.find_all('div',class_='ODSEW-ShBeI-xJzy8c-bF1uUb') != 0:
+        all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
+        for ap in all_photo:
+            ap.click()
+
+    if comment_soup.select('button[aria-label="顯示更多"]') != 0:
+        all_review = driver.find_elements_by_css_selector('button[aria-label="顯示更多"]')
+        for ap in all_review:
+            ap.click()
 
     comment_soup = BeautifulSoup(driver.page_source, 'html.parser')
+
     count = 0
     reviews = []
     for comment in comment_soup.find_all('div',{'class':'ODSEW-ShBeI'}):
@@ -320,16 +325,16 @@ def find_big_photo(output, driver):
         '菜單': 'menu_photo'
     }
     
+    photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
     tab_dict = {}
     for tab_index in [0, 1, 2]:
-        try:
-            photo_name = driver.find_element(By.CSS_SELECTOR, "button[data-tab-index='{}']".format(tab_index)).text
+        selector = photo_soup.select("button[data-tab-index='{}']".format(tab_index))
+        if len(selector) != 0:
+            photo_name = selector[0].text
             if photo_name == '菜單':
                 tab_dict[photo_name] = tab_index
             elif photo_name == '全部':
                 tab_dict[photo_name] = tab_index
-        except:
-            traceback.print_exc()
     print(tab_dict)                            
     for tab_ in tab_dict:
         tab_index = tab_dict[tab_]
@@ -387,23 +392,23 @@ def data_select_insert(db, table_name, table_col, data):
 
 
 def time_click(driver):
+    shop_soup_tmp = BeautifulSoup(driver.page_source, 'html.parser')  # parse the current page once; used only for presence checks below
     status = ''
     try:
-        time_css =  "span[aria-label='顯示本週營業時間']"
-        element = driver.find_element_by_css_selector(time_css)
-        driver.implicitly_wait(10)
-        ActionChains(driver).move_to_element(element).click(element).perform()
-        status = '正常'
-
-    except NoSuchElementException:
-        try:
-            time_css =  "div[aria-expanded='false']"
-            elem = driver.find_element_by_css_selector(time_css)
-            if elem:
-                status = '暫時關閉'
-        except:
-            return ''
-    return status
+        if len(shop_soup_tmp.select("span[aria-label='顯示本週營業時間']")) != 0:  # opening-hours toggle is present
+            time_css =  "span[aria-label='顯示本週營業時間']"
+            element = driver.find_element_by_css_selector(time_css)
+            driver.implicitly_wait(10)
+            ActionChains(driver).move_to_element(element).click(element).perform()  # expand the weekly hours panel
+            status = '正常'
+        
+        elif len(shop_soup_tmp.select("img[aria-label='通知']")) != 0:  # use the local snapshot; main()'s shop_soup is not in scope here
+            status = shop_soup_tmp.find('span',class_='LJKBpe-Tswv1b-text aSftqf').text
+            # banner text is presumably '永久停業' or '暫時關閉' (permanently / temporarily closed) - TODO confirm
+            
+        return status  # '' when neither the hours toggle nor the notice icon was found
+    except Exception:  # best-effort: any lookup or click failure is reported as '' (unknown status)
+        return ''
 
 
 def get_not_cralwer_url(keyword):
@@ -496,30 +501,38 @@ def main():
             page_down_(driver, "//div[@class='x3AX1-LfntMc-header-title-ij8cu']", 3)
             
             time_status = time_click(driver)
-            time.sleep(1)
+            time.sleep(0.5)
             shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
 
             output = {
                 'name': blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text)
             }
             print(output['name'])
+
             print('get_shop_info')
             output = get_shop_info(driver, output, shop_soup)
+
             print('get_intro_info')
-            output = get_intro_info(driver, output)
+            if len(shop_soup.select("div > [aria-label='{}簡介']".format(output['name']))) != 0:
+                output = get_intro_info(driver, output)
+            else:
+                for key in intro_list:
+                    output[intro_list[key][0]] = '[]'
+
             print('get_time_list')
-            if time_status == '':
+            if time_status == '正常':
+                output = get_time_list(shop_soup, output)
+            else:
                 output['open_now'] = False
                 output['periods'] = ''
                 output['weekday_text'] = ''
-            else:
-                output = get_time_list(shop_soup, output)
 
             print('user_ratings_total')
             if output['user_ratings_total'] == '':
                 output['reviews'] = ''
             else:
                 output = get_reviews(driver, output)
+
             print('find_big_photo')
             output = find_big_photo(output, driver)