noodles 3 gadi atpakaļ
vecāks
revīzija
66019f32b8
1 mainīts fails ar 75 papildinājumiem un 59 dzēšanām
  1. 75 59
      run.py

+ 75 - 59
run.py

@@ -4,6 +4,7 @@ from selenium.webdriver.common.action_chains import ActionChains
 from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
+from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By
 
 from bs4 import BeautifulSoup
@@ -135,61 +136,62 @@ def get_intro_info(driver, output):
 
 
 def get_time_list(shop_soup, output):
+    periods = []
+    weekday_text = []
+    
     open_now = blank_check(shop_soup.find('span', {'class':'LJKBpe-Tswv1b-hour-text'}).text.split('\xa0')[0])
     if open_now == '永久停業' or open_now == '暫時關閉':
         output['open_now'] = 'False'
     else:
         output['open_now'] = 'True'
-    periods = []
-    weekday_text = []
-    for tr_ in shop_soup.find_all('tr'):
-        if tr_.find('div').text.replace(' ','') != '':
-            week = tr_.find('div').text
-            time_list = [blank_check(i.text) for i in tr_.find_all('li')]
-            for time_ in time_list:
-                if time_ == '24 小時營業':
-                    periods += [{
-                        "open":{
-                            "day": week_list[week], 
-                            "time": 0000
-                        },
-                        "close":{
-                            "day": week_list[week], 
-                            "time": ''
-                        }
-                    }]
-                elif time_ == '休息':
-                    periods += [{
-                        "open":{
-                            "day": week_list[week], 
-                            "time": ''
-                        },
-                        "close":{
-                            "day": week_list[week], 
-                            "time": ''
-                        }
-                    }]
-                else:
-                    start, end = time_.split('–')
-                    end_hour, end_min = end.split(':')
-                    start_hour, start_min = start.split(':')
-
-                    if end_hour < start_hour:
-                        end_day = week_list[week] + 1 
+        for tr_ in shop_soup.find_all('tr'):
+            if tr_.find('div').text.replace(' ','') != '':
+                week = tr_.find('div').text
+                time_list = [blank_check(i.text) for i in tr_.find_all('li')]
+                for time_ in time_list:
+                    if time_ == '24 小時營業':
+                        periods += [{
+                            "open":{
+                                "day": week_list[week], 
+                                "time": 0000
+                            },
+                            "close":{
+                                "day": week_list[week], 
+                                "time": ''
+                            }
+                        }]
+                    elif time_ == '休息':
+                        periods += [{
+                            "open":{
+                                "day": week_list[week], 
+                                "time": ''
+                            },
+                            "close":{
+                                "day": week_list[week], 
+                                "time": ''
+                            }
+                        }]
                     else:
-                        end_day = week_list[week]
-
-                    periods += [{
-                        "open":{
-                            "day": week_list[week], 
-                            "time": start.replace(':','')
-                        },
-                        "close":{
-                            "day": end_day, 
-                            "time": end.replace(':','')
-                        }
-                    }]
-            weekday_text += ["{}: {}".format(week, ', '.join(time_list))]
+                        start, end = time_.split('–')
+                        end_hour, end_min = end.split(':')
+                        start_hour, start_min = start.split(':')
+
+                        if end_hour < start_hour:
+                            end_day = week_list[week] + 1 
+                        else:
+                            end_day = week_list[week]
+
+                        periods += [{
+                            "open":{
+                                "day": week_list[week], 
+                                "time": start.replace(':','')
+                            },
+                            "close":{
+                                "day": end_day, 
+                                "time": end.replace(':','')
+                            }
+                        }]
+                weekday_text += ["{}: {}".format(week, ', '.join(time_list))]
     output['periods'] = str(periods)
     output['weekday_text'] = str(weekday_text)
 
@@ -262,6 +264,7 @@ def get_reviews(driver, output):
 
 
 def find_photo_list(driver):
+    time.sleep(2)
     wait = WebDriverWait(driver, 60)
     wait.until(
         EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[3]/div[1]/div[1]/div/a'))
@@ -275,7 +278,7 @@ def find_photo_list(driver):
             actions.move_to_element(element).perform()
         except:
             break
-        
+
     photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
     photo_url = []
     for photo_id in count_list:
@@ -358,6 +361,23 @@ def data_select_insert(db, table_name, table_col, data):
     DA.mysql_insert_data(db, insert_sql)
 
 
+def time_click(driver):
+    status = ''
+    try:
+        time_css =  "span[aria-label='顯示本週營業時間']"
+        element = driver.find_element_by_css_selector(time_css)
+        driver.implicitly_wait(30)
+        ActionChains(driver).move_to_element(element).click(element).perform()
+        status = '正常'
+
+    except NoSuchElementException:
+        time_css =  "div[aria-expanded='false']"
+        elem = driver.find_element_by_css_selector(time_css)
+        if elem:
+            status = '暫時關閉'
+    return status
+
+
 def main():
     data = pd.read_csv('lat_long_location.csv', index_col = 0)
     tmp = data.iloc[0]
@@ -379,16 +399,12 @@ def main():
     for item_url, name in url_list:
         print(name, ': ' ,item_url)
         driver.get(item_url)
+        for i in range(4, 26, 2):
+            element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[{}]'.format(i))
+            actions = ActionChains(driver)
+            actions.move_to_element(element).perform()
 
-        wait = WebDriverWait(driver, 120)
-        time_css =  "span[aria-label='顯示本週營業時間']"
-        wait.until(
-            EC.element_to_be_clickable((By.CSS_SELECTOR, time_css))
-        )
-        element = driver.find_element_by_css_selector(time_css)
-        driver.implicitly_wait(30)
-        ActionChains(driver).move_to_element(element).click(element).perform()
-
+        time_status = time_click(driver)
         time.sleep(1)
         shop_soup = BeautifulSoup(driver.page_source, 'html.parser')