noodles пре 3 година
родитељ
комит
33070eb4c9
1 измењен фајл са 23 додавања и 10 уклањања
  1. 23 10
      run.py

+ 23 - 10
run.py

@@ -46,6 +46,7 @@ def brower_start(port):
 
     browser = webdriver.Remote(
         command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
+        # command_executor='http://192.53.174.202:'+str(port)+'/wd/hub',
         desired_capabilities=options.to_capabilities()
     )
     return browser
@@ -315,6 +316,10 @@ def find_photo_list(driver):
 
 def find_big_photo(output, driver):
     # element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}的相片']".format(output['name']))
+    wait = WebDriverWait(driver, 60)
+    wait.until(
+        EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[1]/div[1]/button'))
+    )
     element = driver.find_element(By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[1]/div[1]/button')
     ActionChains(driver).move_to_element(element).click(element).perform()
     output['shop_photo'] = '[]'
@@ -324,7 +329,8 @@ def find_big_photo(output, driver):
         '全部': 'shop_photo',
         '菜單': 'menu_photo'
     }
-    
+
+    driver.find_element(By.CSS_SELECTOR, "button[data-tab-index='1']")
     photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
     tab_dict = {}
     for tab_index in [0, 1, 2]:
@@ -402,9 +408,12 @@ def time_click(driver):
             ActionChains(driver).move_to_element(element).click(element).perform()
             status = '正常'
         
-        elif len(shop_soup.select("img[aria-label='通知']")) != 0:
+        elif len(shop_soup_tmp.select("img[aria-label='通知']")) != 0:
             status = shop_soup_tmp.find('span',class_='LJKBpe-Tswv1b-text aSftqf').text
 #             status = '永久停業' or '暫時關閉'
+                
+        elif len(shop_soup_tmp.select('button[aria-label*="查看更詳細的營業時間"]')) != 0:
+            status = 'error'
             
         return status
     except:
@@ -491,7 +500,7 @@ def main():
     #driver = serive_create_linux(profilepath)
     
     for key, row in url_pd.iterrows():
-        # try:    
+        try:    
             name = row['name']
             item_url = row['item_url']
             print(key, name, ': ' ,item_url)
@@ -501,6 +510,10 @@ def main():
             page_down_(driver, "//div[@class='x3AX1-LfntMc-header-title-ij8cu']", 3)
             
             time_status = time_click(driver)
+            if time_status == 'error':
+                error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
+                data_select_insert(db, 'error_list', error_table_col, row)
+                continue
             time.sleep(0.5)
             shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
 
@@ -513,7 +526,7 @@ def main():
             output = get_shop_info(driver, output, shop_soup)
 
             print('get_intro_info')
-            if len(shop_soup.select("div > [aria-label='{}簡介']".format(output['name']))) != 0:
+            if len(shop_soup.select("div[aria-label='{}簡介']".format(output['name']))) != 0:
                 output = get_intro_info(driver, output)
             else:
                 for key in intro_list:
@@ -543,12 +556,12 @@ def main():
             output['keyword'] = keyword
             output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
             data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
-
-        # except Exception as e:
-        #     print(e)
-        #     error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
-        #     data_select_insert(db, 'error_list', error_table_col, row)
-        #     time.sleep(2)
+            
+        except Exception as e:
+            print(e)
+            error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
+            data_select_insert(db, 'error_list', error_table_col, row)
+            time.sleep(1)
             # driver.close()
             # driver = brower_start(port)
             # driver = serive_create_linux(profilepath)