noodles 3 years ago
parent
commit
64bfe66b3c
2 changed files with 14 additions and 13 deletions
  1. 12 12
      run.py
  2. 2 1
      shop_item_list.py

+ 12 - 12
run.py

@@ -295,6 +295,8 @@ def find_photo_list(driver):
 def find_big_photo(output, driver):
     element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}的相片']".format(output['name']))
     ActionChains(driver).move_to_element(element).click(element).perform()
+    output['shop_photo'] = '[]'
+    output['menu_photo'] = '[]'
     
     photo_map = {
         '全部': 'shop_photo',
@@ -391,19 +393,21 @@ def main():
     print('drvier start...')
     driver = brower_start()
     driver.get(url)
-    keyin_keyword(driver, '咖啡')
-    url_list = get_url_list(driver)
+    # keyin_keyword(driver, '咖啡')
+    # url_list = get_url_list(driver)
+    url_list = pd.read_csv('result/shop_item_list_20211210.csv', index_col=0)
 
-    result = []
     # try:
-    for item_url, name in url_list:
-        print(name, ': ' ,item_url)
+    for key, row in url_list.iterrows():
+        name = row['name']
+        item_url = row['item_url']
+        print(key, name, ': ' ,item_url)
         driver.get(item_url)
         for i in range(4, 26, 2):
             element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[{}]'.format(i))
             actions = ActionChains(driver)
             actions.move_to_element(element).perform()
-
+        print('start...')
         time_status = time_click(driver)
         time.sleep(1)
         shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
@@ -428,13 +432,9 @@ def main():
         query_name = query_name.replace(' ','')
         output['query_name'] = query_name
         output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
-        time.sleep(2)
-        result += [output]
-        with open('result/20211207_{}.json'.format(name), 'w') as f:
-            json.dump(output, f)
-
+        time.sleep(1)
         data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
-        break
+
     # except:
         # shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
         # print("error {}".format(id_))

+ 2 - 1
shop_item_list.py

@@ -64,12 +64,13 @@ def main():
     driver = brower_start()
 
     for k, row in data.iterrows():
+        # if k < 297:continue
         latitude = row['latitude'] #緯度
         longitude = row['longitude'] #經度
         url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
         driver.get(url)
 
-        keyword = '咖啡'
+        keyword = '火鍋'
         keyin_keyword(driver, keyword)
         
         for page in range(4):