noodles · 3 years ago · commit 5e08212582
3 changed files with 38 additions and 31 deletions
  1. shop_item_list.py  (+31, −26)
  2. start.sh  (+1, −0)
  3. utility/googlemapsearch.sql  (+6, −5)

shop_item_list.py  (+31, −26)

@@ -35,7 +35,7 @@ def get_url_list(driver):
                 EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)))
             )
             driver.find_element(By.XPATH,'//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)).send_keys(Keys.DOWN)
-            time.sleep(1)
+            time.sleep(0.5)
         except:
             pass
     url_soup = BeautifulSoup(driver.page_source, 'html.parser')
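
The only change in this hunk trims the fixed pause after each scroll from 1 s to 0.5 s; the explicit clickability wait just above it still bounds the worst case. For context, a minimal self-contained sketch of the same wait-then-scroll pattern (the XPath and the 0.5 s pause come from the diff; the function name, timeout, and item count are illustrative assumptions):

import time
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def scroll_result_list(driver, num_items=20):
    """Scroll Google Maps result entries into view one at a time."""
    for i in range(1, num_items + 1):
        xpath = '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)
        try:
            # Wait (up to 10 s) until the i-th result link is clickable...
            WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, xpath))
            )
            # ...then nudge the list downward and give lazily loaded
            # entries a short, fixed pause to render.
            driver.find_element(By.XPATH, xpath).send_keys(Keys.DOWN)
            time.sleep(0.5)
        except Exception:
            # Entries past the end of the visible result list are skipped.
            continue
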
@@ -63,32 +63,37 @@ def main():
     print('driver start...')
     driver = brower_start()
 
-    for k, row in data.iterrows():
-        # if k < 297:continue
-        latitude = row['latitude'] # latitude
-        longitude = row['longitude'] # longitude
-        url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
-        driver.get(url)
+    for keyword in ['火鍋']:  # '火鍋' = hot pot
+        for k, row in data.iterrows():
+            try:
+                latitude = row['latitude'] # latitude
+                longitude = row['longitude'] # longitude
+                url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
+                driver.get(url)
+                keyin_keyword(driver, keyword)
+                
+                for page in range(4):
+                    print(keyword, k, row['location'], latitude, longitude, page)
+                    url_list = get_url_list(driver)
+                    
+                    shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
+                    for item in url_list:
+                        result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
+                        insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
+                                        .format('shop_item_list2', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
 
-        keyword = '火鍋'
-        keyin_keyword(driver, keyword)
-        
-        for page in range(4):
-            print(k, row['location'], latitude, longitude, page)
-            url_list = get_url_list(driver)
-            
-            shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
-            for item in url_list:
-                result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
-                insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
-                                .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
+                        DA.mysql_insert_data(db, insert_sql)
+                    
+                    if page < 2 :
+                        element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
+                        driver.implicitly_wait(30)
+                        ActionChains(driver).move_to_element(element).click(element).perform() 
+            except:
+                error = pd.DataFrame([row])
+                error.to_csv('error_shop_item_list.csv', mode='a', header = False)
+                driver.close()
+                driver = brower_start()
 
-                DA.mysql_insert_data(db, insert_sql)
-            
-            if page < 2 :
-                element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
-                driver.implicitly_wait(30)
-                ActionChains(driver).move_to_element(element).click(element).perform() 
 
 if __name__ == '__main__':
-    main()
+    main()
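
The rewritten main() loop wraps every (keyword, row) pass in a broad try/except: on any failure it appends the offending row to error_shop_item_list.csv, closes the browser, and starts a fresh one before moving on. A rough standalone sketch of just that recovery pattern (the bare except is narrowed to WebDriver errors here; brower_start and crawl_row are stand-ins for the repo's helper and the per-row body of main()):

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import TimeoutException, WebDriverException

def brower_start():
    # Stand-in for the repo's brower_start() helper; assumed to return a ready driver.
    return webdriver.Chrome()

def crawl_row(driver, row, keyword):
    # Placeholder for the per-row work in main(): open the map at the row's
    # coordinates, search the keyword, and page through the results.
    ...

def crawl_with_restart(data, keywords, error_path='error_shop_item_list.csv'):
    driver = brower_start()
    for keyword in keywords:
        for _, row in data.iterrows():
            try:
                crawl_row(driver, row, keyword)
            except (TimeoutException, WebDriverException):
                # Log the failed row so it can be re-crawled later...
                pd.DataFrame([row]).to_csv(error_path, mode='a', header=False)
                # ...and restart the browser: a stale session tends to fail
                # every subsequent request, so a fresh one is cheaper.
                driver.close()
                driver = brower_start()
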

start.sh  (+1, −0)

@@ -0,0 +1 @@
+nohup python -u shop_item_list.py &

utility/googlemapsearch.sql  (+6, −5)

@@ -34,20 +34,21 @@ CREATE TABLE `shop_list` (
    `menu_photo` JSON,
 
    `google_url` VARCHAR(200),
-   `query_name` VARCHAR(200),
+   `item_url` VARCHAR(200),
+   `keyword` VARCHAR(20),
    `crawler_date` char(20) NOT NULL,
    PRIMARY KEY (`id`),
-   UNIQUE KEY (`query_name`)
+   UNIQUE KEY (`item_url`)
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
 
 
-CREATE TABLE `shop_item_list` (
+CREATE TABLE `shop_item_list2` (
    `id` int NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(100),
    `lon` DOUBLE,
    `lat` DOUBLE,
    `keyword` VARCHAR(20),
-   `item_url` VARCHAR(200),
+   `item_url` VARCHAR(600),
    `crawler_date` char(20) NOT NULL,
    PRIMARY KEY (`id`),
    UNIQUE KEY (`item_url`)
@@ -87,4 +88,4 @@ CREATE TABLE `shop_reviews_photo_list` (
    `crawler_date` char(20) NOT NULL,
    PRIMARY KEY (`id`),
    UNIQUE KEY (`id`,`google_url`)
-) ENGINE=InnoDB DEFAULT CHARSET=utf8;
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
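
The crawler assembles its INSERT IGNORE statement by formatting values straight into the SQL string; the rename to shop_item_list2 and the widened VARCHAR(600) item_url column both show up in that statement. For comparison, a hedged sketch of the same insert using parameterized queries (mysql-connector-python and the connection details are assumptions; DA.mysql_insert_data's internals are not part of this commit):

from datetime import datetime
import mysql.connector  # assumed driver; the repo's DA helper may use something else

def insert_shop_item(conn, name, lon, lat, keyword, item_url):
    # Column list matches the shop_item_list2 schema above; the driver
    # handles quoting, so no values are formatted into the SQL string.
    sql = (
        "INSERT IGNORE INTO shop_item_list2 "
        "(name, lon, lat, keyword, item_url, crawler_date) "
        "VALUES (%s, %s, %s, %s, %s, %s)"
    )
    crawler_date = datetime.today().strftime("%Y/%m/%d %H:%M")
    cur = conn.cursor()
    # Truncate defensively to the VARCHAR(600) limit defined above.
    cur.execute(sql, (name, lon, lat, keyword, item_url[:600], crawler_date))
    conn.commit()
    cur.close()

# Example usage (connection parameters are placeholders, not taken from this repo):
# conn = mysql.connector.connect(host='localhost', user='crawler',
#                                password='...', database='googlemapsearch')
# insert_shop_item(conn, 'example hot pot shop', 121.5, 25.0, '火鍋',
#                  'https://www.google.com.tw/maps/place/...')
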