noodles 3 years ago
commit 90e4f2fb88
1 changed file with 50 additions and 51 deletions

shop_item_list.py  +50 -51

@@ -136,62 +136,61 @@ def main():
         print('restart docker p{}'.format(port))
         os.system('sudo docker container restart p'+str(port))
         time.sleep(8)
+
 #     if len(sys.argv) >2:
 #         port=int(sys.argv[2])
 
-    print('driver start...')
-    driver = brower_start(port)
-#     db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
-    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
-    table=db['shop_item_list2']
-    table2=db['progress_list2']
+    for i in range(3):
+        print('driver start...')
+        driver = brower_start(port)
+    #     db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
+        db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+        table=db['shop_item_list2']
+        table2=db['progress_list2']
     
-    keyword  = get_crawler_list(db)
-    print(keyword)
+        keyword  = get_crawler_list(db)
+        print(keyword)
+
+        c = 0
+        for row in lon_lat:
+            c += 1
+            try:
+                # latitude = row['lat']
+                # longitude = row['lon']
+                latitude = row[1]    # latitude
+                longitude = row[0]   # longitude
+                # table2.upsert({'kw':keyword,'num':row['num']},['kw'])
+                table2.insert({'kw':keyword,'num':c})
+
+                url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
+                driver.get(url)
+                keyin_keyword(driver, keyword)
+                failcnt = 0
+                for page in range(5):
+                    print(keyword, row['loc'], latitude, longitude, page)
+                    url_list = get_url_list(driver)
+                    duplicate = 0
+                    # shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
+                    for item in url_list:
+                        try:
+                            table.insert({'name':item[1],'lon':longitude, 'lat':latitude, \
+                                        'keyword':keyword, 'item_url':item[0],'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
+                        except:
+                            duplicate += 1
+                    print(len(url_list), duplicate)
+        #                     result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
+        #                     insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
+        #                                     .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
+
+        #                     DA.mysql_insert_data(db, insert_sql)
+
+                    if page < 2 :
+                        element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
+                        driver.implicitly_wait(30)
+                        ActionChains(driver).move_to_element(element).click(element).perform() 
+            except:
+                pass
 
-    c = 0
-    for row in lon_lat:
-        c += 1
-        try:
-            # latitude = row['lat']
-            # longitude = row['lon']
-            latitude = row[1]    # latitude
-            longitude = row[0]   # longitude
-            # table2.upsert({'kw':keyword,'num':row['num']},['kw'])
-            table2.insert({'kw':keyword,'num':c})
-
-            url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
-            driver.get(url)
-            keyin_keyword(driver, keyword)
-            failcnt = 0
-            for page in range(5):
-                print(keyword, row['loc'], latitude, longitude, page)
-                url_list = get_url_list(driver)
-                duplicate = 0
-                # shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
-                for item in url_list:
-                    try:
-                        table.insert({'name':item[1],'lon':longitude, 'lat':latitude, \
-                                      'keyword':keyword, 'item_url':item[0],'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
-                    except:
-                        duplicate += 1
-                print(len(url_list), duplicate)
-    #                     result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
-    #                     insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
-    #                                     .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
-
-    #                     DA.mysql_insert_data(db, insert_sql)
-
-                if page < 2 :
-                    element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
-                    driver.implicitly_wait(30)
-                    ActionChains(driver).move_to_element(element).click(element).perform() 
-        except:
-            pass
-#             error = pd.DataFrame([row])
-#             error.to_csv('error_shop_item_list.csv', mode='a', header = False)
-            #driver.close()
-            #driver = brower_start()
 
 
 if __name__ == '__main__':
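
What this commit does: the whole crawl body (driver start, DB connection, keyword fetch, and the per-coordinate loop) now runs inside for i in range(3), so one invocation makes up to three passes, each with a fresh driver, a fresh DB connection, and a fresh keyword from get_crawler_list. A minimal sketch of that retry pattern in isolation, assuming the brower_start and get_crawler_list helpers from this file; crawl_keyword is a hypothetical stand-in for the per-coordinate body:

    import dataset

    MAX_ATTEMPTS = 3   # matches range(3) in the diff

    def run_with_retries(port, db_url):
        # Rebuild the browser and DB connection on every attempt so each
        # pass starts from a clean slate, as the new code does.
        for attempt in range(MAX_ATTEMPTS):
            driver = None
            try:
                driver = brower_start(port)          # helper from this file
                db = dataset.connect(db_url)
                keyword = get_crawler_list(db)       # helper from this file
                crawl_keyword(driver, db, keyword)   # hypothetical stand-in
            except Exception as exc:
                print('pass {} failed: {}'.format(attempt + 1, exc))
            finally:
                if driver is not None:
                    driver.quit()                    # release the browser between passes

Unlike this sketch, the new code starts a new driver on each pass without quitting the previous one, and it swallows per-row failures with a bare except: pass, so skipped coordinates leave no trace; logging the exception makes those skips visible.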
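
A side note on the paging step: find_element_by_id is the Selenium 3 API and was removed in Selenium 4, and implicitly_wait(30) is set after the lookup, so it does not protect that particular call. A hedged Selenium 4 equivalent of the "next page" click, using an explicit wait (the element id is the obfuscated Google Maps id copied from the diff and is liable to change):

    from selenium.webdriver.common.by import By
    from selenium.webdriver.common.action_chains import ActionChains
    from selenium.webdriver.support.ui import WebDriverWait
    from selenium.webdriver.support import expected_conditions as EC

    def click_next_page(driver, timeout=30):
        # Wait until the paging button is actually clickable instead of
        # relying on an implicit wait configured after the lookup.
        next_btn = WebDriverWait(driver, timeout).until(
            EC.element_to_be_clickable((By.ID, 'ppdPk-Ej1Yeb-LgbsSe-tJiF1e'))
        )
        ActionChains(driver).move_to_element(next_btn).click(next_btn).perform()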