noodles 3 anni fa
parent
commit
d4932bd8d6
1 ha cambiato i file con 39 aggiunte e 43 eliminazioni
  1. 39 43
      shop_item_list.py

+ 39 - 43
shop_item_list.py

@@ -139,57 +139,53 @@ def main():
 
 #     if len(sys.argv) >2:
 #         port=int(sys.argv[2])
+    print('drvier start...')
+    driver = brower_start(port)
+    #     db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+    table=db['shop_item_list3']
+    table2=db['progress_list2']
 
     for i in range(3):
-        print('drvier start...')
-        driver = brower_start(port)
-    #     db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
-        db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
-        table=db['shop_item_list2']
-        table2=db['progress_list2']
-    
         keyword  = get_crawler_list(db)
         print(keyword)
 
         c = 0
         for row in lon_lat:
             c += 1
-            try:
-                # latitude = row['lat'] #緯度
-                # longitude = row['lon'] #精度
-                latitude = row[1] #緯度
-                longitude = row[0] #精度
-                # table2.upsert({'kw':keyword,'num':row['num']},['kw'])
-                table2.insert({'kw':keyword,'num':c})
-
-                url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
-                driver.get(url)
-                keyin_keyword(driver, keyword)
-                failcnt = 0
-                for page in range(5):
-                    print(keyword, row['loc'], latitude, longitude, page)
-                    url_list = get_url_list(driver)
-                    duplicate = 0
-                    # shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
-                    for item in url_list:
-                        try:
-                            table.insert({'name':item[1],'lon':longitude, 'lat':latitude, \
-                                        'keyword':keyword, 'item_url':item[0],'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
-                        except:
-                            duplicate += 1
-                    print(len(url_list), duplicate)
-        #                     result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
-        #                     insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
-        #                                     .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
-
-        #                     DA.mysql_insert_data(db, insert_sql)
-
-                    if page < 2 :
-                        element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
-                        driver.implicitly_wait(30)
-                        ActionChains(driver).move_to_element(element).click(element).perform() 
-            except:
-                pass
+            # latitude = row['lat'] #緯度
+            # longitude = row['lon'] #經度
+            latitude = row[1] #緯度
+            longitude = row[0] #經度
+            # table2.upsert({'kw':keyword,'num':row['num']},['kw'])
+            table2.insert({'kw':keyword,'num':c})
+
+            url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
+            driver.get(url)
+            keyin_keyword(driver, keyword)
+            failcnt = 0
+            for page in range(5):
+                print(keyword, latitude, longitude, page)
+                url_list = get_url_list(driver)
+                duplicate = 0
+                # shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
+                for item in url_list:
+                    try:
+                        table.insert({'name':item[1],'lon':longitude, 'lat':latitude, \
+                                    'keyword':keyword, 'item_url':item[0],'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
+                    except:
+                        duplicate += 1
+                print(len(url_list), duplicate)
+    #                     result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
+    #                     insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
+    #                                     .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
+
+    #                     DA.mysql_insert_data(db, insert_sql)
+
+                if page < 2 :
+                    element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
+                    driver.implicitly_wait(30)
+                    ActionChains(driver).move_to_element(element).click(element).perform()