noodles 3 anni fa
parent
commit
23ce737db0
1 file modificato con 44 aggiunte e 42 eliminazioni
  1. 44 42
      shop_item_list.py

+ 44 - 42
shop_item_list.py

@@ -134,7 +134,7 @@ def main():
     if len(sys.argv) > 1 :
         port=int(sys.argv[1])
         print('restart docker p{}'.format(port))
-        os.system('sudo docker container restart p'+str(port))
+        os.system('sudo docker container restart pp'+str(port))
         time.sleep(8)
 
 #     if len(sys.argv) >2:
@@ -146,47 +146,49 @@ def main():
     table=db['shop_item_list3']
     table2=db['progress_list2']
 
-    for i in range(3):
-        keyword  = get_crawler_list(db)
-        print(keyword)
-
-        c = 0
-        for row in lon_lat:
-            c += 1
-            # latitude = row['lat'] #緯度
-            # longitude = row['lon'] #精度
-            latitude = row[1] #緯度
-            longitude = row[0] #精度
-            # table2.upsert({'kw':keyword,'num':row['num']},['kw'])
-            table2.insert({'kw':keyword,'num':c})
-
-            url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
-            driver.get(url)
-            keyin_keyword(driver, keyword)
-            failcnt = 0
-            for page in range(5):
-                print(keyword, latitude, longitude, page)
-                url_list = get_url_list(driver)
-                duplicate = 0
-                # shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
-                for item in url_list:
-                    try:
-                        table.insert({'name':item[1],'lon':longitude, 'lat':latitude, \
-                                    'keyword':keyword, 'item_url':item[0],'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
-                    except:
-                        duplicate += 1
-                print(len(url_list), duplicate)
-    #                     result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
-    #                     insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
-    #                                     .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
-
-    #                     DA.mysql_insert_data(db, insert_sql)
-
-                if page < 2 :
-                    element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
-                    driver.implicitly_wait(30)
-                    ActionChains(driver).move_to_element(element).click(element).perform() 
-
+    for i in range(20):
+        try:
+            keyword  = get_crawler_list(db)
+            print(keyword)
+
+            c = 0
+            for row in lon_lat:
+                c += 1
+                # latitude = row['lat'] #緯度
+                # longitude = row['lon'] #精度
+                latitude = row[1] #緯度
+                longitude = row[0] #精度
+                # table2.upsert({'kw':keyword,'num':row['num']},['kw'])
+                table2.insert({'kw':keyword,'num':c})
+
+                url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
+                driver.get(url)
+                keyin_keyword(driver, keyword)
+                failcnt = 0
+                for page in range(5):
+                    print(keyword, latitude, longitude, page)
+                    url_list = get_url_list(driver)
+                    duplicate = 0
+                    # shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
+                    for item in url_list:
+                        try:
+                            table.insert({'name':item[1],'lon':longitude, 'lat':latitude, \
+                                        'keyword':keyword, 'item_url':item[0],'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
+                        except:
+                            duplicate += 1
+                    print(len(url_list), duplicate)
+        #                     result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
+        #                     insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
+        #                                     .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
+
+        #                     DA.mysql_insert_data(db, insert_sql)
+
+                    if page < 2 :
+                        element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
+                        driver.implicitly_wait(30)
+                        ActionChains(driver).move_to_element(element).click(element).perform() 
+        except:
+            pass
 
 
 if __name__ == '__main__':