|
@@ -68,7 +68,7 @@ def brower_start(port):
|
|
|
# browser = webdriver.Chrome(options=options)
|
|
|
options.add_argument('--ignore-certificate-errors')
|
|
|
options.add_argument("--no-sandbox")
|
|
|
- options.add_argument("--headless")
|
|
|
+# options.add_argument("--headless")
|
|
|
options.add_argument("--disable-gpu")
|
|
|
options.add_argument("--disable-dev-shm-usage")
|
|
|
browser = webdriver.Chrome(options=options)
|
|
@@ -584,15 +584,18 @@ def main():
|
|
|
# port=int(sys.argv[2])
|
|
|
if len(sys.argv) > 1 :
|
|
|
port=int(sys.argv[1])
|
|
|
- print('restart docker p{}'.format(port))
|
|
|
- os.system('sudo docker container restart p'+str(port))
|
|
|
- time.sleep(8)
|
|
|
+# print('restart docker p{}'.format(port))
|
|
|
+# os.system('sudo docker container restart p'+str(port))
|
|
|
+# time.sleep(8)
|
|
|
else:
|
|
|
port = 2
|
|
|
|
|
|
for i in range(10):
|
|
|
- result = db2.query('select * from swire_store_list where check_ is null and fid not in (select distinct fid from error_list2) ORDER BY RAND() limit 500')
|
|
|
+# result = db2.query('select * from swire_store_list where check_ is null and fid not in (select distinct fid from error_list2) ORDER BY RAND() limit 500')
|
|
|
+ result = db2.query('SELECT * FROM swire_store_list a WHERE not exists (select 1 from error_list2 tei where tei.fid = a.fid limit 1 ) ORDER BY RAND() limit 500')
|
|
|
+
|
|
|
url_pd = pd.DataFrame([dict(i) for i in result])
|
|
|
+# print(url_pd)
|
|
|
url_pd['item_url'] = url_pd['fid'].apply(lambda x: 'https://www.google.com.tw/maps/@24.1753633,120.6747136,15z/data=!4m5!3m4!1s{}!8m2!3d24.1760271!4d120.6705323'.format(x))
|
|
|
|
|
|
# keyword = get_new_keyword(db2)
|
|
@@ -615,6 +618,7 @@ def main():
|
|
|
|
|
|
print('start...')
|
|
|
driver.get(item_url)
|
|
|
+ time.sleep(9999)
|
|
|
# page_down_(driver, "//div[@class='x3AX1-LfntMc-header-title-ij8cu']", 3)
|
|
|
page_down_(driver, "//div[@class='x3AX1-LfntMc-header-title-ij8cu-haAclf']", 3)
|
|
|
|
|
@@ -667,6 +671,7 @@ def main():
|
|
|
data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
|
|
|
table2.upsert({'place_id':row['place_id'],'check_':1},['place_id'])
|
|
|
except Exception as e:
|
|
|
+ traceback.print_exc()
|
|
|
table3 = db2['error_list2']
|
|
|
table3.insert({'fid':row['fid'],'num':row['name'],'keyword':row['keyword'],'item_url':row['item_url'],'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
|
|
|
print(e)
|