|
@@ -0,0 +1,62 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+from selenium import webdriver
|
|
|
+from selenium.webdriver.common.action_chains import ActionChains
|
|
|
+from selenium.webdriver.common.keys import Keys
|
|
|
+from selenium.webdriver.support import expected_conditions as EC
|
|
|
+from selenium.webdriver.support.wait import WebDriverWait
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
+
|
|
|
+from bs4 import BeautifulSoup
|
|
|
+
|
|
|
+from utility import database_access as DA
|
|
|
+from utility.parseutils import *
|
|
|
+from utility.connect import *
|
|
|
+import dataset
|
|
|
+import sys
|
|
|
+from datetime import datetime
|
|
|
+import pandas as pd
|
|
|
+import time
|
|
|
+import json
|
|
|
+import re, os
|
|
|
+
|
|
|
def brower_start(port):
    """Open a remote Chrome WebDriver session against a local Selenium hub.

    Args:
        port: Port on 127.0.0.1 where the ``/wd/hub`` endpoint is served.

    Returns:
        The ``webdriver.Remote`` instance created for that endpoint.
    """
    chrome_options = webdriver.ChromeOptions()
    hub_url = 'http://127.0.0.1:{}/wd/hub'.format(port)
    capabilities = chrome_options.to_capabilities()
    return webdriver.Remote(
        command_executor=hub_url,
        desired_capabilities=capabilities,
    )
|
|
|
+
|
|
|
+
|
|
|
# Matches the escaped ``null,\"ChIJ...\"`` token in the page source; group 1
# captures the bare ``ChIJ...`` identifier without the backslashes and quotes.
_GOOGLE_ID_RE = re.compile(r'null,\\"(ChIJ[a-zA-Z0-9_+-]*)\\"')


def _extract_google_id(sourcetext):
    """Return the first ``ChIJ...`` id found in *sourcetext*, or None if absent."""
    match = _GOOGLE_ID_RE.search(sourcetext)
    return match.group(1) if match else None


def main():
    """Fill in missing ``google_id`` values for rows of ``shop_list2``.

    The Selenium port defaults to 4444 and may be overridden by the first
    command-line argument. The docker container named ``p<port>`` is
    restarted, a remote browser session is opened, and up to 20 random rows
    whose ``google_id`` is NULL are processed: the ``view-source:`` page of
    each row's ``item_url`` is loaded, the first ``ChIJ...`` id is extracted
    from it, and the row is upserted by ``unique_id``.

    Rows whose page contains no id are reported and skipped instead of
    aborting the whole batch. The browser session is always closed on exit.

    Raises:
        ValueError: If the command-line port argument is not an integer.
    """
    port = 4444
    if len(sys.argv) > 1:
        port = int(sys.argv[1])

    print('restart docker p{}'.format(port))
    # `port` has been through int(), so the shell string cannot carry
    # arbitrary user text.
    os.system('sudo docker container restart p' + str(port))
    # Presumably gives the restarted container time to accept connections.
    time.sleep(8)
    print('drvier start...')
    driver = brower_start(port)
    try:
        # NOTE(review): database credentials are hard-coded in source; consider
        # moving them to configuration or environment variables.
        db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
        table = db['shop_list2']

        # Materialise the result first so the query is fully consumed before
        # any upsert is issued on the same connection.
        rows = list(db.query('select * from shop_list2 where google_id is null ORDER BY RAND() limit 20'))

        for row in rows:
            unique_id = row['unique_id']
            item_url = row['item_url']
            driver.get('view-source:' + item_url)
            time.sleep(0.5)

            google_id = _extract_google_id(driver.page_source)
            if google_id is None:
                # Leave the row untouched so a later run can retry it.
                print('google_id not found for {}'.format(unique_id))
                continue

            print(google_id)
            table.upsert({'unique_id': unique_id, 'google_id': google_id}, ['unique_id'])
    finally:
        # Release the remote browser session even if a row fails.
        driver.quit()
|
|
|
+
|
|
|
+
|
|
|
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
|