# -*- coding: utf-8 -*-
"""Backfill ``google_id`` for rows of the ``shop_list2`` table.

The script drives a remote Selenium Chrome session, opens the
``view-source:`` page of each shop's ``item_url``, pulls the first
``ChIJ...`` identifier out of the page source and upserts it back into
``shop_list2`` keyed on ``unique_id``.

Usage: ``python <script> [port]`` -- when a port is given, the docker
container named ``p<port>`` is restarted before the driver connects.
"""
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
from utility import database_access as DA
from utility.parseutils import *
from utility.connect import *
import dataset
import sys
from datetime import datetime
import pandas as pd
import time
import json
import re, os

# Matches the escaped fragment  null,\"ChIJ...\"  inside the page source.
# Compiled once because it is applied to every fetched page.
_GOOGLE_ID_PATTERN = re.compile('null,\\\\"ChIJ[a-zA-Z0-9-_+]*\\\\"')


def brower_start(port):
    """Return a Remote WebDriver bound to the hub at 127.0.0.1:<port>.

    Args:
        port: TCP port of the Selenium hub (``/wd/hub`` endpoint).

    Returns:
        The connected ``webdriver.Remote`` instance; the caller is
        responsible for calling ``quit()`` on it.
    """
    options = webdriver.ChromeOptions()
    # NOTE(review): ``desired_capabilities`` is version-dependent in
    # Selenium -- confirm it is accepted by the installed release.
    browser = webdriver.Remote(
        command_executor='http://127.0.0.1:' + str(port) + '/wd/hub',
        desired_capabilities=options.to_capabilities()
    )
    return browser


def _extract_google_id(page_source):
    """Return the first ``ChIJ...`` id found in *page_source*, or None."""
    match = _GOOGLE_ID_PATTERN.search(page_source)
    if match is None:
        return None
    # Strip the leading "null," marker plus the backslashes and quotes
    # that wrap the identifier in the raw source.
    return match.group(0).replace('null,', '').replace('\\', '').replace('"', '')


def main():
    """Fetch a random batch of shops without a google_id and fill it in.

    Reads an optional port from ``sys.argv[1]`` (default 4444). When the
    port is supplied on the command line, the docker container
    ``p<port>`` is restarted first. Rows whose page contains no
    ``ChIJ...`` id are skipped instead of aborting the whole batch, and
    the browser session is always closed on exit.
    """
    port = 4444
    if len(sys.argv) > 1:
        port = int(sys.argv[1])
        print('restart docker p{}'.format(port))
        os.system('sudo docker container restart p' + str(port))
        # Presumably gives the container time to come back up before
        # the driver connects -- TODO confirm the delay is sufficient.
        time.sleep(8)

    print('drvier start...')
    driver = brower_start(port)
    try:
        # NOTE(review): credentials are hard-coded in the connection URL;
        # consider loading them from the environment or a config file.
        db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
        table = db['shop_list2']

        for _ in range(1):
            result = db.query('select * from shop_list2 where google_id is null ORDER BY RAND() limit 20')
            # Materialize the rows before issuing upserts on the same
            # connection.
            result = pd.DataFrame(list(result))

            for _, group in result.iterrows():
                unique_id = group['unique_id']
                item_url = group['item_url']

                url = 'view-source:' + item_url
                driver.get(url)
                time.sleep(0.5)
                sourcetext = driver.page_source

                google_id = _extract_google_id(sourcetext)
                if google_id is None:
                    # Previously this raised IndexError and stopped the
                    # run; skip the row so the rest of the batch proceeds.
                    print('google_id not found for unique_id {}'.format(unique_id))
                    continue

                print(google_id)
                table.upsert({'unique_id': unique_id, 'google_id': google_id}, ['unique_id'])
    finally:
        # Release the remote browser session even when a row fails.
        driver.quit()


if __name__ == '__main__':
    main()