Browse source

Merge branch 'master' of http://git.choozmo.com:3000/noodlesloves/GooglePoiCrawler into master

jared 3 years ago
parent commit 86dde79e59
6 changed files with 1100 additions and 196 deletions
  1. get_google_id.py  + 62 - 0
  2. run.py  + 298 - 150
  3. run2.py  + 574 - 0
  4. shop_item_list.py  + 157 - 38
  5. utility/googlemapsearch.sql  + 6 - 5
  6. utility/parseutils.py  + 3 - 3

+ 62 - 0
get_google_id.py

@@ -0,0 +1,62 @@
+# -*- coding: utf-8 -*-
+from selenium import webdriver
+from selenium.webdriver.common.action_chains import ActionChains
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
+from selenium.webdriver.common.by import By
+
+from bs4 import BeautifulSoup
+
+from utility import database_access as DA
+from utility.parseutils import *
+from utility.connect import *
+import dataset
+import sys
+from datetime import datetime
+import pandas as pd
+import time
+import json
+import re, os
+
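+# Connect to a remote Selenium server instead of a local chromedriver; the
+# assumed setup is a Docker container named p<port> exposing a Selenium hub
+# at http://127.0.0.1:<port>/wd/hub (see the restart call in main()).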
+def brower_start(port):
+    options = webdriver.ChromeOptions()
+#    browser = webdriver.Chrome(options=options)
+
+    browser = webdriver.Remote(
+        command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
+        desired_capabilities=options.to_capabilities()
+    )
+    return browser
+
+
+def main():
+    port=4444
+    if len(sys.argv) >1:
+        port=int(sys.argv[1])
+        print('restart docker p{}'.format(port))
+        os.system('sudo docker container restart p'+str(port))
+        time.sleep(8)
+    print('driver start...')
+    driver = brower_start(port)
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+    table=db['shop_list2']
+
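+    # range(1) makes this a single batch of 20 random rows; widen the range
+    # to process more batches per run.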
+    for i in range(1):
+        result = db.query('select * from shop_list2 where google_id is null ORDER BY RAND() limit 20')
+        result = pd.DataFrame([i for i in result])
+
+        for key, group in result.iterrows():
+            unique_id = group['unique_id']
+            item_url = group['item_url']
+            url = 'view-source:' + item_url
+            driver.get(url)
+            time.sleep(0.5)
+            sourcetext = driver.page_source
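+            # The view-source output embeds the Google place ID as
+            # ...null,\"ChIJ...\"; the regex grabs that token and the
+            # replaces strip the JSON escaping and quotes.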
+            google_id = re.findall('null,\\\\"ChIJ[a-zA-Z0-9-_+]*\\\\"', sourcetext)[0].replace('null,','').replace('\\','').replace('"','')
+            print(google_id)
+            table.upsert({'unique_id': unique_id,'google_id':google_id},['unique_id'])
+            
+
+if __name__ == '__main__':
+    main()

+ 298 - 150
run.py

@@ -14,11 +14,14 @@ from utility.parseutils import *
 from utility.connect import *
 
 from datetime import datetime
+import traceback
 import dataset
 import pandas as pd
 import time
 import json
 import re
+import sys
+import os
 # import pyautogui as pag
 
 def serive_create(profilepath):
@@ -26,10 +29,10 @@ def serive_create(profilepath):
 
     option.add_argument('--disable-web-security')
     option.add_argument('--allow-running-insecure-content') 
-    option.add_argument("--user-data-dir=C:\\Users\\noodles\\AppData\\Local\\Google\\Chrome\\User Data")
+    option.add_argument("--user-data-dir=C:\\Users\\user\\AppData\\Local\\Google\\Chrome\\User Data")
     option.add_argument("profile-directory="+profilepath)
 
-    driver = webdriver.Chrome('./utility/chromedriver_20211103/chromedriver', options=option)
+    driver = webdriver.Chrome('./utility/chromedriver_win32/chromedriver', options=option)
     executor_url = driver.command_executor._url
     session_id = driver.session_id
     print (session_id)
@@ -38,10 +41,13 @@ def serive_create(profilepath):
     
     return driver
 
-def brower_start():
+def brower_start(port):
     options = webdriver.ChromeOptions()
+#    browser = webdriver.Chrome(options=options)
+
     browser = webdriver.Remote(
-        command_executor='http://192.53.174.202:4444/wd/hub',
+        command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
+        # command_executor='http://192.53.174.202:'+str(port)+'/wd/hub',
         desired_capabilities=options.to_capabilities()
     )
     return browser
@@ -62,7 +68,7 @@ def open_time(driver):
     element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[9]/div[2]')
     if element.text.find('預訂') == -1:
         element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[9]/div[2]')
-        driver.implicitly_wait(20)
+        driver.implicitly_wait(10)
         ActionChains(driver).move_to_element(element).click(element).perform()
         return 1
     else:
@@ -70,69 +76,85 @@ def open_time(driver):
 
 
 def get_shop_info(driver, output, shop_soup):
-    current_url_split = driver.current_url.split('@')[1].split(',')
-    output['lon'] = current_url_split[1]
-    output['lat'] = current_url_split[0]
+    # current_url_split = driver.current_url.split('@')[1].split(',')
+    # output['lon'] = current_url_split[1]
+    # output['lat'] = current_url_split[0]
     
     location = shop_soup.find('button',{'data-item-id':'oloc'})['aria-label'].split(' ')
     output['city'] = location[-1]
     output['area'] = location[-2]
-
-    output['addr'] = shop_soup.find('button',{'data-item-id':'address'})['aria-label'].replace('地址:', '')
-    output['tel'] = blank_check(shop_soup.find('button',{'data-tooltip':'複製電話號碼'})['aria-label'].split(':')[1])
+    
+    try:
+        output['addr'] = shop_soup.find('button',{'data-item-id':'address'})['aria-label'].replace('地址:', '')
+    except:
+        output['addr'] = ''
+        
+    try:
+        output['tel'] = blank_check(shop_soup.find('button',{'data-tooltip':'複製電話號碼'})['aria-label'].split(':')[1])
+    except:
+        output['tel'] = ''
     print(output['addr'], ', ' ,output['tel'])
 
     for key in element_list:
-        element = element_list[key]
-        if len(element) == 3:
-            value = shop_soup.find(element[0],element[1])[element[2]]
-            
-        else:
-            tmp_value = shop_soup.find(element[0],element[1])
-            if tmp_value:
-                value = tmp_value.text
+        try:
+            element = element_list[key]
+            if len(element) == 3:
+                value = shop_soup.find(element[0],element[1])[element[2]]
+
             else:
-                value = ''
+                tmp_value = shop_soup.find(element[0],element[1])
+                if tmp_value:
+                    value = tmp_value.text
+                else:
+                    value = ''
 
-        output[key] = value_check(key, value)
+            output[key] = value_check(key, value)
+        except:
+            output[key] = ''
 
     return output
 
 
 def get_intro_info(driver, output):
-    element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[6]')
-    driver.implicitly_wait(20)
-    ActionChains(driver).move_to_element(element).click(element).perform()
-    
-    for i in range(5, 35, 3):
-        try:
-            element = driver.find_element(By.XPATH,'//*[@id="pane"]/div/div[1]/div/div/div[2]/div[{}]'.format(i))
-            actions = ActionChains(driver)
-            actions.move_to_element(element).perform()
-        except:
-            break
-    
-    intro_soup = BeautifulSoup(driver.page_source, 'html.parser')
-    for key in intro_list:
-        elements = intro_soup.find('div',{'aria-label':key})
-        if elements:
-            element = elements.find_all('li',{'class':'LQjNnc-p83tee-JNdkSc-ibnC6b'})
-            count = 0
-            tmp = []
-            for ele in element:
-                # if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/2x/check_black_18dp.png"}):
-                if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/1x/check_black_18dp.png"}):
-                    tmp += [{
-                        'id':count,
-                        intro_list[key][1]: blank_check(ele.text)
-                    }]
-                    count += 1
-            print(str(tmp))
-            output[intro_list[key][0]] = str(tmp)
-        else:
+    # element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[6]')
+    try:
+        element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}簡介']".format(output['name']))
+        driver.implicitly_wait(5)
+        ActionChains(driver).move_to_element(element).click(element).perform()
+
+        # pageSource = driver.page_source
+        # fileToWrite = open("page_source.html", "w")
+        # fileToWrite.write(pageSource)
+        # fileToWrite.close()
+
+        page_down_(driver, '//*[@id="pane"]/div/div[1]', 3)
+
+        intro_soup = BeautifulSoup(driver.page_source, 'html.parser')
+        for key in intro_list:
+            elements = intro_soup.find('div',{'aria-label':key})
+            if elements:
+                element = elements.find_all('li',{'class':'LQjNnc-p83tee-JNdkSc-ibnC6b'})
+                count = 0
+                tmp = []
+                for ele in element:
+                    # if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/2x/check_black_18dp.png"}):
+                    if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/1x/check_black_18dp.png"}):
+                        tmp += [{
+                            'id':count,
+                            intro_list[key][1]: blank_check(ele.text)
+                        }]
+                        count += 1
+                print(str(tmp))
+                output[intro_list[key][0]] = str(tmp)
+            else:
+                output[intro_list[key][0]] = '[]'
+        driver.back()
+        return output
+
+    except:
+        for key in intro_list:
             output[intro_list[key][0]] = '[]'
-    driver.back()
-    return output
+        return output
 
 
 def get_time_list(shop_soup, output):
@@ -205,19 +227,26 @@ def get_reviews(driver, output):
         EC.element_to_be_clickable((By.CSS_SELECTOR, more_reviews_css))
     )
     element = driver.find_element_by_css_selector(more_reviews_css)
-    driver.implicitly_wait(20)
+    driver.implicitly_wait(10)
     ActionChains(driver).move_to_element(element).click(element).perform()
-    time.sleep(1)
+    time.sleep(0.5)
 
-    all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
-    for ap in all_photo:
-        ap.click()
+    # page_down_(driver, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]', 5)
+    page_down_(driver, '//div[@class="PPCwl"]',5)
 
-    all_review = driver.find_elements_by_css_selector('button[aria-label="顯示更多"')
-    for ap in all_review:
-        ap.click()
+    comment_soup = BeautifulSoup(driver.page_source, 'html.parser')
+    if len(comment_soup.find_all('div', class_='ODSEW-ShBeI-xJzy8c-bF1uUb')) != 0:
+        all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
+        for ap in all_photo:
+            ap.click()
+
+    if len(comment_soup.select('button[aria-label="顯示更多"]')) != 0:
+        all_review = driver.find_elements_by_css_selector('button[aria-label="顯示更多"]')
+        for ap in all_review:
+            ap.click()
 
     comment_soup = BeautifulSoup(driver.page_source, 'html.parser')
+
     count = 0
     reviews = []
     for comment in comment_soup.find_all('div',{'class':'ODSEW-ShBeI'}):
@@ -264,36 +293,35 @@ def get_reviews(driver, output):
 
 
 def find_photo_list(driver):
-    time.sleep(2)
+    time.sleep(0.5)
     wait = WebDriverWait(driver, 60)
     wait.until(
         EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[3]/div[1]/div[1]/div/a'))
     )
-    count_list = []
-    for i in range(1, 6):
-        try:
-            element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[3]/div[1]/div[{}]/div/a'.format(i))
-            count_list += [element.get_attribute('data-photo-index')]
-            actions = ActionChains(driver)
-            actions.move_to_element(element).perform()
-        except:
-            break
-    time.sleep(1)
+    page_down_(driver,'//*[@id="pane"]/div/div[1]/div/div/div[3]/div[1]/div[1]/div/a' , 10)
     photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
+
     photo_url = []
-    for photo_id in count_list:
-        for i in photo_soup.select('a[data-photo-index="{}"]'.format(photo_id))[0].find_all('div'):
-            if i['style'].find('width') != -1:
-                sentence = i['style']
+    count = 0
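+    # Collect at most the first six photo tiles; each image URL is embedded
+    # in the tile's inline style attribute.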
+    for i in photo_soup.find_all('a', class_='mWq4Rd-eEDwDf'):
+        if count > 5: break
+        a_url = i.find('div', class_='mWq4Rd-HiaYvf-CNusmb-gevUs loaded')
+        if a_url:
+            if a_url['style'].find('width') != -1:
+                sentence = a_url['style']
                 photo = re.search(r'https:(.*)\"', sentence)
-                print(sentence)
                 photo_url += [photo.group(0).replace('\"','')]
-                break
+        count += 1
     return photo_url
 
 
 def find_big_photo(output, driver):
-    element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}的相片']".format(output['name']))
+    # element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}的相片']".format(output['name']))
+    wait = WebDriverWait(driver, 60)
+    wait.until(
+        EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[1]/div[1]/button'))
+    )
+    element = driver.find_element(By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[1]/div[1]/button')
     ActionChains(driver).move_to_element(element).click(element).perform()
     output['shop_photo'] = '[]'
     output['menu_photo'] = '[]'
@@ -302,14 +330,18 @@ def find_big_photo(output, driver):
         '全部': 'shop_photo',
         '菜單': 'menu_photo'
     }
-    
+
+    driver.find_element(By.CSS_SELECTOR, "button[data-tab-index='1']")
+    photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
     tab_dict = {}
     for tab_index in [0, 1, 2]:
-        photo_name = driver.find_element(By.CSS_SELECTOR, "button[data-tab-index='{}']".format(tab_index)).text
-        if photo_name == '菜單':
-            tab_dict[photo_name] = tab_index
-        elif photo_name == '全部':
-            tab_dict[photo_name] = tab_index
+        selector = photo_soup.select("button[data-tab-index='{}']".format(tab_index))
+        if len(selector) != 0:
+            photo_name = selector[0].text
+            if photo_name == '菜單':
+                tab_dict[photo_name] = tab_index
+            elif photo_name == '全部':
+                tab_dict[photo_name] = tab_index
     print(tab_dict)                            
     for tab_ in tab_dict:
         tab_index = tab_dict[tab_]
@@ -367,32 +399,52 @@ def data_select_insert(db, table_name, table_col, data):
 
 
 def time_click(driver):
+    shop_soup_tmp = BeautifulSoup(driver.page_source, 'html.parser')
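+    # Possible statuses: '正常' (hours table opened), the closure notice text
+    # ('永久停業' / '暫時關閉'), or 'error' when only the detailed-hours button
+    # is present, in which case the listing is skipped upstream.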
     status = ''
     try:
-        time_css =  "span[aria-label='顯示本週營業時間']"
-        element = driver.find_element_by_css_selector(time_css)
-        driver.implicitly_wait(30)
-        ActionChains(driver).move_to_element(element).click(element).perform()
-        status = '正常'
+        if len(shop_soup_tmp.select("span[aria-label='顯示本週營業時間']")) != 0:
+            time_css =  "span[aria-label='顯示本週營業時間']"
+            element = driver.find_element_by_css_selector(time_css)
+            driver.implicitly_wait(10)
+            ActionChains(driver).move_to_element(element).click(element).perform()
+            status = '正常'
+        
+        elif len(shop_soup_tmp.select("img[aria-label='通知']")) != 0:
+            status = shop_soup_tmp.find('span',class_='LJKBpe-Tswv1b-text aSftqf').text
+#             status is '永久停業' (permanently closed) or '暫時關閉' (temporarily closed)
+                
+        elif len(shop_soup_tmp.select('button[aria-label*="查看更詳細的營業時間"]')) != 0:
+            status = 'error'
+            
+        return status
+    except:
+        return ''
 
-    except NoSuchElementException:
-        time_css =  "div[aria-expanded='false']"
-        elem = driver.find_element_by_css_selector(time_css)
-        if elem:
-            status = '暫時關閉'
-    return status
+
+def get_new_keyword(db):
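+    # Pick the first keyword in shop_item_list that is not already claimed
+    # in progress_list2.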
+    result = db.query('select distinct(keyword) from shop_item_list order by keyword')
+    result = pd.DataFrame([i for i in result])
+    progress = db.query('select distinct(kw) from progress_list2')
+    progress = pd.DataFrame([i for i in progress])
+
+    if len(progress) != 0:
+        keyword = result[~result['keyword'].isin(progress.kw.to_list())].iloc[0].values[0]
+    else:
+        keyword = result.iloc[0].values[0]
+        
+    return keyword
 
 
 def get_not_cralwer_url(keyword):
     db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
-    table = db['shop_item_list']
+    table = db['shop_item_list3']
     url_list = list(table.find(keyword=keyword))
-    shop_item = [i['item_url'] for i in db.query('SELECT item_url FROM shop_list where keyword="{}"'.format(keyword))]
-    error_item = [i['item_url'] for i in db.query('SELECT item_url FROM error_list where keyword="{}"'.format(keyword))]
+    shop_item = [i['item_url'] for i in db.query('SELECT item_url FROM shop_list2 where keyword="{}"'.format(keyword))]
+    error_item = [i['item_url'] for i in db.query('SELECT item_url FROM error_list2 where keyword="{}"'.format(keyword))]
     
     url_pd = pd.DataFrame(url_list, columns=url_list[0].keys())
-    url_pd['item_url_length'] = url_pd.item_url.apply(lambda x: len(x))
-    url_pd = url_pd[url_pd['item_url_length']!=1000]
+    # url_pd['item_url_length'] = url_pd.item_url.apply(lambda x: len(x))
+    # url_pd = url_pd[(url_pd['item_url_length']!=1000) & (url_pd['item_url_length']!=600)]
     url_pd = url_pd[~url_pd['item_url'].isin(shop_item)]
     url_pd = url_pd[~url_pd['item_url'].isin(error_item)]
 
@@ -426,66 +478,162 @@ def serive_create_linux(profilepath):
 
     return driver
 
-def main():
-    keyword = '咖啡'
-    db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
-    url_pd = get_not_cralwer_url(keyword)
 
-    print('drvier start...')
-    driver = brower_start()
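+# Read coordinates off the map itself: right-click the map canvas at the
+# centre of the area beside the results pane and take "lat, lon" from the
+# first context-menu entry.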
+def find_lon_lat(driver):
+    e = driver.find_element_by_css_selector("#scene > div.widget-scene > canvas")
+    size = e.size
+    total_height = size['height']
+    total_width = size['width']
 
-    # driver = serive_create('Profile 1')
-    # profilepath = 'Profile 1'
-    # driver = serive_create_linux(profilepath)
-    
-    for key, row in url_pd.iterrows():
-        try:    
-            name = row['name']
-            item_url = row['item_url']
-            print(key, name, ': ' ,item_url)
-
-            driver.get(item_url)
-            for i in range(4, 26, 2):
-                element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[{}]'.format(i))
-                actions = ActionChains(driver)
-                actions.move_to_element(element).perform()
-                time.sleep(0.5)
-            print('start...')
-            time_status = time_click(driver)
-            time.sleep(0.5)
-            shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
-
-            output = {
-                'name': blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text)
-            }
-            print(output['name'])
+    size2 = driver.find_element_by_css_selector("#pane > div.Yr7JMd-pane").size
+    left_width = size2['width']
+    print(total_height, total_width, left_width)
+    x = (total_width - left_width) / 2 + left_width
+    y = total_height / 2
 
-            output = get_shop_info(driver, output, shop_soup)
+    e = driver.find_element_by_css_selector("#pane > div.Yr7JMd-pane")
+    action = webdriver.common.action_chains.ActionChains(driver)
+    action.move_to_element_with_offset(e, x, y)
+    action.context_click()
+    action.perform()
 
-            output = get_intro_info(driver, output)
+    time.sleep(0.5)
 
-            output = get_time_list(shop_soup, output)
+    element = driver.find_element_by_css_selector('#action-menu > ul > li:nth-child(1)')
+    lat, lon = element.text.split(',')
+    return float(lat), float(lon)
 
-            output = get_reviews(driver, output)
 
-            output = find_big_photo(output, driver)
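+# Open the place's 分享 (Share) dialog and use the last path segment of the
+# generated short link (e.g. https://goo.gl/maps/<id>) as a stable unique_id.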
+def get_unique_id(driver):
+    element = driver.find_element(By.CSS_SELECTOR, "button[data-value='分享']")
+    driver.implicitly_wait(5)
+    ActionChains(driver).move_to_element(element).click(element).perform()
+    time.sleep(0.5)
+    for i in range(5):
+        ele = driver.find_element(By.CSS_SELECTOR, "input")
+        short_url = ele.get_attribute('value')
+        unique_id = short_url.split('/')[-1]
+        if len(unique_id) != 0:
+            break
+        time.sleep(0.5)
+    element = driver.find_element(By.CSS_SELECTOR, "button[aria-label='關閉']")
+    driver.implicitly_wait(5)
+    ActionChains(driver).move_to_element(element).click(element).perform()
+    return unique_id
 
-            output_name = output['name'].replace('(','').replace(')', '')
-            query_name = '{}+{}'.format(output_name, output['addr'])
-            query_name = query_name.replace(' ','')
-            output['item_url'] = item_url
-            output['keyword'] = keyword
-            output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
-            data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
 
+def page_down_(driver, xpath_css, time_):
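+    # Click the element matched by xpath_css to focus the scrollable pane,
+    # then send PAGE_DOWN time_ times (a press count, not seconds) so Maps
+    # lazy-loads more content.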
+    elmts = driver.find_elements_by_xpath(xpath_css)
+    print(elmts)
+    if len(elmts)>1:
+        elmt=elmts[1]
+    else:
+        elmt=elmts[0]
+    actions = ActionChains(driver)
+    actions.move_to_element(elmt).click().perform()
+    for i in range(time_):
+        try:
+            actions = ActionChains(driver)
+            actions.send_keys(Keys.PAGE_DOWN).perform()
         except:
-            error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
-            data_select_insert(db, 'error_list', error_table_col, row)
-            driver.close()
-            driver = brower_start()
-            # driver = serive_create_linux(profilepath)
-   
+            traceback.print_exc()
+        time.sleep(0.5)
 
 
+def main():
+    db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
+    db2 = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+    table2 = db2['progress_list2']
+    # keyword = '麻辣火鍋'
+    # if len(sys.argv) >1:
+    #     keyword=sys.argv[1]
+    # port=4444
+    # if len(sys.argv) >2:
+    #     port=int(sys.argv[2])
+    if len(sys.argv) > 1 :
+        port=int(sys.argv[1])
+        print('restart docker p{}'.format(port))
+        os.system('sudo docker container restart p'+str(port))
+        time.sleep(8)
+
+    for i in range(5):
+        keyword = get_new_keyword(db2)
+        table2.insert({'kw':keyword,'num':0})
+        url_pd = get_not_cralwer_url(keyword)
+        print('driver start {}...'.format(keyword))
+        driver = brower_start(port)
+        #driver = serive_create('Profile 6')
+        #profilepath = 'Profile 1'
+        #driver = serive_create_linux(profilepath)
+        
+        for key, row in url_pd.iterrows():
+            try:    
+                name = row['name']
+                item_url = row['item_url']
+                print(key, name, ': ' ,item_url)
+    
+                print('start...')
+                driver.get(item_url)
+                page_down_(driver, "//div[@class='x3AX1-LfntMc-header-title-ij8cu']", 3)
+                lat, lon = find_lon_lat(driver)
+                unique_id = get_unique_id(driver)
+                time_status = time_click(driver)
+                if time_status == 'error' or len(unique_id) ==0:
+                    error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
+                    data_select_insert(db, 'error_list2', error_table_col, row)
+                    continue
+                time.sleep(0.5)
+                shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
+
+                output = {
+                    'name': blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text),
+                    'lon': lon,
+                    'lat': lat,
+                    'unique_id': unique_id.replace('?share','')
+                }
+                print(output['name'], lon, lat, unique_id)
+
+                print('get_shop_info')
+                output = get_shop_info(driver, output, shop_soup)
+
+                print('get_intro_info')
+                if len(shop_soup.select("div[aria-label='{}簡介']".format(output['name']))) != 0:
+                    output = get_intro_info(driver, output)
+                else:
+                    for key in intro_list:
+                        output[intro_list[key][0]] = '[]'
+
+                print('get_time_list')
+                if time_status == '正常':
+                    output = get_time_list(shop_soup, output)
+                else:
+                    output['open_now'] = False
+                    output['periods'] = ''
+                    output['weekday_text'] = ''
+
+                print('user_ratings_total')
+                if output['user_ratings_total'] == '':
+                    output['reviews'] = ''
+                else:
+                    output = get_reviews(driver, output)
+
+                print('find_big_photo')
+                output = find_big_photo(output, driver)
+
+                output_name = output['name'].replace('(','').replace(')', '')
+                query_name = '{}+{}'.format(output_name, output['addr'])
+                query_name = query_name.replace(' ','')
+                output['item_url'] = item_url
+                output['keyword'] = keyword
+                output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
+                data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
+                
+            except Exception as e:
+                print(e)
+                error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
+                data_select_insert(db, 'error_list2', error_table_col, row)
+                time.sleep(1)
+
+        
 if __name__ == '__main__':
     main()

+ 574 - 0
run2.py

@@ -0,0 +1,574 @@
+# -*- coding: utf-8 -*-
+from selenium import webdriver
+from selenium.webdriver.common.action_chains import ActionChains
+from selenium.webdriver.common.keys import Keys
+from selenium.webdriver.support import expected_conditions as EC
+from selenium.webdriver.support.wait import WebDriverWait
+from selenium.common.exceptions import NoSuchElementException
+from selenium.webdriver.common.by import By
+
+from bs4 import BeautifulSoup
+
+from utility import database_access as DA
+from utility.parseutils import *
+from utility.connect import *
+
+from datetime import datetime
+import traceback
+import dataset
+import pandas as pd
+import time
+import json
+import re
+import sys
+# import pyautogui as pag
+
+def serive_create(profilepath):
+    option = webdriver.ChromeOptions()
+
+    option.add_argument('--disable-web-security')
+    option.add_argument('--allow-running-insecure-content') 
+    option.add_argument("--user-data-dir=C:\\Users\\user\\AppData\\Local\\Google\\Chrome\\User Data")
+    option.add_argument("profile-directory="+profilepath)
+
+    driver = webdriver.Chrome('./utility/chromedriver_win32/chromedriver', options=option)
+    executor_url = driver.command_executor._url
+    session_id = driver.session_id
+    print (session_id)
+    print (executor_url)
+    time.sleep(3)
+    
+    return driver
+
+def brower_start(port):
+    options = webdriver.ChromeOptions()
+#    browser = webdriver.Chrome(options=options)
+
+    browser = webdriver.Remote(
+        command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
+        #command_executor='http://192.53.174.202:'+str(port)+'/wd/hub',
+        desired_capabilities=options.to_capabilities()
+    )
+    return browser
+
+
+def keyin_keyword(driver, keyword):
+    button = driver.find_element_by_id("searchbox")
+    driver.implicitly_wait(30)
+    ActionChains(driver).move_to_element(button).send_keys(keyword).send_keys(Keys.RETURN).perform()
+    time.sleep(3)
+
+    # element = driver.find_element_by_class_name("V0h1Ob-haAclf")
+    # driver.implicitly_wait(30)
+    # ActionChains(driver).move_to_element(element).click(element).perform()
+
+
+def open_time(driver):
+    element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[9]/div[2]')
+    if element.text.find('預訂') == -1:
+        element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[9]/div[2]')
+        driver.implicitly_wait(10)
+        ActionChains(driver).move_to_element(element).click(element).perform()
+        return 1
+    else:
+        return 0
+
+
+def get_shop_info(driver, output, shop_soup):
+    current_url_split = driver.current_url.split('@')[1].split(',')
+    output['lon'] = current_url_split[1]
+    output['lat'] = current_url_split[0]
+    
+    location = shop_soup.find('button',{'data-item-id':'oloc'})['aria-label'].split(' ')
+    output['city'] = location[-1]
+    output['area'] = location[-2]
+    
+    try:
+        output['addr'] = shop_soup.find('button',{'data-item-id':'address'})['aria-label'].replace('地址:', '')
+    except:
+        output['addr'] = ''
+        
+    try:
+        output['tel'] = blank_check(shop_soup.find('button',{'data-tooltip':'複製電話號碼'})['aria-label'].split(':')[1])
+    except:
+        output['tel'] = ''
+    print(output['addr'], ', ' ,output['tel'])
+
+    for key in element_list:
+        try:
+            element = element_list[key]
+            if len(element) == 3:
+                value = shop_soup.find(element[0],element[1])[element[2]]
+
+            else:
+                tmp_value = shop_soup.find(element[0],element[1])
+                if tmp_value:
+                    value = tmp_value.text
+                else:
+                    value = ''
+
+            output[key] = value_check(key, value)
+        except:
+            output[key] = ''
+
+    return output
+
+
+def get_intro_info(driver, output):
+    # element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[6]')
+    try:
+        element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}簡介']".format(output['name']))
+        driver.implicitly_wait(5)
+        ActionChains(driver).move_to_element(element).click(element).perform()
+
+        # pageSource = driver.page_source
+        # fileToWrite = open("page_source.html", "w")
+        # fileToWrite.write(pageSource)
+        # fileToWrite.close()
+
+        page_down_(driver, '//*[@id="pane"]/div/div[1]', 3)
+
+        intro_soup = BeautifulSoup(driver.page_source, 'html.parser')
+        for key in intro_list:
+            elements = intro_soup.find('div',{'aria-label':key})
+            if elements:
+                element = elements.find_all('li',{'class':'LQjNnc-p83tee-JNdkSc-ibnC6b'})
+                count = 0
+                tmp = []
+                for ele in element:
+                    # if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/2x/check_black_18dp.png"}):
+                    if ele.find('img',{'src':"//www.gstatic.com/images/icons/material/system_gm/1x/check_black_18dp.png"}):
+                        tmp += [{
+                            'id':count,
+                            intro_list[key][1]: blank_check(ele.text)
+                        }]
+                        count += 1
+                print(str(tmp))
+                output[intro_list[key][0]] = str(tmp)
+            else:
+                output[intro_list[key][0]] = '[]'
+        driver.back()
+        return output
+
+    except:
+        for key in intro_list:
+            output[intro_list[key][0]] = '[]'
+        return output
+
+
+def get_time_list(shop_soup, output):
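+    # Build a Places-API-style list of open/close periods from the hours
+    # table: '24 小時營業' marks a full-day opening, '休息' a closed day.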
+    periods = []
+    weekday_text = []
+    
+    open_now = blank_check(shop_soup.find('span', {'class':'LJKBpe-Tswv1b-hour-text'}).text.split('\xa0')[0])
+    if open_now == '永久停業' or open_now == '暫時關閉':
+        output['open_now'] = 'False'
+    else:
+        output['open_now'] = 'True'
+        for tr_ in shop_soup.find_all('tr'):
+            if tr_.find('div').text.replace(' ','') != '':
+                week = tr_.find('div').text
+                time_list = [blank_check(i.text) for i in tr_.find_all('li')]
+                for time_ in time_list:
+                    if time_ == '24 小時營業':
+                        periods += [{
+                            "open":{
+                                "day": week_list[week], 
+                                "time": '0000'
+                            },
+                            "close":{
+                                "day": week_list[week], 
+                                "time": ''
+                            }
+                        }]
+                    elif time_ == '休息':
+                        periods += [{
+                            "open":{
+                                "day": week_list[week], 
+                                "time": ''
+                            },
+                            "close":{
+                                "day": week_list[week], 
+                                "time": ''
+                            }
+                        }]
+                    else:
+                        start, end = time_.split('–')
+                        end_hour, end_min = end.split(':')
+                        start_hour, start_min = start.split(':')
+
+                        if int(end_hour) < int(start_hour):
+                            end_day = week_list[week] + 1 
+                        else:
+                            end_day = week_list[week]
+
+                        periods += [{
+                            "open":{
+                                "day": week_list[week], 
+                                "time": start.replace(':','')
+                            },
+                            "close":{
+                                "day": end_day, 
+                                "time": end.replace(':','')
+                            }
+                        }]
+                weekday_text += ["{}: {}".format(week, ', '.join(time_list))]
+    output['periods'] = str(periods)
+    output['weekday_text'] = str(weekday_text)
+
+    return output
+
+
+def get_reviews(driver, output):
+    wait = WebDriverWait(driver, 30)
+    more_reviews_css = "button[jsaction='pane.rating.moreReviews']"
+    wait.until(
+        EC.element_to_be_clickable((By.CSS_SELECTOR, more_reviews_css))
+    )
+    element = driver.find_element_by_css_selector(more_reviews_css)
+    driver.implicitly_wait(10)
+    ActionChains(driver).move_to_element(element).click(element).perform()
+    time.sleep(0.5)
+
+    # page_down_(driver, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]', 5)
+    page_down_(driver, '//div[@class="PPCwl"]',5)
+
+    comment_soup = BeautifulSoup(driver.page_source, 'html.parser')
+    if len(comment_soup.find_all('div', class_='ODSEW-ShBeI-xJzy8c-bF1uUb')) != 0:
+        all_photo = driver.find_elements_by_class_name('ODSEW-ShBeI-xJzy8c-bF1uUb')
+        for ap in all_photo:
+            ap.click()
+
+    if len(comment_soup.select('button[aria-label="顯示更多"]')) != 0:
+        all_review = driver.find_elements_by_css_selector('button[aria-label="顯示更多"]')
+        for ap in all_review:
+            ap.click()
+
+    comment_soup = BeautifulSoup(driver.page_source, 'html.parser')
+
+    count = 0
+    reviews = []
+    for comment in comment_soup.find_all('div',{'class':'ODSEW-ShBeI'}):
+        comment_a_tag = comment.find_all('a')
+        author_name = blank_check(comment_a_tag[1].find('div', class_= 'ODSEW-ShBeI-title').text)
+        profile_photo_url = comment_a_tag[0].find('img')['src']
+        rating = blank_check(comment.find('span',{'role':'img'})['aria-label'].replace('顆星', ''))
+        text = comment.find('div', class_='ODSEW-ShBeI-ShBeI-content').text
+        created_at = comment.find('span', class_='ODSEW-ShBeI-RgZmSc-date').text
+        photos = []
+        c = 0
+        for i in comment.find_all('button', class_='ODSEW-ShBeI-xJzy8c'):
+            path = i['style'].split(';')[0].split('url')[1].replace('\"','').replace('(','').replace(')','') 
+            photos += [path]
+            c += 1
+            
+        reviews += [{
+            'id': comment.find('a')['href'].split('/')[5],
+            'author_name': author_name,
+            'profile_photo_url': profile_photo_url,
+            'rating': int(rating), 
+            'text': text,
+            'created_at': created_at,
+            'photos': photos
+        }]
+        count += 1
+
+    output['reviews'] = str(reviews)
+    driver.back()
+    return output
+
+# def get_photo(output, shop_soup):
+#     shop_photo = {}
+#     for i in shop_soup.find('div',{'aria-label':'{}的相片'.format(output['name'])}).find_all('button'):
+#         try:
+#             if i['aria-label'] == '街景服務和 360 度相片' or i['aria-label'] == '影片':
+#                 continue
+                
+#             shop_photo[i['aria-label']] = i.find('img')['src']
+#         except:
+#             pass
+#     output['shop_photo'] = shop_photo
+#     return output
+
+
+def find_photo_list(driver):
+    time.sleep(0.5)
+    wait = WebDriverWait(driver, 60)
+    wait.until(
+        EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[3]/div[1]/div[1]/div/a'))
+    )
+    page_down_(driver,'//*[@id="pane"]/div/div[1]/div/div/div[3]/div[1]/div[1]/div/a' , 10)
+    photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
+
+    photo_url = []
+    count = 0
+    for i in photo_soup.find_all('a', class_='mWq4Rd-eEDwDf'):
+        if count > 5: break
+        a_url = i.find('div', class_='mWq4Rd-HiaYvf-CNusmb-gevUs loaded')
+        if a_url:
+            if a_url['style'].find('width') != -1:
+                sentence = a_url['style']
+                photo = re.search(r'https:(.*)\"', sentence)
+                photo_url += [photo.group(0).replace('\"','')]
+        count += 1
+    return photo_url
+
+
+def find_big_photo(output, driver):
+    # element = driver.find_element(By.CSS_SELECTOR, "div[aria-label='{}的相片']".format(output['name']))
+    wait = WebDriverWait(driver, 60)
+    wait.until(
+        EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[1]/div[1]/button'))
+    )
+    element = driver.find_element(By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[1]/div[1]/button')
+    ActionChains(driver).move_to_element(element).click(element).perform()
+    output['shop_photo'] = '[]'
+    output['menu_photo'] = '[]'
+    
+    photo_map = {
+        '全部': 'shop_photo',
+        '菜單': 'menu_photo'
+    }
+
+    driver.find_element(By.CSS_SELECTOR, "button[data-tab-index='1']")
+    photo_soup = BeautifulSoup(driver.page_source, 'html.parser')
+    tab_dict = {}
+    for tab_index in [0, 1, 2]:
+        selector = photo_soup.select("button[data-tab-index='{}']".format(tab_index))
+        if len(selector) != 0:
+            photo_name = selector[0].text
+            if photo_name == '菜單':
+                tab_dict[photo_name] = tab_index
+            elif photo_name == '全部':
+                tab_dict[photo_name] = tab_index
+    print(tab_dict)                            
+    for tab_ in tab_dict:
+        tab_index = tab_dict[tab_]
+        print(tab_index)
+        wait = WebDriverWait(driver, 60)
+        wait.until(
+            EC.element_to_be_clickable((By.CSS_SELECTOR, "button[data-tab-index='{}']".format(tab_index)))
+        )
+        element = driver.find_element(By.CSS_SELECTOR, "button[data-tab-index='{}']".format(tab_index))
+        ActionChains(driver).move_to_element(element).click(element).perform()
+        photo_list = find_photo_list(driver)
+        output[photo_map[tab_]] = str(photo_list)
+        
+    return output
+
+
+def get_url_list(driver):
+    # wait = WebDriverWait(driver, 10)
+    # wait.until(
+    #     EC.element_to_be_clickable((By.XPATH, '//*[@id="sGi9mc-m5SR9c-bottom-pane"]/div/div[1]/div/div/div/div[1]/div[2]/div[2]'))
+    # )
+    # driver.back()
+
+    time.sleep(2)
+    for i in range(5, 43, 2):
+        driver.find_element(By.XPATH,'//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)).send_keys(Keys.DOWN)
+    url_soup = BeautifulSoup(driver.page_source, 'html.parser')
+    url_list = []
+    for i in url_soup.find_all('a'):
+        try:
+            if i['href'].find('maps/place') != -1:
+                url_list += [[i['href'], i['aria-label']]]
+        except:
+            pass
+    
+    return url_list
+
+
+def data_select_insert(db, table_name, table_col, data):
+    tmp = []
+    for name_ in table_col:
+        if name_ == 'crawler_date':
+            continue
+        if name_ == 'lon' or name_ == 'lat':
+            tmp += [float(data[name_])]
+        else:
+            tmp += [data[name_]]
+
+    tmp += [datetime.today().strftime("%Y/%m/%d %H:%M")]
+
+    insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
+                    .format(table_name, str(tuple(table_col)).replace('\'',''), tuple(tmp))
+
+    DA.mysql_insert_data(db, insert_sql)
+
+
+def time_click(driver):
+    shop_soup_tmp = BeautifulSoup(driver.page_source, 'html.parser')
+    status = ''
+    try:
+        if len(shop_soup_tmp.select("span[aria-label='顯示本週營業時間']")) != 0:
+            time_css =  "span[aria-label='顯示本週營業時間']"
+            element = driver.find_element_by_css_selector(time_css)
+            driver.implicitly_wait(10)
+            ActionChains(driver).move_to_element(element).click(element).perform()
+            status = '正常'
+        
+        elif len(shop_soup_tmp.select("img[aria-label='通知']")) != 0:
+            status = shop_soup_tmp.find('span',class_='LJKBpe-Tswv1b-text aSftqf').text
+#             status is '永久停業' (permanently closed) or '暫時關閉' (temporarily closed)
+                
+        elif len(shop_soup_tmp.select('button[aria-label*="查看更詳細的營業時間"]')) != 0:
+            status = 'error'
+            
+        return status
+    except:
+        return ''
+
+
+def get_not_cralwer_url(keyword):
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+    table = db['shop_item_list']
+    url_list = list(table.find(keyword=keyword))
+    shop_item = [i['item_url'] for i in db.query('SELECT item_url FROM shop_list where keyword="{}"'.format(keyword))]
+    error_item = [i['item_url'] for i in db.query('SELECT item_url FROM error_list where keyword="{}"'.format(keyword))]
+    
+    url_pd = pd.DataFrame(url_list, columns=url_list[0].keys())
+    url_pd['item_url_length'] = url_pd.item_url.apply(lambda x: len(x))
+    url_pd = url_pd[(url_pd['item_url_length']!=1000) & (url_pd['item_url_length']!=600)]
+    url_pd = url_pd[~url_pd['item_url'].isin(shop_item)]
+    url_pd = url_pd[~url_pd['item_url'].isin(error_item)]
+
+    print('have {} URL list'.format(len(url_pd)))
+    # url_list = pd.read_csv('result/shop_item_list_20211210.csv', index_col=0)
+
+    return url_pd
+
+
+def serive_create_linux(profilepath):
+    option = webdriver.ChromeOptions()
+    option.add_argument('--headless')
+    option.add_argument('--no-sandbox')
+    option.add_argument('--disable-web-security')
+    option.add_argument('--allow-running-insecure-content')
+    option.add_argument('--incognito')
+    option.add_argument(
+        'user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:65.0) Gecko/20100101 Firefox/65.0')
+    # option.add_argument("--user-data-dir=C:\\Users\\noodles\\AppData\\Local\\Google\\Chrome\\User Data")
+    option.add_argument(
+        "--user-data-dir=/home/noodlesloves/.config/google-chrome/")
+    option.add_argument("profile-directory="+profilepath)
+    driver = webdriver.Chrome('utility/chromedriver', options=option)
+    # driver = webdriver.Chrome(executable_path='/usr/bin/chromedriver', chrome_options=option,
+    #                           service_args=['--verbose', '--log-path=/tmp/chromedriver.log'])
+
+    executor_url = driver.command_executor._url
+    session_id = driver.session_id
+    print(session_id)
+    print(executor_url)
+
+    return driver
+
+
+def page_down_(driver, xpath_css, time_):
+    elmts = driver.find_elements_by_xpath(xpath_css)
+    print(elmts)
+    if len(elmts)>1:
+        elmt=elmts[1]
+    else:
+        elmt=elmts[0]
+    actions = ActionChains(driver)
+    actions.move_to_element(elmt).click().perform()
+    for i in range(time_):
+        try:
+            actions = ActionChains(driver)
+            actions.send_keys(Keys.PAGE_DOWN).perform()
+        except:
+            traceback.print_exc()
+        time.sleep(0.5)
+
+
+def main():
+    #db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
+    
+    keyword = '麻辣火鍋'
+    if len(sys.argv) >1:
+        keyword=sys.argv[1]
+    port=4448
+    if len(sys.argv) >2:
+        port=int(sys.argv[2])
+        
+    for keyword in ['鳳梨酥','蔥油餅','滷肉飯']:
+        db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
+        url_pd = get_not_cralwer_url(keyword)
+        print('driver start {}...'.format(keyword))
+        driver = brower_start(port)
+        #driver = serive_create('Profile 6')
+        #profilepath = 'Profile 1'
+        #driver = serive_create_linux(profilepath)
+        
+        for key, row in url_pd.iterrows():
+            try:    
+                name = row['name']
+                item_url = row['item_url']
+                print(key, name, ': ' ,item_url)
+
+                print('start...')
+                driver.get(item_url)
+                page_down_(driver, "//div[@class='x3AX1-LfntMc-header-title-ij8cu']", 3)
+                
+                time_status = time_click(driver)
+                if time_status == 'error':
+                    error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
+                    data_select_insert(db, 'error_list', error_table_col, row)
+                    continue
+                time.sleep(0.5)
+                shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
+
+                output = {
+                    'name': blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text)
+                }
+                print(output['name'])
+
+                print('get_shop_info')
+                output = get_shop_info(driver, output, shop_soup)
+
+                print('get_intro_info')
+                if len(shop_soup.select("div[aria-label='{}簡介']".format(output['name']))) != 0:
+                    output = get_intro_info(driver, output)
+                else:
+                    for key in intro_list:
+                        output[intro_list[key][0]] = '[]'
+
+                print('get_time_list')
+                if time_status == '正常':
+                    output = get_time_list(shop_soup, output)
+                else:
+                    output['open_now'] = False
+                    output['periods'] = ''
+                    output['weekday_text'] = ''
+
+                print('user_ratings_total')
+                if output['user_ratings_total'] == '':
+                    output['reviews'] = ''
+                else:
+                    output = get_reviews(driver, output)
+
+                print('find_big_photo')
+                output = find_big_photo(output, driver)
+
+                output_name = output['name'].replace('(','').replace(')', '')
+                query_name = '{}+{}'.format(output_name, output['addr'])
+                query_name = query_name.replace(' ','')
+                output['item_url'] = item_url
+                output['keyword'] = keyword
+                output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
+                data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
+                
+            except Exception as e:
+                print(e)
+                error_table_col = ['name', 'lon', 'lat', 'keyword', 'item_url', 'crawler_date']
+                data_select_insert(db, 'error_list', error_table_col, row)
+                time.sleep(1)
+                # driver.close()
+                # driver = brower_start(port)
+                # driver = serive_create_linux(profilepath)
+   
+
+
+if __name__ == '__main__':
+    main()

+ 157 - 38
shop_item_list.py

@@ -5,7 +5,8 @@ from selenium.webdriver.common.keys import Keys
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.wait import WebDriverWait
 from selenium.webdriver.common.by import By
-
+import selenium
+import traceback
 from bs4 import BeautifulSoup
 
 from utility import database_access as DA
@@ -14,30 +15,73 @@ from utility.connect import *
 
 from datetime import datetime
 import pandas as pd
+import dataset
 import time
 import json
 import re
+import sys, os
 
-def brower_start():
+def brower_start(port):
     options = webdriver.ChromeOptions()
     browser = webdriver.Remote(
-        command_executor='http://192.53.174.202:4444/wd/hub',
+        #command_executor='http://192.53.174.202:4444/wd/hub',
+        command_executor='http://127.0.0.1:'+str(port)+'/wd/hub',
         desired_capabilities=options.to_capabilities()
     )
     return browser
 
 
-def get_url_list(driver):
-    for i in range(5, 43, 2):
-        try:
-            wait = WebDriverWait(driver, 60)
-            wait.until(
-                EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)))
-            )
-            driver.find_element(By.XPATH,'//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)).send_keys(Keys.DOWN)
+def page_down_(driver, xpath_css, time_):
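+    # Read the "x-y 項結果" result counter; when more than five results exist,
+    # repeatedly click just past the last TFQHme divider so the results pane
+    # gains focus and lazy-loads further entries.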
+    e = driver.find_element_by_css_selector('span[class="Jl2AFb"]')
+    result_count = e.text.split('-')[1].replace(' 項結果','')
+    print(result_count)
+    if int(result_count) > 5:
+        for i in range(time_):
+            e = driver.find_elements_by_css_selector('div[class="TFQHme"]')
+            action = webdriver.common.action_chains.ActionChains(driver)
+            action.move_to_element_with_offset(e[-1], e[-1].size['width'] + 1 , 0)
+            action.click()
+            action.perform()
             time.sleep(0.5)
-        except:
-            pass
+
+    # elmts = driver.find_elements_by_xpath(xpath_css)
+    # print(elmts)
+    # if len(elmts)>1:
+    #     elmt=elmts[1]
+    # else:
+    #     elmt=elmts[0]
+    # actions = ActionChains(driver)
+    # actions.move_to_element(elmt).click().perform()
+    # for i in range(time_):
+    #     try:
+    #         actions = ActionChains(driver)
+    #         actions.send_keys(Keys.PAGE_DOWN).perform()
+    #     except:
+    #         traceback.print_exc()
+    #     time.sleep(0.5)
+
+
+def get_url_list(driver):
+    # for i in range(5, 43, 2):
+    #     try:
+    #         wait = WebDriverWait(driver, 60)
+    #         wait.until(
+    #             EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)))
+    #         )
+    #         driver.find_element(By.XPATH,'//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)).send_keys(Keys.DOWN)
+    #         time.sleep(0.5)
+    #     except:
+    #         pass
+
+    # wait = WebDriverWait(driver, 30)
+    # try:
+    #     wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="ppdPk-Ej1Yeb-LgbsSe-tJiF1e"]')))
+    # except selenium.common.exceptions.TimeoutException:
+    #     traceback.print_exc()
+    #     return "EMPTY"
+
+    page_down_(driver, '//div[@class="TFQHme"]', 8)
+
     url_soup = BeautifulSoup(driver.page_source, 'html.parser')
     url_list = []
     for i in url_soup.find_all('a'):
@@ -46,7 +90,7 @@ def get_url_list(driver):
                 url_list += [[i['href'], i['aria-label']]]
         except:
             pass
-    
+    # print(len(url_list))
     return url_list
 
 
@@ -57,44 +101,119 @@ def keyin_keyword(driver, keyword):
     time.sleep(3)
 
 
+# def get_crawler_list(db):
+    
+#     result = db.query('select keyword, count(*) from shop_item_list group by keyword')
+#     result = pd.DataFrame([i for i in result])
+#     result.columns = ['keyword', 'count']
+#     result = result[result['count'] < 100]
+#     keyword = result.sample(1).iloc[0]['keyword']
+    
+#     num=0
+#     cursor=db.query('select num from progress_list2 where kw = "'+keyword+'"')
+#     for c in cursor:
+#         num=c['num']
+#         break
+
+#     cursor=db.query('select * from lat_lon_loc where num >= '+str(num))
+#     #    cursor=db.query('select * from lat_lon_loc')
+#     lst=[]
+#     for c in cursor:
+#         lst.append({'num':c['num'],'loc':c['loc'],'lat':c['lat'],'lon':c['lon']})
+        
+#     return keyword, lst
+    
+
+def get_crawler_list(db):
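+    # Choose the next keyword to crawl: drop junk keywords containing '項'
+    # and skip those already tracked in progress_list2.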
+    result = db.query('select * from shop_item_list order by keyword')
+    result = pd.DataFrame([i for i in result])
+    result = result[~result.keyword.str.contains('項')]
+
+    progress = db.query('select distinct(kw) from progress_list2 where num < 367')
+    progress = pd.DataFrame([i for i in progress])
+
+    if len(progress) != 0:
+        keyword = result[~result['keyword'].isin(progress.kw.to_list())].iloc[0]['keyword']
+    else:
+        keyword = result.iloc[0]['keyword']
+        
+    return keyword
+
+
+def get_lon_lat_list(db, keyword):
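+    # Resume the lat/lon grid for this keyword from the last position (num)
+    # recorded in progress_list.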
+    num=0
+    cursor=db.query('select num from progress_list where kw = "'+keyword+'"')
+    for c in cursor:
+        num=c['num']
+        break
+
+    cursor=db.query('select * from lat_lon_loc where num >= '+str(num))
+
+    lst=[]
+    for c in cursor:
+        lst.append({'num':c['num'],'loc':c['loc'],'lat':c['lat'],'lon':c['lon']})
+
+    return lst
+
+
 def main():
-    data = pd.read_csv('lat_long_location.csv', index_col = 0)
-    db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
+    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/google_poi?charset=utf8mb4')
+    table = db['shop_item_list3']
+    table2 = db['progress_list2']
+
+    port=4447
+    if len(sys.argv) > 1 :
+        port=int(sys.argv[1])
+        print('restart docker p{}'.format(port))
+        os.system('sudo docker container restart p'+str(port))
+        time.sleep(8)
+
     print('drvier start...')
-    driver = brower_start()
+    driver = brower_start(port)
+    
+
+    for i in range(10):
+        try:
+            keyword  = get_crawler_list(db)
+            print(keyword)
+            lst = get_lon_lat_list(db, keyword)
+            print(keyword, len(lst))
 
-#    for keyword in ['碗粿','炒麵','肉粽']:
-    for keyword in ['碗粿']:
+            for r in lst:
+                latitude = r['lat'] # latitude
+                longitude = r['lon'] # longitude
+                table2.upsert({'kw':keyword,'num':r['num']},['kw'])
 
-        for k, row in data.iterrows():
-            try:
-                latitude = row['latitude'] #緯度
-                longitude = row['longitude'] #精度
                 url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
                 driver.get(url)
                 keyin_keyword(driver, keyword)
-                
-                for page in range(4):
-                    print(keyword, k, row['location'], latitude, longitude, page)
+                failcnt = 0
+                for page in range(10):
+                    print(keyword, latitude, longitude, page)
                     url_list = get_url_list(driver)
-                    
-                    shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
+                    duplicate = 0
+                    # shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
                     for item in url_list:
-                        result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
-                        insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
-                                        .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
+                        try:
+                            table.insert({'name':item[1],'lon':longitude, 'lat':latitude, \
+                                          'keyword':keyword, 'item_url':item[0],'crawler_date':datetime.today().strftime("%Y/%m/%d %H:%M")})
+                        except:
+                            duplicate += 1
+                    print(len(url_list), duplicate)
+        #                     result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
+        #                     insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
+        #                                     .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
+
+        #                     DA.mysql_insert_data(db, insert_sql)
 
-                        DA.mysql_insert_data(db, insert_sql)
-                    
                     if page < 2 :
                         element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
+                        if element.get_attribute('disabled'):
+                            break
                         driver.implicitly_wait(30)
                         ActionChains(driver).move_to_element(element).click(element).perform() 
-            except:
-                error = pd.DataFrame([row])
-                error.to_csv('error_shop_item_list.csv', mode='a', header = False)
-                driver.close()
-                driver = brower_start()
+        except:
+            traceback.print_exc()
 
 
 if __name__ == '__main__':

+ 6 - 5
utility/googlemapsearch.sql

@@ -1,5 +1,6 @@
-CREATE TABLE `shop_list` (
+CREATE TABLE `shop_list2` (
    `id` int NOT NULL AUTO_INCREMENT,
+   `unique_id` VARCHAR(20),
    `name` VARCHAR(100),
    `lon` DOUBLE,
    `lat` DOUBLE,
@@ -33,12 +34,12 @@ CREATE TABLE `shop_list` (
    `shop_photo` JSON,
    `menu_photo` JSON,
 
-   `google_url` VARCHAR(200),
-   `item_url` VARCHAR(200),
+   `google_url` text,
+   `item_url` text,
    `keyword` VARCHAR(20),
    `crawler_date` char(20) NOT NULL,
    PRIMARY KEY (`id`),
-   UNIQUE KEY (`item_url`)
+   UNIQUE KEY (`unique_id`)
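+   -- MySQL cannot place a UNIQUE KEY on a TEXT column without a prefix
+   -- length, so rows are now deduplicated by the short-link unique_id.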
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
 
 
@@ -55,7 +56,7 @@ CREATE TABLE `shop_item_list` (
 ) ENGINE=InnoDB DEFAULT CHARSET=utf8;
 
 
-CREATE TABLE `error_list` (
+CREATE TABLE `error_list2` (
    `id` int NOT NULL AUTO_INCREMENT,
    `name` VARCHAR(100),
    `lon` DOUBLE,

+ 3 - 3
utility/parseutils.py

@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 DB_NAME = 'google_poi'
-SHOP_LIST_TABLE = 'shop_list'
-SHOP_LIST_TABLE_COL = ['name', 'lon', 'lat', 'city', 'area', 
+SHOP_LIST_TABLE = 'shop_list2'
+SHOP_LIST_TABLE_COL = ['unique_id', 'name', 'lon', 'lat', 'city', 'area', 
                       'rating', 'user_ratings_total', 'category', 'price_level', 
                       'addr', 'tel', 'services', 'products', 'choices', 
                       'facilities', 'groups', 'plans', 'payments', 'safeties', 
@@ -29,7 +29,7 @@ intro_list = {
     '健康與安全':['safeties','safety'],
     '特色':['specials','special'],
     '無障礙程度':['barrierlevels','barrierlevel'],
-    '詳細資料':['items','item'],
+    '詳細資料':['items','item']
 }
 
 week_list = {