|
@@ -381,64 +381,59 @@ def time_click(driver):
|
|
|
|
|
|
|
|
|
def main():
|
|
|
- data = pd.read_csv('lat_long_location.csv', index_col = 0)
|
|
|
- tmp = data.iloc[0]
|
|
|
- latitude = tmp['latitude'] #緯度
|
|
|
- longitude = tmp['longitude'] #精度
|
|
|
-
|
|
|
- url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude,longitude)
|
|
|
# driver = serive_create('Profile 1')
|
|
|
-
|
|
|
db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)
|
|
|
+ url_list = pd.read_csv('result/shop_item_list_20211210.csv', index_col=0)
|
|
|
print('drvier start...')
|
|
|
driver = brower_start()
|
|
|
- driver.get(url)
|
|
|
- # keyin_keyword(driver, '咖啡')
|
|
|
- # url_list = get_url_list(driver)
|
|
|
- url_list = pd.read_csv('result/shop_item_list_20211210.csv', index_col=0)
|
|
|
-
|
|
|
- # try:
|
|
|
- for key, row in url_list.iterrows():
|
|
|
- name = row['name']
|
|
|
- item_url = row['item_url']
|
|
|
- print(key, name, ': ' ,item_url)
|
|
|
- driver.get(item_url)
|
|
|
- for i in range(4, 26, 2):
|
|
|
- element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[{}]'.format(i))
|
|
|
- actions = ActionChains(driver)
|
|
|
- actions.move_to_element(element).perform()
|
|
|
- print('start...')
|
|
|
- time_status = time_click(driver)
|
|
|
- time.sleep(1)
|
|
|
- shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
-
|
|
|
- output = {
|
|
|
- 'name': blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text)
|
|
|
- }
|
|
|
- print(output['name'])
|
|
|
-
|
|
|
- output = get_shop_info(driver, output, shop_soup)
|
|
|
-
|
|
|
- output = get_intro_info(driver, output)
|
|
|
-
|
|
|
- output = get_time_list(shop_soup, output)
|
|
|
-
|
|
|
- output = get_reviews(driver, output)
|
|
|
-
|
|
|
- output = find_big_photo(output, driver)
|
|
|
-
|
|
|
- output_name = output['name'].replace('(','').replace(')', '')
|
|
|
- query_name = '{}+{}'.format(output_name, output['addr'])
|
|
|
- query_name = query_name.replace(' ','')
|
|
|
- output['query_name'] = query_name
|
|
|
- output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
|
|
|
- time.sleep(1)
|
|
|
- data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
|
|
|
-
|
|
|
- # except:
|
|
|
- # shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
- # print("error {}".format(id_))
|
|
|
- # print(blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text))
|
|
|
+
|
|
|
+ try:
|
|
|
+ for key, row in url_list.iterrows():
|
|
|
+ name = row['name']
|
|
|
+ item_url = row['item_url']
|
|
|
+ result = DA.mysql_select_data(db, 'select item_url from shop_list where item_url="{}"'.format(item_url))
|
|
|
+ if len(result) != 0: continue
|
|
|
+ print(key, name, ': ' ,item_url)
|
|
|
+
|
|
|
+ driver.get(item_url)
|
|
|
+ for i in range(4, 26, 2):
|
|
|
+ element = driver.find_element_by_xpath('//*[@id="pane"]/div/div[1]/div/div/div[{}]'.format(i))
|
|
|
+ actions = ActionChains(driver)
|
|
|
+ actions.move_to_element(element).perform()
|
|
|
+ time.sleep(0.5)
|
|
|
+ print('start...')
|
|
|
+ time_status = time_click(driver)
|
|
|
+ time.sleep(1)
|
|
|
+ shop_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
|
+
|
|
|
+ output = {
|
|
|
+ 'name': blank_check(shop_soup.find('h1', class_='x3AX1-LfntMc-header-title-title').text)
|
|
|
+ }
|
|
|
+ print(output['name'])
|
|
|
+
|
|
|
+ output = get_shop_info(driver, output, shop_soup)
|
|
|
+
|
|
|
+ output = get_intro_info(driver, output)
|
|
|
+
|
|
|
+ output = get_time_list(shop_soup, output)
|
|
|
+
|
|
|
+ output = get_reviews(driver, output)
|
|
|
+
|
|
|
+ output = find_big_photo(output, driver)
|
|
|
+
|
|
|
+ output_name = output['name'].replace('(','').replace(')', '')
|
|
|
+ query_name = '{}+{}'.format(output_name, output['addr'])
|
|
|
+ query_name = query_name.replace(' ','')
|
|
|
+ output['item_url'] = item_url
|
|
|
+ output['google_url'] = 'https://www.google.com.tw/search?q={}'.format(query_name)
|
|
|
+ time.sleep(1)
|
|
|
+ data_select_insert(db, SHOP_LIST_TABLE, SHOP_LIST_TABLE_COL, output)
|
|
|
+
|
|
|
+ except Exception:
|
|
|
+ error = pd.DataFrame([row])
|
|
|
+ error.to_csv('error.csv', mode='a', header = False)
|
|
|
+ driver.close()
|
|
|
+ driver = brower_start()
|
|
|
|
|
|
|
|
|
|