|
@@ -35,7 +35,7 @@ def get_url_list(driver):
|
|
|
EC.element_to_be_clickable((By.XPATH, '//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)))
|
|
|
)
|
|
|
driver.find_element(By.XPATH,'//*[@id="pane"]/div/div[1]/div/div/div[2]/div[1]/div[{}]/div/a'.format(i)).send_keys(Keys.DOWN)
|
|
|
- time.sleep(1)
|
|
|
+ time.sleep(0.5)
|
|
|
except:
|
|
|
pass
|
|
|
url_soup = BeautifulSoup(driver.page_source, 'html.parser')
|
|
@@ -63,32 +63,37 @@ def main():
|
|
|
print('drvier start...')
|
|
|
driver = brower_start()
|
|
|
|
|
|
- for k, row in data.iterrows():
|
|
|
- # if k < 297:continue
|
|
|
- latitude = row['latitude'] #緯度
|
|
|
- longitude = row['longitude'] #精度
|
|
|
- url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
|
|
|
- driver.get(url)
|
|
|
+ for keyword in ['火鍋']:
|
|
|
+ for k, row in data.iterrows():
|
|
|
+ try:
|
|
|
+ latitude = row['latitude'] #緯度
|
|
|
+            longitude = row['longitude'] #經度
|
|
|
+ url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(latitude, longitude)
|
|
|
+ driver.get(url)
|
|
|
+ keyin_keyword(driver, keyword)
|
|
|
+
|
|
|
+ for page in range(4):
|
|
|
+ print(keyword, k, row['location'], latitude, longitude, page)
|
|
|
+ url_list = get_url_list(driver)
|
|
|
+
|
|
|
+ shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
|
|
|
+ for item in url_list:
|
|
|
+ result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
|
|
|
+ insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
|
|
|
+ .format('shop_item_list2', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
|
|
|
|
|
|
- keyword = '火鍋'
|
|
|
- keyin_keyword(driver, keyword)
|
|
|
-
|
|
|
- for page in range(4):
|
|
|
- print(k, row['location'], latitude, longitude, page)
|
|
|
- url_list = get_url_list(driver)
|
|
|
-
|
|
|
- shop_item_list_col = ['name','lon','lat','keyword','item_url','crawler_date']
|
|
|
- for item in url_list:
|
|
|
- result = [item[1], longitude, latitude, keyword, item[0], datetime.today().strftime("%Y/%m/%d %H:%M")]
|
|
|
- insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
|
|
|
- .format('shop_item_list', str(tuple(shop_item_list_col)).replace('\'',''), tuple(result))
|
|
|
+ DA.mysql_insert_data(db, insert_sql)
|
|
|
+
|
|
|
+ if page < 2 :
|
|
|
+ element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
|
|
|
+ driver.implicitly_wait(30)
|
|
|
+ ActionChains(driver).move_to_element(element).click(element).perform()
|
|
|
+ except:
|
|
|
+ error = pd.DataFrame([row])
|
|
|
+ error.to_csv('error_shop_item_list.csv', mode='a', header = False)
|
|
|
+ driver.close()
|
|
|
+ driver = brower_start()
|
|
|
|
|
|
- DA.mysql_insert_data(db, insert_sql)
|
|
|
-
|
|
|
- if page < 2 :
|
|
|
- element = driver.find_element_by_id('ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
|
|
|
- driver.implicitly_wait(30)
|
|
|
- ActionChains(driver).move_to_element(element).click(element).perform()
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
- main()
|
|
|
+ main()
|