# -*- coding: utf-8 -*-
"""Crawl Google Maps search results for the keyword "咖啡" (coffee) around
seed latitude/longitude points read from ``lat_long_location.csv``, and
insert every place found into the ``shop_item_list`` MySQL table."""

from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException
from bs4 import BeautifulSoup
from utility import database_access as DA
from utility.parseutils import *
from utility.connect import *
from datetime import datetime
import pandas as pd
import time
import json
import re


def brower_start():
    """Start a Chrome session on the remote Selenium grid and return the driver."""
    options = webdriver.ChromeOptions()
    browser = webdriver.Remote(
        command_executor='http://192.53.174.202:4444/wd/hub',
        desired_capabilities=options.to_capabilities()
    )
    return browser


def get_url_list(driver):
    """Scroll the left-hand result pane so lazy-loaded entries render, then
    parse the page source and return ``[[href, aria_label], ...]`` for every
    anchor whose href contains ``maps/place``.

    :param driver: an active Selenium WebDriver on a Maps results page.
    :return: list of ``[url, place_name]`` pairs (possibly empty).
    """
    # Result entries sit at odd div indices 5, 7, ..., 41 inside the pane;
    # sending DOWN to each forces Google Maps to lazy-load the full list.
    for i in range(5, 43, 2):
        xpath = ('//*[@id="pane"]/div/div[1]/div/div/div[2]'
                 '/div[1]/div[{}]/div/a'.format(i))
        try:
            wait = WebDriverWait(driver, 60)
            wait.until(EC.element_to_be_clickable((By.XPATH, xpath)))
            driver.find_element(By.XPATH, xpath).send_keys(Keys.DOWN)
            time.sleep(1)
        except WebDriverException:
            # Fewer results than slots on this page: missing entries are
            # expected, keep scrolling the ones that do exist.
            pass

    url_soup = BeautifulSoup(driver.page_source, 'html.parser')
    url_list = []
    for anchor in url_soup.find_all('a'):
        href = anchor.get('href')
        label = anchor.get('aria-label')
        # Place links carry both a maps/place href and an aria-label name;
        # anchors missing either attribute are navigation chrome.
        if href and label and 'maps/place' in href:
            url_list.append([href, label])
    return url_list


def keyin_keyword(driver, keyword):
    """Type *keyword* into the Maps search box and submit it with RETURN."""
    button = driver.find_element(By.ID, "searchbox")
    driver.implicitly_wait(30)
    ActionChains(driver).move_to_element(button)\
        .send_keys(keyword).send_keys(Keys.RETURN).perform()
    time.sleep(3)  # give the result pane time to populate


def main():
    """Iterate over every seed coordinate, search for coffee shops on up to
    four result pages, and insert each hit into ``shop_item_list``."""
    data = pd.read_csv('lat_long_location.csv', index_col=0)
    db = DA.mysql_connect(MYSQL_CONFIG, DB_NAME)

    print('drvier start...')
    driver = brower_start()
    try:
        for k, row in data.iterrows():
            latitude = row['latitude']    # 緯度 (latitude)
            longitude = row['longitude']  # 經度 (longitude)

            url = 'https://www.google.com.tw/maps/@{},{},15z?hl=zh-TW'.format(
                latitude, longitude)
            driver.get(url)

            keyword = '咖啡'
            keyin_keyword(driver, keyword)

            for page in range(4):
                print(k, row['location'], latitude, longitude, page)
                url_list = get_url_list(driver)

                shop_item_list_col = ['name', 'lon', 'lat', 'keyword',
                                      'item_url', 'crawler_date']
                for item in url_list:
                    result = [item[1], longitude, latitude, keyword, item[0],
                              datetime.today().strftime("%Y/%m/%d %H:%M")]
                    # NOTE(review): values are interpolated into the SQL
                    # string, so a place name containing a quote will break
                    # the statement (injection-prone). DA.mysql_insert_data
                    # only accepts a raw SQL string, so this can't be
                    # parameterized from here — fix in database_access.
                    insert_sql = """INSERT IGNORE INTO {}{} VALUES {}"""\
                        .format('shop_item_list',
                                str(tuple(shop_item_list_col)).replace('\'', ''),
                                tuple(result))
                    DA.mysql_insert_data(db, insert_sql)

                # NOTE(review): with range(4) this clicks "next page" only
                # for pages 0 and 1, so pages 2 and 3 re-scrape the same
                # results — `page < 3` looks intended; confirm before changing.
                if page < 2:
                    element = driver.find_element(
                        By.ID, 'ppdPk-Ej1Yeb-LgbsSe-tJiF1e')
                    driver.implicitly_wait(30)
                    ActionChains(driver).move_to_element(element)\
                        .click(element).perform()
    finally:
        # Always release the remote grid session, even on a crawl failure.
        driver.quit()


if __name__ == '__main__':
    main()