```python
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
# from selenium.webdriver.common.desired_capabilities import DesiredCapabilities


def brower_start():
    # Connect to a remote Selenium Chrome session (e.g. a standalone Selenium server).
    options = webdriver.ChromeOptions()
    browser = webdriver.Remote(
        command_executor='http://192.53.174.202:4444/wd/hub',
        desired_capabilities=options.to_capabilities()
    )
    return browser


def STR_to_NUM(data):
    # Split the comma-separated slice and return [latitude, longitude].
    line = tuple(data.split(','))  # Note 1
    num1 = float(line[1])
    num2 = float(line[2])
    line = [num2, num1]
    return line


def crawler(browser, location):
    url = 'https://www.google.com.tw/maps/place?q={}'.format(location)
    browser.get(url)
    soup = BeautifulSoup(browser.page_source, 'html.parser')
    # soup = BeautifulSoup(response.text, "html.parser")
    text = soup.prettify()  # text holds the page's HTML content
    initial_pos = text.find(";window.APP_INITIALIZATION_STATE")
    # Locate ;window.APP_INITIALIZATION_STATE and take the parameters that follow it.
    data = text[initial_pos + 36:initial_pos + 85]
    num_data = STR_to_NUM(data)
    return num_data


def main():
    location_list = pd.read_csv('location_list.csv')
    print('start browser...')
    browser = brower_start()
    result = []
    for key, group in location_list.iterrows():
        print(key)
        location = group['location']
        num_data = crawler(browser, location)
        result += [[location, num_data[0], num_data[1]]]
    pd.DataFrame(result, columns=['location', 'latitude', 'longitude']).to_csv('lat_long_location.csv')


if __name__ == "__main__":
    main()
```
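
As a quick usage sketch (the place names and coordinate string below are made-up placeholders, not values from the script's actual output): `location_list.csv` only needs a `location` column, and `STR_to_NUM` turns the comma-separated slice pulled out of `APP_INITIALIZATION_STATE` into a `[latitude, longitude]` pair, which `main()` then writes to `lat_long_location.csv`.

```python
# Illustrative only; run in the same script so STR_to_NUM is in scope.
import pandas as pd

# The script reads just the 'location' column from location_list.csv.
pd.DataFrame({'location': ['台北車站', '高雄車站']}).to_csv('location_list.csv', index=False)

# The extracted slice looks roughly like "<span>,<longitude>,<latitude>,...";
# STR_to_NUM swaps the order and returns [latitude, longitude].
sample = '0.0068499,121.5170200,25.0477505,'
print(STR_to_NUM(sample))  # -> [25.0477505, 121.51702]
```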