"""Look up coordinates for place names via Google Maps in a remote Selenium browser.

Reads place names from ``location_list.csv`` and writes the extracted values
to ``lat_long_location.csv``.
"""
from urllib.parse import quote

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
def brower_start(command_executor='http://192.53.174.202:4444/wd/hub'):
    """Open a Chrome session on a remote Selenium server and return the driver.

    Args:
        command_executor: URL of the remote WebDriver endpoint. Defaults to
            the hub address this script was originally hard-wired to, so
            existing zero-argument calls behave as before.

    Returns:
        The ``webdriver.Remote`` instance. The caller owns the session and
        should call ``quit()`` on it when finished.
    """
    options = webdriver.ChromeOptions()
    # NOTE(review): passing ``desired_capabilities`` is the older Selenium
    # calling convention; newer releases expect ``options=options`` instead.
    # Confirm against the Selenium version this project pins before changing.
    return webdriver.Remote(
        command_executor=command_executor,
        desired_capabilities=options.to_capabilities(),
    )
def STR_to_NUM(data):
    """Parse the second and third comma-separated numbers out of *data*.

    The first field is ignored. Square brackets and whitespace around the
    two fields that are read get stripped first, because the text is sliced
    out of a nested array literal and a field such as ``"25.04]"`` would
    otherwise make ``float()`` fail.

    Args:
        data: Comma-separated text with at least three fields.

    Returns:
        A two-element list ``[third_field, second_field]`` of floats, i.e.
        the two values in swapped order.

    Raises:
        IndexError: If *data* has fewer than three comma-separated fields.
        ValueError: If either field is not a number after cleanup.
    """
    fields = data.split(',')
    num1 = float(fields[1].strip('[] \t\r\n'))
    num2 = float(fields[2].strip('[] \t\r\n'))
    return [num2, num1]
def crawler(browser, location):
    """Load the Google Maps page for *location* and extract two numbers from it.

    The page source is searched for the ``;window.APP_INITIALIZATION_STATE``
    marker, and a short run of text following it is handed to ``STR_to_NUM``.

    Args:
        browser: A Selenium WebDriver-like object providing ``get(url)`` and
            ``page_source``.
        location: Place name to query. It is converted to ``str`` and
            percent-encoded before being put into the URL.

    Returns:
        Whatever ``STR_to_NUM`` returns for the extracted text (a two-element
        list of floats).

    Raises:
        ValueError: If the marker is not present in the loaded page.
    """
    # Percent-encode the query so characters such as '&', '#' or spaces in a
    # place name cannot truncate or alter the query string.
    url = 'https://www.google.com.tw/maps/place?q={}'.format(
        quote(str(location), safe='')
    )
    browser.get(url)
    soup = BeautifulSoup(browser.page_source, 'html.parser')
    text = soup.prettify()

    marker = ";window.APP_INITIALIZATION_STATE"
    initial_pos = text.find(marker)
    if initial_pos == -1:
        # Without this check, find()'s -1 would silently slice unrelated text.
        raise ValueError(
            'APP_INITIALIZATION_STATE not found in page for {!r}'.format(location)
        )

    # 36 = the 32-character marker plus 4 more characters -- presumably the
    # "=[[[" that opens the array literal; confirm against a live page.
    start = initial_pos + 36
    window = text[start:start + 49]
    # Keep only the text up to the first closing bracket so trailing array
    # syntax is not passed on as part of a number.
    data = window.partition(']')[0]
    return STR_to_NUM(data)
def main():
    """Crawl every place in ``location_list.csv`` and save the results.

    Reads the ``location`` column of ``location_list.csv``, runs ``crawler``
    on each value with a single shared browser session, and writes the rows
    to ``lat_long_location.csv`` with the columns ``location``, ``latitude``
    and ``longitude``.

    The browser session is always closed, even if a lookup raises. In that
    case the exception propagates and no output file is written.
    """
    location_list = pd.read_csv('location_list.csv')
    print('start brower...')
    browser = brower_start()
    result = []
    try:
        print('start crawler...')
        for key, group in location_list.iterrows():
            print(key)  # progress indicator: the row index being processed
            location = group['location']
            num_data = crawler(browser, location)
            result.append([location, num_data[0], num_data[1]])
    finally:
        # Release the remote session so it does not linger on the server.
        browser.quit()
    pd.DataFrame(
        result, columns=['location', 'latitude', 'longitude']
    ).to_csv('lat_long_location.csv')
# Run the crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    main()