lat_long_search.py — 1.6 KB
  1. import pandas as pd
  2. from bs4 import BeautifulSoup
  3. from selenium import webdriver
  4. #from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  5. def brower_start():
  6. options = webdriver.ChromeOptions()
  7. browser = webdriver.Remote(
  8. command_executor='http://192.53.174.202:4444/wd/hub',
  9. desired_capabilities=options.to_capabilities()
  10. )
  11. return browser
  12. def STR_to_NUM(data):
  13. line = tuple(data.split(',')) #註1
  14. num1 = float(line[1])
  15. num2 = float(line[2])
  16. line = [num2, num1]
  17. return line
  18. def crawler(browser, location):
  19. url = 'https://www.google.com.tw/maps/place?q={}'.format(location)
  20. browser.get(url)
  21. soup = BeautifulSoup(browser.page_source, 'html.parser')
  22. #soup = BeautifulSoup(response.text, "html.parser")
  23. text = soup.prettify() #text 包含了html的內容
  24. initial_pos = text.find(";window.APP_INITIALIZATION_STATE")
  25. #尋找;window.APP_INITIALIZATION_STATE所在位置
  26. data = text[initial_pos+36:initial_pos+85] #將其後的參數進行存取
  27. num_data = STR_to_NUM(data)
  28. return num_data
  29. def main():
  30. location_list = pd.read_csv('location_list.csv')
  31. print('start brower...')
  32. browser = brower_start()
  33. result = []
  34. print('start brower...')
  35. for key, group in location_list.iterrows():
  36. print(key)
  37. location = group['location']
  38. num_data = crawler(browser, location)
  39. result += [[location, num_data[0], num_data[1]]]
  40. pd.DataFrame(result, columns=['location', 'latitude', 'longitude']).to_csv('lat_long_location.csv')
  41. if __name__ == "__main__":
  42. main()