feature_snippets.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
import csv
import time
import urllib
import urllib.parse
from random import randint

import dataset
import pandas as pd
import pymysql
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
  11. def restart_browser():
  12. s = Service('/Users/zooeytsai/Downloads/chromedriver 4')
  13. options = webdriver.ChromeOptions()
  14. # options.add_argument("user-agent=%s" % rua())
  15. options.add_argument('--headless')
  16. options.add_argument("--incognito")
  17. driver = webdriver.Chrome(options=options, service=s)
  18. str1 = driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0]
  19. print('這裡',str1)
  20. driver.delete_all_cookies()
  21. driver.set_window_size(950, 20000)
  22. return driver
  23. def read_csv():
  24. lst = []
  25. df = pd.read_csv('/Users/zooeytsai/Documents/幸福空間FAQ關鍵字.csv')
  26. for i, row in df.iterrows():
  27. lst.append(row['熱門查詢項目'])
  28. return lst
  29. lst = read_csv()
  30. for term in lst:
  31. print(term)
  32. driver = restart_browser()
  33. escaped_search_term = urllib.parse.quote(term)
  34. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 20, 'zh-TW')
  35. driver.get(googleurl)
  36. print(driver.current_url)
  37. time.sleep(6)
  38. # df = pd.DataFrame()
  39. elmts = driver.find_elements(By.XPATH,
  40. '/html/body/div[7]/div/div[10]/div/div[2]/div[2]/div/div/div[1]/div/block-component/div/div[1]/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div/div/div[1]/a')
  41. datalist = []
  42. print(len(elmts))
  43. for elmt in elmts:
  44. href = elmt.get_attribute('href')
  45. txt = elmt.text
  46. print(txt)
  47. datalist.append([term, elmt.text, href])
  48. with open('/Users/zooeytsai/Documents/幸福空間FAQ統計2.csv', 'a') as f:
  49. writer = csv.writer(f)
  50. for i in datalist:
  51. print(i)
  52. writer.writerow(i)
  53. driver.quit()
  54. print('中場休息')
  55. time.sleep(randint(45, 50))