# feature_snippets.py (1.9 KB)
import csv
import time
import urllib
import urllib.parse
from random import randint

import dataset
import pandas as pd
import pymysql
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By

from setting import rua
  12. def restart_browser():
  13. s = Service('/Users/zooeytsai/Downloads/chromedriver 4')
  14. options = webdriver.ChromeOptions()
  15. options.add_argument("user-agent=%s" % rua())
  16. options.add_argument('--headless')
  17. options.add_argument("--incognito")
  18. driver = webdriver.Chrome(options=options, service=s)
  19. str1 = driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0]
  20. print('這裡',str1)
  21. driver.delete_all_cookies()
  22. driver.set_window_size(950, 20000)
  23. return driver
  24. def read_csv():
  25. lst = []
  26. df = pd.read_csv('/Users/zooeytsai/Documents/幸福空間FAQ關鍵字.csv')
  27. for i, row in df.iterrows():
  28. lst.append(row['熱門查詢項目'])
  29. return lst
  30. lst = read_csv()
  31. for term in lst:
  32. print(term)
  33. driver = restart_browser()
  34. escaped_search_term = urllib.parse.quote(term)
  35. url = 'https://mops.twse.com.tw/mops/web/t135sb03'
  36. driver.get(url)
  37. time.sleep(6)
  38. df = pd.DataFrame()
  39. elmts = driver.find_elements(By.XPATH, "/html/body/div[7]/div/div[10]/div/div[2]/div[2]/div/div/div[1]/block-component/div/div[1]/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div/div/div[1]/a")
  40. datalist = []
  41. print(len(elmts))
  42. for elmt in elmts:
  43. href = elmt.get_attribute('href')
  44. txt = elmt.text
  45. print(txt)
  46. datalist.append([term,elmt.text,href])
  47. with open('/Users/zooeytsai/Documents/幸福空間FAQ統計.csv','a') as f:
  48. writer = csv.writer(f)
  49. for i in datalist:
  50. print(i)
  51. writer.writerow(i)
  52. driver.quit()
  53. print('中場休息')
  54. time.sleep(randint(45, 50))