|
@@ -0,0 +1,69 @@
|
|
|
+from selenium import webdriver
|
|
|
+from selenium.webdriver.chrome.service import Service
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
+from setting import rua
|
|
|
+import urllib
|
|
|
+import time
|
|
|
+import csv
|
|
|
+from random import randint
|
|
|
+import dataset
|
|
|
+import pandas as pd
|
|
|
+import pymysql
|
|
|
+
|
|
|
+
|
|
|
def restart_browser(driver_path='/Users/zooeytsai/Downloads/chromedriver 4'):
    """Launch a fresh headless, incognito Chrome session and return its driver.

    Args:
        driver_path: Filesystem path of the chromedriver executable. Defaults
            to the location this script originally hard-coded.

    Returns:
        The selenium Chrome WebDriver, with cookies cleared and the window
        resized to 950x20000.

    Raises:
        Any exception raised by selenium while launching or configuring the
        browser. If configuration fails after the browser has started, the
        browser is quit before the exception is re-raised so that no Chrome
        process is left behind.
    """
    s = Service(driver_path)

    options = webdriver.ChromeOptions()
    # The user-agent string is whatever `rua()` (imported from `setting`) returns.
    options.add_argument("user-agent=%s" % rua())
    options.add_argument('--headless')
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options, service=s)
    try:
        # Report the chromedriver version actually in use (first token of the
        # capability string).
        str1 = driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0]
        print('這裡', str1)
        driver.delete_all_cookies()
        driver.set_window_size(950, 20000)
    except Exception:
        # Do not leak a running browser when post-launch setup fails.
        driver.quit()
        raise
    return driver
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
def read_csv(path='/Users/zooeytsai/Documents/幸福空間FAQ關鍵字.csv',
             column='熱門查詢項目'):
    """Return the values of one column of a CSV file as a list.

    Args:
        path: Location of the CSV file to read. Defaults to the keyword file
            this script originally hard-coded.
        column: Header name of the column to extract. Defaults to the column
            this script originally hard-coded.

    Returns:
        A list with one entry per data row, in file order. The list is empty
        when the file has a header but no data rows.

    Raises:
        KeyError: If ``column`` is not a header in the file.
    """
    df = pd.read_csv(path)
    # Direct column access replaces the row-by-row iterrows()/append loop.
    return df[column].tolist()
|
|
|
+
|
|
|
# For every keyword: open a fresh browser, load the page, collect the matching
# links and append one CSV row per link as [keyword, link text, link href].
#
# NOTE(review): the original computed urllib.parse.quote(term) but never used
# the result, and `url` below does not depend on `term`, so every iteration
# loads the same page. Confirm whether the keyword was meant to be part of the
# URL or typed into the page.
lst = read_csv()
for term in lst:
    print(term)
    driver = restart_browser()
    try:
        url = 'https://mops.twse.com.tw/mops/web/t135sb03'
        driver.get(url)
        # Fixed wait after navigation before querying the DOM.
        time.sleep(6)

        elmts = driver.find_elements(By.XPATH, "/html/body/div[7]/div/div[10]/div/div[2]/div[2]/div/div/div[1]/block-component/div/div[1]/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div/div/div[1]/a")
        print(len(elmts))

        datalist = []
        for elmt in elmts:
            href = elmt.get_attribute('href')
            txt = elmt.text  # read once and reuse for both the log and the row
            print(txt)
            datalist.append([term, txt, href])
    finally:
        # Always shut the browser down, even when scraping raises, so headless
        # Chrome processes do not pile up across iterations.
        driver.quit()

    # newline='' is what the csv module requires of files it writes to; the
    # explicit encoding keeps the non-ASCII text independent of the locale.
    with open('/Users/zooeytsai/Documents/幸福空間FAQ統計.csv', 'a',
              newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        for i in datalist:
            print(i)
            writer.writerow(i)

    print('中場休息')
    # Pause 45-50 seconds before the next keyword (presumably to throttle
    # requests to the site).
    time.sleep(randint(45, 50))
|