zooeytsai hai 2 anos
pai
achega
765e00bbc2
Modificáronse 1 ficheiros con 69 adicións e 0 borrados
  1. 69 0
      SEO/feature_snippets.py

+ 69 - 0
SEO/feature_snippets.py

@@ -0,0 +1,69 @@
+import csv
+import time
+import urllib
+import urllib.parse
+from random import randint
+
+import dataset
+import pandas as pd
+import pymysql
+from selenium import webdriver
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+
+from setting import rua
+
+
+def restart_browser():
+    """Start a fresh headless, incognito Chrome session and return its driver.
+
+    The user-agent string is taken from ``rua()`` (presumably a random
+    user agent -- confirm in ``setting``). The chromedriver version reported
+    by the session is printed, all cookies are deleted, and the window is
+    resized to 950x20000 before the driver is handed back.
+
+    Returns:
+        The ``webdriver.Chrome`` instance; the caller is responsible for
+        calling ``quit()`` on it.
+    """
+    chrome_options = webdriver.ChromeOptions()
+    for flag in ("user-agent=%s" % rua(), '--headless', "--incognito"):
+        chrome_options.add_argument(flag)
+
+    driver_service = Service('/Users/zooeytsai/Downloads/chromedriver 4')
+    browser = webdriver.Chrome(options=chrome_options, service=driver_service)
+
+    # The capability value holds more than the version; keep only the text
+    # before the first space.
+    full_version = browser.capabilities['chrome']['chromedriverVersion']
+    version_number, _, _ = full_version.partition(' ')
+    print('這裡', version_number)
+
+    browser.delete_all_cookies()
+    browser.set_window_size(950, 20000)
+    return browser
+
+
+
+def read_csv():
+    """Return the search keywords stored in the FAQ keyword CSV.
+
+    Reads the fixed CSV file and returns the values of its '熱門查詢項目'
+    column, in file order, as a plain list. Blank cells, which pandas loads
+    as NaN floats, are skipped because a NaN keyword cannot be URL-quoted
+    or searched by the caller.
+
+    Returns:
+        list: the non-blank values of the '熱門查詢項目' column.
+
+    Raises:
+        FileNotFoundError: if the CSV file does not exist.
+        KeyError: if the CSV has no '熱門查詢項目' column.
+    """
+    df = pd.read_csv('/Users/zooeytsai/Documents/幸福空間FAQ關鍵字.csv')
+    # Select the column directly instead of walking the frame row by row
+    # with iterrows(), which builds a Series per row just to read one cell.
+    return df['熱門查詢項目'].dropna().tolist()
+
+# Scrape loop: for every keyword, open a fresh browser session, collect the
+# matching links from the page, and append them to the statistics CSV.
+lst = read_csv()
+for term in lst:
+    print(term)
+    driver = restart_browser()
+    # try/finally guarantees the headless Chrome process is shut down even
+    # when loading the page, querying the DOM, or writing the CSV fails.
+    try:
+        # NOTE(review): the escaped term is computed but never used -- every
+        # iteration loads the same fixed URL below. Presumably the term was
+        # meant to be part of the URL; confirm the intended target.
+        escaped_search_term = urllib.parse.quote(term)
+        url = 'https://mops.twse.com.tw/mops/web/t135sb03'
+        driver.get(url)
+        # Fixed pause before querying the DOM (presumably to let the page
+        # finish rendering).
+        time.sleep(6)
+
+        elmts = driver.find_elements(By.XPATH, "/html/body/div[7]/div/div[10]/div/div[2]/div[2]/div/div/div[1]/block-component/div/div[1]/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div/div/div[1]/a")
+
+        # One [keyword, link text, link target] row per matched anchor.
+        datalist = []
+        print(len(elmts))
+        for elmt in elmts:
+            href = elmt.get_attribute('href')
+            txt = elmt.text
+            print(txt)
+            datalist.append([term, txt, href])
+
+        # newline='' is required by the csv module so it controls line
+        # endings itself; the explicit encoding keeps the non-ASCII text
+        # independent of the platform default.
+        with open('/Users/zooeytsai/Documents/幸福空間FAQ統計.csv', 'a',
+                  newline='', encoding='utf-8') as f:
+            writer = csv.writer(f)
+            for i in datalist:
+                print(i)
+                writer.writerow(i)
+    finally:
+        driver.quit()
+
+    # Random 45-50 second pause between keywords.
+    print('中場休息')
+    time.sleep(randint(45, 50))