|
@@ -1,7 +1,6 @@
|
|
from selenium import webdriver
|
|
from selenium import webdriver
|
|
from selenium.webdriver.chrome.service import Service
|
|
from selenium.webdriver.chrome.service import Service
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.common.by import By
|
|
-from setting import rua
|
|
|
|
import urllib
|
|
import urllib
|
|
import time
|
|
import time
|
|
import csv
|
|
import csv
|
|
@@ -14,7 +13,7 @@ import pymysql
|
|
def restart_browser():
|
|
def restart_browser():
|
|
s = Service('/Users/zooeytsai/Downloads/chromedriver 4')
|
|
s = Service('/Users/zooeytsai/Downloads/chromedriver 4')
|
|
options = webdriver.ChromeOptions()
|
|
options = webdriver.ChromeOptions()
|
|
- options.add_argument("user-agent=%s" % rua())
|
|
|
|
|
|
+ # options.add_argument("user-agent=%s" % rua())
|
|
options.add_argument('--headless')
|
|
options.add_argument('--headless')
|
|
options.add_argument("--incognito")
|
|
options.add_argument("--incognito")
|
|
driver = webdriver.Chrome(options=options, service=s)
|
|
driver = webdriver.Chrome(options=options, service=s)
|
|
@@ -38,14 +37,17 @@ for term in lst:
|
|
print(term)
|
|
print(term)
|
|
driver = restart_browser()
|
|
driver = restart_browser()
|
|
escaped_search_term = urllib.parse.quote(term)
|
|
escaped_search_term = urllib.parse.quote(term)
|
|
- url = 'https://mops.twse.com.tw/mops/web/t135sb03'
|
|
|
|
- driver.get(url)
|
|
|
|
|
|
+ googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 20, 'zh-TW')
|
|
|
|
+
|
|
|
|
+ driver.get(googleurl)
|
|
|
|
+ print(driver.current_url)
|
|
time.sleep(6)
|
|
time.sleep(6)
|
|
-
|
|
|
|
- df = pd.DataFrame()
|
|
|
|
|
|
|
|
- elmts = driver.find_elements(By.XPATH, "/html/body/div[7]/div/div[10]/div/div[2]/div[2]/div/div/div[1]/block-component/div/div[1]/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div/div/div[1]/a")
|
|
|
|
-
|
|
|
|
|
|
+ # df = pd.DataFrame()
|
|
|
|
+
|
|
|
|
+ elmts = driver.find_elements(By.XPATH,
|
|
|
|
+ '/html/body/div[7]/div/div[10]/div/div[2]/div[2]/div/div/div[1]/div/block-component/div/div[1]/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div/div/div[1]/a')
|
|
|
|
+
|
|
datalist = []
|
|
datalist = []
|
|
print(len(elmts))
|
|
print(len(elmts))
|
|
for elmt in elmts:
|
|
for elmt in elmts:
|
|
@@ -53,15 +55,13 @@ for term in lst:
|
|
txt = elmt.text
|
|
txt = elmt.text
|
|
print(txt)
|
|
print(txt)
|
|
|
|
|
|
- datalist.append([term,elmt.text,href])
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- with open('/Users/zooeytsai/Documents/幸福空間FAQ統計.csv','a') as f:
|
|
|
|
|
|
+ datalist.append([term, elmt.text, href])
|
|
|
|
+
|
|
|
|
+ with open('/Users/zooeytsai/Documents/幸福空間FAQ統計2.csv', 'a') as f:
|
|
writer = csv.writer(f)
|
|
writer = csv.writer(f)
|
|
for i in datalist:
|
|
for i in datalist:
|
|
print(i)
|
|
print(i)
|
|
writer.writerow(i)
|
|
writer.writerow(i)
|
|
-
|
|
|
|
|
|
|
|
driver.quit()
|
|
driver.quit()
|
|
|
|
|