123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869 |
import csv
import time
import urllib
import urllib.parse
from random import randint

import dataset
import pandas as pd
import pymysql
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
def restart_browser(driver_path='/Users/zooeytsai/Downloads/chromedriver 4',
                    headless=True):
    """Start a new Chrome session and return its WebDriver.

    The browser is started in incognito mode, its cookies are cleared and
    the window is resized to 950x20000 before the driver is handed back.

    Args:
        driver_path: Filesystem path of the chromedriver executable.
            Defaults to the location the script was originally written for.
        headless: When true (the default) Chrome is started with
            ``--headless``.

    Returns:
        The ready-to-use ``webdriver.Chrome`` instance. The caller owns it
        and is responsible for calling ``quit()``.

    Raises:
        Exception: Whatever Selenium raises while launching or configuring
            the browser. If the failure happens after Chrome has started,
            the browser is shut down before the exception propagates.
    """
    service = Service(driver_path)
    options = webdriver.ChromeOptions()
    # NOTE(review): a custom "user-agent=..." argument built from rua() was
    # previously disabled here; rua is not defined in the visible code.
    if headless:
        options.add_argument('--headless')
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options, service=service)
    try:
        # Report which chromedriver build actually answered.
        driver_version = driver.capabilities['chrome']['chromedriverVersion'].split(' ')[0]
        print('這裡', driver_version)
        driver.delete_all_cookies()
        # Very tall viewport -- presumably so the whole results page is laid
        # out without scrolling; confirm before changing.
        driver.set_window_size(950, 20000)
    except Exception:
        # Setup failed after the browser was launched: close it so no
        # orphaned Chrome process is left behind, then report the error.
        driver.quit()
        raise
    return driver
def read_csv(path='/Users/zooeytsai/Documents/幸福空間FAQ關鍵字.csv',
             column='熱門查詢項目'):
    """Return every value of one column of a CSV file, in file order.

    Args:
        path: Location of the CSV file to load. Defaults to the keyword
            file the script was originally written for.
        column: Header name of the column to extract. Defaults to the
            keyword column of that file.

    Returns:
        A list with one entry per data row. Empty cells are returned as
        NaN, exactly as pandas parses them.

    Raises:
        KeyError: If ``column`` is not a header of the file.
        FileNotFoundError: If ``path`` does not exist.
    """
    df = pd.read_csv(path)
    # Pull the column out in one step instead of iterating row by row.
    return df[column].tolist()
# Main scraping loop: for every keyword from the input CSV, open Google's
# results page, collect the text and link of the elements matched by the
# XPath below, and append one row per element to the statistics CSV.
lst = read_csv()
for term in lst:
    print(term)
    # Each keyword gets its own browser session from restart_browser().
    driver = restart_browser()
    try:
        # Percent-encode the keyword so non-ASCII text is safe in the URL.
        escaped_search_term = urllib.parse.quote(term)
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            escaped_search_term, 20, 'zh-TW')

        driver.get(googleurl)
        print(driver.current_url)
        # Fixed pause before querying the DOM -- presumably to let the page
        # finish rendering.
        time.sleep(6)

        # NOTE(review): absolute XPath tied to Google's current markup; it
        # will match nothing if the page layout changes.
        elmts = driver.find_elements(
            By.XPATH,
            '/html/body/div[7]/div/div[10]/div/div[2]/div[2]/div/div/div[1]/div/block-component/div/div[1]/div/div/div/div/div[1]/div/div/div/div/div/div[2]/div/div/div[1]/a')

        datalist = []
        print(len(elmts))
        for elmt in elmts:
            href = elmt.get_attribute('href')
            # Read the text once and reuse it for both the log and the row.
            txt = elmt.text
            print(txt)
            datalist.append([term, txt, href])

        # newline='' is what the csv module requires of files it writes to;
        # the explicit encoding keeps the non-ASCII keywords portable.
        with open('/Users/zooeytsai/Documents/幸福空間FAQ統計2.csv', 'a',
                  encoding='utf-8', newline='') as f:
            writer = csv.writer(f)
            for i in datalist:
                print(i)
                writer.writerow(i)
    finally:
        # Always close the browser, even when the page load, the element
        # lookup or the file write fails, so no Chrome process is leaked.
        driver.quit()

    print('中場休息')
    # Randomised 45-50 s pause between keywords.
    time.sleep(randint(45, 50))
|