"""Record Google search result rankings for a fixed list of keywords.

For each keyword a headless Chrome session loads the Google results page and
every result link found is inserted into the ``google_rank`` table.
"""
import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import fire
import pymysql
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

pymysql.install_as_MySQLdb()

# NOTE(review): credentials are embedded in source control; consider loading
# this URL from configuration or the environment instead.
DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'

# Raw string: the original literal relied on the invalid escape sequence '\/',
# which emits a SyntaxWarning on Python 3.12+. The runtime value is unchanged.
CHROMEDRIVER_PATH = r'C:\/Users\/s1301\/Downloads\/chromedriver_107\/chromedriver'

USER_AGENT = (
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
)

# XPath of the anchor elements treated as individual search results.
RESULT_LINK_XPATH = "//div[@class='yuRUbf']/a"

# Shared WebDriver instance; created and torn down by run_once().
driver = None


def empty_query(q):
    """Load the Google results page for *q* without recording anything.

    Requires the module-level ``driver`` to be an active WebDriver.
    """
    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(googleurl)
    time.sleep(3)


def process_query(qs):
    """Search Google for *qs* and store every result link in ``google_rank``.

    One row is inserted per result link with its title, URL, the keyword,
    the insertion timestamp and its 1-based position on the page.

    Requires the module-level ``driver`` to be an active WebDriver.

    Args:
        qs: The search keyword.
    """
    db = dataset.connect(DB_URL)
    try:
        table = db['google_rank']
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            urllib.parse.quote(qs), 100, 'zh-TW')
        driver.get(googleurl)
        print(driver.current_url)
        # Fixed pause before reading the DOM (presumably to let results render).
        time.sleep(6)

        elmts = driver.find_elements(By.XPATH, RESULT_LINK_XPATH)
        print(len(elmts))

        for ranking, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            title = elmt.text
            table.insert({
                'title': title,
                'url': href,
                'keyword': qs,
                'dt': datetime.datetime.now(),
                'ranking': ranking,
            })
    finally:
        # Release the connection even when navigation or an insert fails.
        db.close()


def run_once(q):
    """Start a fresh headless Chrome session, record rankings for *q*, quit.

    The session is stored in the module-level ``driver`` so that
    process_query() can use it.

    Args:
        q: The search keyword.
    """
    global driver

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # Disabled debugging alternatives kept for reference:
    #   options.add_argument('--remote-debugging-port=9222')
    #   options.add_experimental_option("debuggerAddress", "192.168.192.45:9922")
    options.add_argument("--user-agent=" + USER_AGENT)
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options,
                              service=Service(CHROMEDRIVER_PATH))
    try:
        print('版本', driver.capabilities['browserVersion'])
        driver.delete_all_cookies()
        driver.set_window_size(1400, 1000)
        print(q)
        process_query(q)
        time.sleep(3)
    finally:
        # Always shut the browser down so a failed query does not leave a
        # stray Chrome/chromedriver process behind.
        driver.quit()


# NOTE: a disabled earlier variant looped forever, picking a keyword with
# random.choice(lst), printing the traceback on failure and sleeping a random
# 320-520 seconds between attempts.

lst = ['信義房仲','信義 房屋','信義 房仲','雙響泡']
for i in lst:
    run_once(i)