"""Periodically search Google for stored keywords and click a matching result.

The script loads (term, domain) rows from the ``selected_kw`` table, then
loops forever: it picks a random row, opens a headless Chrome session,
searches Google for the term, clicks the first result whose link contains
the domain, and sleeps for a random 320-520 seconds before the next run.
"""
import codecs
import json
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import requests
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Shared WebDriver instance; (re)created by run_once() for every run.
driver = None

# NOTE(security): the fallback values below are credentials committed to
# source control. They are kept only so existing deployments keep working;
# rotate them and supply LINE_NOTIFY_TOKEN / SEO_DB_URL via the environment.
LINE_NOTIFY_TOKEN = os.environ.get(
    'LINE_NOTIFY_TOKEN',
    't35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2',
)
DB_URL = os.environ.get(
    'SEO_DB_URL',
    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4',
)

headers = {
    "Authorization": "Bearer " + LINE_NOTIFY_TOKEN,
    "Content-Type": "application/x-www-form-urlencoded",
}


def send_msg(kw):
    """Post a notification containing the keyword *kw* to LINE Notify."""
    params = {"message": "處理關鍵字: " + kw}
    # A timeout keeps a stalled notification endpoint from hanging the caller.
    requests.post(
        "https://notify-api.line.me/api/notify",
        headers=headers,
        params=params,
        timeout=10,
    )


def empty_query(q):
    """Load the Google results page for *q* and wait 3 seconds.

    Uses the module-level ``driver``; nothing is clicked.
    """
    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(googleurl)
    time.sleep(3)


def process_query(qs):
    """Search Google for a term and click the first result on a given domain.

    Args:
        qs: Sequence whose first item is the search term and whose second
            item is the domain substring to look for in result links.

    Uses the module-level ``driver``. A result is clicked only when its
    visible text is longer than 10 characters and its href contains the
    domain; if no result qualifies, nothing is clicked.
    """
    q = qs[0]
    domain = qs[1]
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
        urllib.parse.quote(q), 100, 'zh-TW')
    print(googleurl)
    driver.get(googleurl)
    time.sleep(6)
    # find_elements(By.XPATH, ...) works on Selenium 3 and 4; the older
    # find_elements_by_xpath helper was removed in Selenium 4.3.
    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    print(len(elmts))
    for elmt in elmts:
        href = elmt.get_attribute('href')
        txt = elmt.text
        # href can be None when the anchor has no href attribute.
        if len(txt) > 10 and href and domain in href:
            print('clicked....')
            print(href)
            print(txt)
            # Hover first, then hover-and-click, as two separate actions.
            webdriver.ActionChains(driver).move_to_element(elmt).perform()
            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
            break


def run_once(q):
    """Run one search-and-click cycle in a fresh headless Chrome session.

    Args:
        q: ``(term, domain)`` pair passed through to process_query().

    The browser is always closed, even when the query raises, so failed
    runs do not leave orphaned Chrome processes behind.
    """
    global driver
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument("--incognito")
    driver = webdriver.Chrome(options=options)
    try:
        driver.delete_all_cookies()
        driver.set_window_size(1400, 1000)
        print(q)
        process_query(q)
        time.sleep(3)
    finally:
        driver.quit()


db = dataset.connect(DB_URL)
cursor = db.query('select term,domain from selected_kw order by rand()')
lst = list(cursor)

if not lst:
    # random.choice() would raise on every iteration and the loop would
    # spin forever without doing any work.
    raise SystemExit('selected_kw returned no rows; nothing to process')

while True:
    try:
        c = random.choice(lst)
        run_once((c['term'], c['domain']))
    except Exception:
        # Log and keep going; KeyboardInterrupt/SystemExit still propagate
        # so the script can be stopped.
        traceback.print_exc()
    sleepint = random.randint(320, 520)
    time.sleep(sleepint)