"""Search Google for a keyword through a SOCKS proxy and click a target domain.

The script picks one random query from ``seo.hhh_gsc_imp``, opens Google in a
headless Firefox routed through the SOCKS5 proxy at 127.0.0.1:9050, types the
query, clicks the first organic result whose URL contains the target domain and
records the hit in the ``general_log`` table. A LINE Notify message is sent
when no search results are found.
"""
import codecs
import datetime
import os
import random
import socket
import sys
import time
import traceback
import urllib.parse

import dataset
import requests
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# NOTE(review): credentials are committed in source. They can now be overridden
# through the environment; the original values remain as defaults so existing
# deployments keep working. Rotate them and drop the defaults when possible.
db = dataset.connect(os.environ.get(
    'SEO_DB_URL',
    'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4',
))
table = db['general_log']
driver = None

headers = {
    "Authorization": "Bearer " + os.environ.get(
        'LINE_NOTIFY_TOKEN', "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi"),
    "Content-Type": "application/x-www-form-urlencoded",
}

# Firefox preferences that route all traffic through the local SOCKS5 proxy.
_PROXY_PREFS = (
    ("network.proxy.type", 1),
    ("network.proxy.socks", "127.0.0.1"),
    ("network.proxy.socks_port", 9050),
    ("network.proxy.socks_version", 5),
)

# Upper bound on result-lookup retries (the original retried forever).
_RESULT_LOOKUP_ATTEMPTS = 5

# Seconds to wait for the notification endpoint before giving up.
_NOTIFY_TIMEOUT = 10


def scrolling(driver, pgnum):
    """Press PAGE_DOWN ``pgnum`` times on the page body.

    Args:
        driver: Selenium WebDriver whose current page is scrolled.
        pgnum: Number of PAGE_DOWN key presses to send.
    """
    # find_element(By...) replaces find_element_by_css_selector, which was
    # removed in Selenium 4; this form works on both major versions.
    ub = driver.find_element(By.CSS_SELECTOR, 'body')
    for _ in range(pgnum):
        ub.send_keys(Keys.PAGE_DOWN)
        if pgnum > 1:
            time.sleep(0.3)


def rua():
    """Return a randomly chosen desktop User-Agent string."""
    pool = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
        # The comma after this entry was missing, which glued it to the next
        # string and produced one malformed User-Agent.
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
    ]
    return random.choice(pool)


def send_msg(kw):
    """Send ``kw`` to LINE Notify, prefixed with this machine's hostname.

    Delivery is best-effort: a network failure is printed, not raised, so a
    broken notification channel cannot abort the run.
    """
    hname = socket.gethostname()
    params = {"message": hname + ": " + kw}
    try:
        requests.post(
            "https://notify-api.line.me/api/notify",
            headers=headers,
            params=params,
            timeout=_NOTIFY_TIMEOUT,  # the original had no timeout and could hang
        )
    except requests.RequestException as exc:
        print('send_msg failed:', exc)


def empty_query(q):
    """Load the Google results page for ``q`` and wait three seconds."""
    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(googleurl)
    time.sleep(3)


def _find_result_links(attempts=_RESULT_LOOKUP_ATTEMPTS):
    """Return the organic result anchors, or ``[]`` if lookups keep failing."""
    for _ in range(attempts):
        try:
            links = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
        except WebDriverException:
            # Back off briefly instead of busy-looping on a broken session.
            time.sleep(1)
            continue
        print('尋找')
        time.sleep(2)
        return links
    return []


def process_query(qs):
    """Search Google for a keyword and click the first result on a domain.

    Args:
        qs: Sequence whose first item is the keyword to type and whose second
            item is the domain substring to look for in result URLs.

    Returns:
        ``None`` when Google redirected to its "sorry" page, otherwise the
        string ``"ok"`` (whether or not a matching result was clicked).
    """
    q = qs[0]
    domain = qs[1]
    googleurl = 'https://www.google.com/'
    print(googleurl)
    driver.get(googleurl)
    time.sleep(6)
    if 'sorry' in driver.current_url:
        print('sorry...............')
        return None

    # Click the button with id L2AGLb when present (presumably the cookie
    # consent dialog); its absence is not an error.
    try:
        driver.find_element(By.XPATH, "//button[@id='L2AGLb']").click()
    except WebDriverException:
        print('exception')

    try:
        box = driver.find_element(By.XPATH, "//input[@class='gLFyf gsfi']")
        time.sleep(1)
        box.send_keys(q)
        time.sleep(6)
        box.send_keys(Keys.ENTER)
    except WebDriverException as exc:
        # The original printed a variable that could be unbound here, which
        # raised NameError; report the real failure instead.
        print('search box interaction failed:', exc)
    time.sleep(6)

    elmts = _find_result_links()
    print('搜尋結果數量', len(elmts))
    if not elmts:
        send_msg('network failed...')

    # NOTE(review): the rank counter is assumed to advance for every result,
    # including those with short link text - confirm against the original
    # indentation.
    for idx, elmt in enumerate(elmts, start=1):
        href = elmt.get_attribute('href')
        txt = elmt.text
        if len(txt) > 10 and href and domain in href:
            print('clicked....')
            print(href)
            print(txt)
            print("ranking", idx)
            # Return value is discarded; call kept from the original.
            driver.execute_script("return document.body.scrollHeight")
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            elmt.click()
            table.insert({'kw': q, 'domain': domain, 'ranking': idx, 'title': txt,
                          'url': href, 'dt': datetime.datetime.now(), 'num': 1})
            print('clicked')
            time.sleep(12)
            break
    return "ok"


def run_once(q):
    """Restart the proxy container, run one search in Firefox, then exit.

    Args:
        q: ``(keyword, domain)`` pair forwarded to ``process_query``.

    Raises:
        SystemExit: always, once the browser has been shut down.
    """
    global driver
    os.system('docker container restart proxy1')
    time.sleep(12)

    print('before init')
    # The original also built a Chrome options object that was overwritten
    # before use; only the Firefox configuration ever took effect.
    options = webdriver.FirefoxOptions()
    options.add_argument('--headless')
    # Preferences are set on the options object rather than through a
    # FirefoxProfile passed as ``firefox_profile=``, a keyword that is
    # deprecated or removed in current Selenium releases.
    for name, value in _PROXY_PREFS:
        options.set_preference(name, value)
    driver = webdriver.Firefox(options=options)
    print('after init')

    try:
        driver.delete_all_cookies()
        driver.set_window_size(1400, 1000)
        print('到此')
        data = process_query(q)
        if data is not None:
            time.sleep(3)
    finally:
        # Always shut the browser down, even when the search raised.
        # NOTE(review): assumes the original called quit()/exit() on every
        # path, not only when data was not None - confirm.
        driver.quit()
    sys.exit()


# Kept at module level so running or importing the file behaves as before.
cursor = db.query('SELECT query FROM seo.hhh_gsc_imp where position >=2.5 and position <=8.5 order by rand() limit 1')
query = None
for c in cursor:
    query = c['query']
    print(query)
    break

if query is None:
    # Nothing to search for: stop before restarting the proxy and browser.
    sys.exit('no candidate query found')

run_once((query, 'hhh.com.tw'))