import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import fire
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

#import pymysql
#pymysql.install_as_MySQLdb()

# NOTE(review): credentials are embedded in source; consider moving this URL
# to configuration that is not checked in.
DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'

# Shared WebDriver handle; assigned by run_once() and read by the query helpers.
driver = None


def rua():
    """Return a User-Agent string picked at random from a fixed pool."""
    pool = [
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
        # The comma after the next entry was missing, which silently merged
        # it with the following string into one invalid User-Agent.
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
    ]
    return random.choice(pool)


def empty_query(q):
    """Load the Google search results page for *q* and wait three seconds.

    Uses the module-level ``driver``; nothing is read from the page.
    """
    global driver
    googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
    driver.get(googleurl)
    time.sleep(3)


def process_query(qs):
    """Search Google for a keyword, then log and click the target result.

    Args:
        qs: Sequence ``(keyword, url, client, port, domain)`` as built by
            ``run_once``. A 4-item ``(keyword, url, client, domain)``
            sequence is also accepted.

    The keyword is typed into the Google search box using the module-level
    ``driver``. The result anchors are scanned in order; the first one whose
    href equals ``url`` (and whose visible text is longer than 10 characters)
    is recorded in the ``sns_log`` table with its 1-based position and then
    clicked. Nothing is recorded when no anchor matches.
    """
    global driver
    q, url, client = qs[0], qs[1], qs[2]
    # Index 3 is the debugger port in the tuple run_once passes, so the
    # domain lives at index 4; fall back to index 3 for 4-item sequences.
    domain = qs[4] if len(qs) > 4 else qs[3]

    db = dataset.connect(DB_URL)
    try:
        table = db['sns_log']

        googleurl = 'https://www.google.com/?num=100'
        driver.get(googleurl)
        time.sleep(6)

        send_kw_elmt = driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
        send_kw_elmt.send_keys(q)
        time.sleep(3)
        send_kw_elmt.send_keys(Keys.ENTER)
        time.sleep(6)
        time.sleep(10)

        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
        print('網頁數量', len(elmts))

        # idx is the 1-based position among all result anchors found.
        for idx, elmt in enumerate(elmts, start=1):
            href = elmt.get_attribute('href')
            txt = elmt.text
            if len(txt) > 10 and href == url:
                print('clicked....')
                print(href)
                print(txt)
                print("ranking", idx)
                table.insert({
                    'kw': q,
                    'client': client,
                    'ranking': idx,
                    'title': txt,
                    'url': href,
                    'dt': datetime.datetime.now(),
                    'domain': domain,
                })
                # Move the pointer onto the link first, then move and click.
                webdriver.ActionChains(driver).move_to_element(elmt).perform()
                webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
                time.sleep(5)
                break
    finally:
        # Close the connection even when a Selenium call above raises.
        db.close()


def run_once(q):
    """Create a Chrome WebDriver session, run ``process_query(q)``, then quit.

    Args:
        q: Sequence ``(keyword, url, client, port, domain)``; ``q[3]`` is
            used as the port in the ``debuggerAddress`` option
            (``127.0.0.1:<port>``).
    """
    global driver
    s = Service('/root/driver/chromedriver')
    user_agent = rua()

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--remote-debugging-port=9222')
    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[3]}")
    options.add_argument("--user-agent=" + user_agent)
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options, service=s)
    try:
        driver.delete_all_cookies()
        driver.set_window_size(1400, 1000)
        process_query(q)
        time.sleep(3)
    finally:
        # Always end the WebDriver session, even if the query step fails.
        driver.quit()


class JParams(object):
    """Command object handed to ``fire.Fire`` as the CLI entry point."""

    def get(self, kw, url, client, port, domain):
        """Print the keyword and run one search/click pass via ``run_once``."""
        print('關鍵字', kw)
        run_once((kw, url, client, port, domain))


if __name__ == '__main__':
    fire.Fire(JParams)