"""Record where a domain ranks on Google for a keyword and click its result.

The script drives Chrome through Selenium, stores every matching search
result in the ``google_rank`` table and finally clicks the matching result.
It is exposed on the command line through ``fire`` (see ``JParams.get``).
"""
import argparse
import codecs
import datetime
import logging
import os
import random
import socket
import sys
import time
import traceback
import urllib.parse
from logging.handlers import SysLogHandler
from random import randint

import dataset
import fire
import pandas as pd
# import pymysql
# pymysql.install_as_MySQLdb()
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait

# Raw string: the backslashes are literal path separators, not escapes.
path = r'C:\portable\chromedriver'
path_z = '/Users/zooeytsai/Downloads/chromedriver 2'

# WebDriver handle shared by run_once() (creates it) and process_one() (uses it).
driver = None

# NOTE(review): credentials are hard-coded in the connection URL; consider
# loading them from the environment or a config file instead.
db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
lst = []
table = db['google_rank']

# Candidate User-Agent headers; one is picked at random per browser session.
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
)


def rua() -> str:
    """Return a randomly chosen User-Agent string."""
    return random.choice(_USER_AGENTS)


def process_one(item) -> None:
    """Search Google for a keyword, store the domain's rankings, click the hit.

    Uses the module-level ``driver``, which must already be a live WebDriver.

    Args:
        item: Indexable whose element 0 is the search term and element 1 is
            the domain to look for in the result URLs.

    Side effects:
        Inserts one row into ``table`` for every result whose URL contains
        the domain, clicks the last such result, and quits ``driver``.
        When no result could be read at all, prints ``'None'``, quits the
        driver and terminates the process via ``sys.exit()``.
    """
    term = item[0]
    domain = item[1]
    print(term, domain)

    escaped_search_term = urllib.parse.quote(term)
    googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
        escaped_search_term, 100, 'zh-TW')
    print(googleurl)
    driver.get(googleurl)
    time.sleep(6)  # fixed wait for the result page to finish rendering

    elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    rank = 1
    clickelmt = None
    # Keys: search term, result title, result URL, result rank.
    datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}
    print('搜尋結果', len(elmts))

    for elmt in elmts:
        try:
            # Read both values before appending so that a failure cannot
            # leave the four columns with different lengths.
            href = elmt.get_attribute('href')
            title = elmt.text
            datadict['搜尋詞'].append(term)
            datadict['結果標題'].append(title)
            datadict['結果網址'].append(href)
            datadict['結果名次'].append(str(rank))
            if domain in href:
                clickelmt = elmt
                print(href)
                print(title)
                table.insert({
                    'kw': term,
                    'domain': domain,
                    'ranking': rank,
                    'title': title,
                    'url': href,
                    'dt': datetime.datetime.now(),
                })
            rank += 1
        except Exception:
            # Best effort: one unreadable result must not abort the scan, but
            # KeyboardInterrupt / SystemExit are allowed to propagate.
            print('href2 exception')
            traceback.print_exc()

    if clickelmt:
        actions = webdriver.ActionChains
        actions(driver).move_to_element(clickelmt).perform()
        actions(driver).move_to_element(clickelmt).click().perform()
        time.sleep(5)
        print('點擊完成')

    if not datadict['結果標題']:
        print('None')
        driver.quit()
        sys.exit()

    driver.quit()
    print('結束')


def run_once(q) -> None:
    """Start a Chrome session, run ``process_one`` and always quit the driver.

    Args:
        q: Indexable of ``(keyword, domain, port)``; ``port`` is the local
            port used for Chrome's ``debuggerAddress`` option.
    """
    global driver
    service = Service('/root/driver/chromedriver')
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--remote-debugging-port=9222')
    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{q[2]}")
    options.add_argument("--user-agent=" + rua())
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options, service=service)
    try:
        driver.delete_all_cookies()
        driver.set_window_size(1400, 1000)
        process_one(q)
        time.sleep(3)
    finally:
        # Also runs when process_one raises or exits, so the session is
        # never left open.
        driver.quit()


class JParams(object):
    """Command-line entry points exposed through ``fire``."""

    def get(self, kw, domain, port):
        """Print the arguments and run a single search for ``kw``/``domain``."""
        print(kw)
        print(domain)
        run_once((kw, domain, port))


if __name__ == '__main__':
    fire.Fire(JParams)