"""Google search click bot for a fixed list of keywords.

For every keyword the script restarts the ``tiny4`` docker container,
attaches Selenium to a Chrome instance through its remote-debugging port,
runs a Google search, and - when the target article URL shows up among the
result links - records its position in the ``general_log`` table and clicks
the link.

NOTE(review): the original file had lost all of its line breaks and
indentation; the block structure below was reconstructed from syntax.
Confirm the nesting (especially where the ranking counter is advanced)
against version control.
"""
import argparse
import codecs
import datetime
import logging
import os
import socket
import sys
import time
import traceback
import urllib
from logging.handlers import SysLogHandler
from random import randint

import dataset
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

_LOG_SERVER = ('hhh.ptt.cx', 514)

# Shell command used to restart the container before each search.
_CONTAINER_RESTART_CMD = 'docker container restart tiny4'
# How many times the restart is attempted before giving up on it.
_MAX_RESTART_ATTEMPTS = 5

# NOTE(review): credentials are hard-coded in source; consider loading the
# URL from the environment or a secrets store instead.
_DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'
_SEARCH_TERMS = ['裝潢預算', '工程費', '丈量費', '裝修預算']
_TARGET_URL = 'https://hhh.com.tw/columns/detail/2094/'
_GOOGLE_URL = 'https://www.google.com/?num=40'
# NOTE(review): absolute XPath to the search box; it breaks whenever the
# page layout changes.
_SEARCH_BOX_XPATH = (
    '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input'
)
_RESULT_LINK_XPATH = "//div[@class='yuRUbf']/a"

logger = logging.getLogger('clickbot_100')
# Without an explicit level the logger inherits WARNING from the root logger
# and every logger.debug() call below is dropped before reaching the handler.
logger.setLevel(logging.DEBUG)
handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
logger.addHandler(handler1)
logger.debug('[clickbot_100][火柴星人]begin')


def restart_browser(pport):
    """Restart the ``tiny4`` container and attach a WebDriver to Chrome.

    Args:
        pport: Remote-debugging port of the Chrome instance to attach to
            (used as ``127.0.0.1:<pport>``).

    Returns:
        A ``webdriver.Chrome`` instance with cookies cleared and the window
        resized to 950x20000.
    """
    # os.system() reports failure through its return value, not through an
    # exception, so the exit status is what has to drive the retry.
    for attempt in range(1, _MAX_RESTART_ATTEMPTS + 1):
        if os.system(_CONTAINER_RESTART_CMD) == 0:
            time.sleep(1)
            break
        logger.warning(
            '[clickbot_100]container restart failed (attempt %d/%d)',
            attempt, _MAX_RESTART_ATTEMPTS,
        )
        time.sleep(15)
    else:
        # Best effort: still try to attach, as the original code did.
        logger.error('[clickbot_100]container restart kept failing')

    service = Service('/root/driver/chromedriver')
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--remote-debugging-port=9222')
    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
    options.add_argument(
        "--user-agent="
        + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0")
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options, service=service)
    driver.delete_all_cookies()
    # Very tall window - presumably so all results are laid out without
    # scrolling; confirm.
    driver.set_window_size(950, 20000)
    return driver


def _search_and_click(driver, table, term):
    """Search Google for *term*; log and click the target URL if listed."""
    driver.get(_GOOGLE_URL)
    time.sleep(6)
    search_box = driver.find_element(By.XPATH, _SEARCH_BOX_XPATH)
    search_box.send_keys(term)
    time.sleep(3)
    search_box.send_keys(Keys.ENTER)
    time.sleep(6)

    links = driver.find_elements(By.XPATH, _RESULT_LINK_XPATH)
    print(len(links))
    # rank is the 1-based position among the result links.
    for rank, link in enumerate(links, start=1):
        href = link.get_attribute('href')
        title = link.text
        # Only links with a title longer than 10 characters that point
        # exactly at the target article are recorded and clicked.
        if len(title) <= 10 or href != _TARGET_URL:
            continue
        print("ranking", rank)
        table.insert({'kw': term, 'domain': 'hhh.com.tw', 'ranking': rank,
                      'title': title, 'url': href,
                      'dt': datetime.datetime.now()})
        print('clicked....')
        webdriver.ActionChains(driver).move_to_element(link).perform()
        webdriver.ActionChains(driver).move_to_element(link).click().perform()
        time.sleep(5)
        break


def process_one(pport):
    """Run one search-and-click pass over every configured keyword.

    Args:
        pport: Remote-debugging port handed to ``restart_browser``.

    Any exception from Selenium or the database propagates to the caller;
    the browser session and the database connection are released first.
    """
    db = dataset.connect(_DB_URL)
    try:
        table = db['general_log']
        for term in _SEARCH_TERMS:
            print(term)
            logger.debug('[clickbot_100][' + term + ']')
            driver = restart_browser(pport)
            try:
                _search_and_click(driver, table, term)
            finally:
                # Always release the browser session, even on failure.
                driver.quit()
            print('中場休息')
            # Random pause between keywords.
            time.sleep(randint(20, 30))
    finally:
        db.close()


# NOTE(review): runs on import as well as on direct execution; kept
# unguarded to preserve the existing behaviour.
process_one('9927')

# Disabled scheduler loop, kept for reference (`schedule` is not imported
# by this file):
# parser = argparse.ArgumentParser()
# parser.add_argument('--loop', action="store_true")
# args = parser.parse_args()
# if args.loop:
#     schedule.every(0.4).minutes.do(process_one)
#     # print('今天開始')
#     # schedule.every().day.at('9:30').do(process_one)
#     #
#     while True:
#         schedule.run_pending()
#         time.sleep(1)
# >> C:\tmp\seo_line.txt 2>&1