"""Search Google for a fixed list of keywords and click one target article.

For every search term the script attaches to a Chrome instance, runs the
query, looks for a result whose title contains a target fragment, records
its position in the ``news_log`` table and clicks it.

NOTE(review): this file was reconstructed from a copy whose line breaks had
been lost; block nesting was inferred from the statements themselves and
should be confirmed against a known-good copy.
"""
from random import randint
import sys
import os
import dataset
from selenium import webdriver
import traceback
import datetime
import codecs
import time
import urllib
import argparse
import schedule
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
import logging
from logging.handlers import SysLogHandler
import socket
import pandas as pd
import pymysql

pymysql.install_as_MySQLdb()

_LOG_SERVER = ('hhh.ptt.cx', 514)
logger = logging.getLogger('clickbot_100')
# Without an explicit level the logger inherits the root logger's WARNING
# threshold, so every logger.debug() call in this file would be dropped
# before reaching the syslog handler.
logger.setLevel(logging.DEBUG)
handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
logger.addHandler(handler1)
logger.debug('[clickbot_100][火柴星人]begin')

# Raw string: '\p' and '\c' are not valid escape sequences.
path = r'C:\portable\chromedriver'
path_z = '/Users/zooeytsai/Downloads/chromedriver 4'

# NOTE(security): credentials are embedded in source; consider loading them
# from the environment or a secrets store.
_DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'

_SEARCH_TERMS = [
    '好睡王 引新聞',
    '好睡王 Yahoo',
    '好睡王 HiNet',
    '好睡王 PCHOME',
    '好睡王 蕃新聞',
    '好睡王 新浪',
    '好睡王 台北郵報',
    '好睡王 LIFE',
    '好睡王 match生活網',
    '好睡王 POPDAILY',
    '好睡王 LINE TODAY',
]

# A result is treated as the target when its title contains this fragment.
_TARGET_TITLE_FRAGMENT = '炎炎夏日易輾轉難眠'

_SEARCH_BOX_XPATH = (
    '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input'
)
_RESULT_LINK_XPATH = "//div[@class='yuRUbf']/a"


def restart_browser(pport):
    """Create a Chrome WebDriver bound to the debugger at 127.0.0.1:<pport>.

    Args:
        pport: Port (str or int) of the Chrome remote-debugging endpoint.

    Returns:
        A ``webdriver.Chrome`` instance with cookies cleared and the window
        resized to 950x20000.
    """
    s = Service('/root/driver/chromedriver')
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # NOTE(review): both a fixed remote-debugging port and a debuggerAddress
    # are configured; confirm which one is actually intended to take effect.
    options.add_argument('--remote-debugging-port=9222')
    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
    options.add_argument(
        "--user-agent="
        + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0"
    )
    options.add_argument("--incognito")
    driver = webdriver.Chrome(options=options, service=s)
    driver.delete_all_cookies()
    driver.set_window_size(950, 20000)
    return driver


def _search_and_click(driver, term, table):
    """Search Google for *term*; log and click the first target result."""
    googleurl = 'https://www.google.com/?num=30'
    driver.get(googleurl)
    time.sleep(6)
    send_kw_elmt = driver.find_element(By.XPATH, _SEARCH_BOX_XPATH)
    send_kw_elmt.send_keys(term)
    time.sleep(3)
    send_kw_elmt.send_keys(Keys.ENTER)
    time.sleep(6)

    elmts = driver.find_elements(By.XPATH, _RESULT_LINK_XPATH)
    print(len(elmts))
    # NOTE(review): the rank counts every result link, including those with
    # short titles; the original nesting of the counter increment could not
    # be recovered with certainty.
    for idx, elmt in enumerate(elmts, start=1):
        href = elmt.get_attribute('href')
        txt = elmt.text
        print(txt)
        if len(txt) > 10 and _TARGET_TITLE_FRAGMENT in txt:
            print("ranking", idx)
            table.insert({
                'kw': term,
                'ranking': idx,
                'title': txt,
                'url': href,
                'dt': datetime.datetime.now(),
            })
            print('clicked....')
            webdriver.ActionChains(driver).move_to_element(elmt).perform()
            webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
            time.sleep(5)
            break


def process_one(pport):
    """Run the search-and-click routine once for every configured term.

    A fresh WebDriver is created per term and always quit afterwards, and the
    database connection is always closed, even when a Selenium or database
    call raises. Exceptions still propagate to the caller.

    Args:
        pport: Remote-debugging port passed through to ``restart_browser``.
    """
    db = dataset.connect(_DB_URL)
    try:
        table = db['news_log']
        for term in _SEARCH_TERMS:
            print(term)
            logger.debug('[clickbot_100][' + term + ']')
            driver = restart_browser(pport)
            try:
                _search_and_click(driver, term, table)
            finally:
                # Release the browser session even if the search failed.
                driver.quit()
            print('中場休息')
            time.sleep(randint(90, 120))
    finally:
        db.close()


def _restart_container(name='tiny4', attempts=5, retry_wait=15):
    """Restart a docker container, retrying while the command fails.

    ``os.system`` reports failure through its return value rather than by
    raising, so the exit status is checked explicitly.

    Returns:
        True if a restart command exited with status 0, False otherwise.
    """
    for attempt in range(1, attempts + 1):
        status = os.system(f'docker container restart {name}')
        if status == 0:
            time.sleep(1)
            return True
        logger.warning(
            '[clickbot_100]docker restart of %s failed (status %s, attempt %d/%d)',
            name, status, attempt, attempts,
        )
        if attempt < attempts:
            time.sleep(retry_wait)
    return False


# Best effort: the run proceeds even if the container could not be restarted,
# matching the previous behaviour of continuing after the restart command.
_restart_container()

process_one('9925')

# Disabled scheduler entry point, kept for reference.
# parser = argparse.ArgumentParser()
# parser.add_argument('--loop', action="store_true")
# args = parser.parse_args()
# if args.loop:
#     schedule.every(0.4).minutes.do(process_one)
#     # print('今天開始')
#     # schedule.every().day.at('9:30').do(process_one)
#
#     while True:
#         schedule.run_pending()
#         time.sleep(1)
# >> C:\tmp\seo_line.txt 2>&1