"""Click-bot: search Google for a stored keyword and click the client's result.

Each run picks a random keyword from the ``selected_kw`` table, opens a
Google results page for it in headless Chrome, records the result links
in the ``save_result`` table, and clicks the link that points at the
target domain when one is present.  With ``--loop`` the run is repeated
every 6 minutes.
"""
import argparse
import codecs
import datetime
import logging
import random
import socket
import sys
import time
import traceback
import urllib
import urllib.parse  # `import urllib` alone does not guarantee the submodule is loaded
from logging.handlers import SysLogHandler

import dataset
import schedule
from selenium import webdriver

_LOG_SERVER = ('hhh.ptt.cx', 514)
# NOTE(review): database credentials are hard-coded in source; consider
# moving them to configuration or the environment.
_DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'
_TARGET_DOMAIN = 'taroboba-yuan.com'
_RESULT_LINK_XPATH = "//div[@class='yuRUbf']/a"

logger = logging.getLogger('clickbot_100')
# Without an explicit level the logger inherits the root logger's WARNING
# level, so every logger.debug() call below would be dropped before it
# reaches the syslog handler.
logger.setLevel(logging.DEBUG)
handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
logger.addHandler(handler1)

term = 'programming running..'
logger.debug('[clickbot_100][' + term + ']')


def restart_browser():
    """Start a new headless Chrome session.

    Returns:
        A ``webdriver.Chrome`` instance with a 1400x1000 window.  The
        caller is responsible for calling ``quit()`` on it.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    driver.set_window_size(1400, 1000)
    return driver


def process_one():
    """Run one search-record-click cycle for a randomly chosen keyword.

    Reads the keyword list for client "清原" from ``selected_kw``, searches
    Google (100 results, ``hl=zh-TW``) for one of them, inserts a row per
    result link into ``save_result`` with its 1-based position, and clicks
    the last link whose URL contains the target domain, if any.

    Returns:
        None.  When no keywords are available the function logs a warning
        and returns without starting a browser.
    """
    # NOTE(review): a new database connection is opened on every call and
    # never closed explicitly — confirm whether the installed `dataset`
    # version offers a close() and release it here.
    db = dataset.connect(_DB_URL)
    table = db['save_result']
    terms = [
        row['term']
        for row in db.query('select term from selected_kw where client="清原"')
    ]
    if not terms:
        # random.choice([]) raises IndexError, which would also kill the
        # scheduler loop; treat "nothing to do" as a no-op instead.
        logger.warning('[clickbot_100][no keywords found]')
        return

    term = random.choice(terms)
    print(term)
    logger.debug('[clickbot_100][' + term + ']')

    driver = restart_browser()
    try:
        escaped_search_term = urllib.parse.quote(term)
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            escaped_search_term, 100, 'zh-TW')
        print(googleurl)
        driver.get(googleurl)
        time.sleep(6)  # give the results page time to render

        # find_elements_by_xpath was removed in Selenium 4.3; the generic
        # find_elements(by, value) form is accepted by Selenium 3 and 4.
        elmts = driver.find_elements('xpath', _RESULT_LINK_XPATH)

        clickelmt = None
        # enumerate keeps the rank tied to the element's position even when
        # processing an earlier element failed.
        for rank, elmt in enumerate(elmts, start=1):
            try:
                href = elmt.get_attribute('href')
                if not href:
                    continue
                if _TARGET_DOMAIN in href:
                    clickelmt = elmt
                    logger.debug(
                        '[clickbot_100][' + term + '][' + str(rank) + ']')
                # NOTE(review): the source was whitespace-collapsed; recording
                # every result (not only target-domain hits) is the assumed
                # original nesting — confirm.
                print(href)
                print(elmt.text)
                table.insert({
                    'title': elmt.text,
                    'url': href,
                    'keyword': term,
                    'dt': datetime.datetime.now(),
                    'num': rank,
                })
            except Exception:
                # Best effort: one bad element must not abort the whole run.
                print('href2 exception')
                traceback.print_exc()

        if clickelmt:
            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
            webdriver.ActionChains(driver).move_to_element(
                clickelmt).click().perform()
    finally:
        # Always release the browser, even when navigation or clicking fails,
        # so repeated runs do not accumulate Chrome processes.
        driver.quit()


# NOTE(review): the statements below run at import time, as in the original
# script; consider an `if __name__ == "__main__":` guard.
parser = argparse.ArgumentParser()
parser.add_argument('--loop')
# Parse before the first run so invalid arguments fail before a browser starts.
args = parser.parse_args()

process_one()

if args.loop:
    schedule.every(6).minutes.do(process_one)
    # NOTE(review): loop assumed to belong to the --loop branch (source
    # indentation was lost) — confirm.
    while True:
        schedule.run_pending()
        time.sleep(1)