"""Click bot: search Google for a random SEO keyword and click our result.

Each run picks one keyword job from the ``seo_clickjobs`` table, opens the
Google result page for it in headless Chrome, stores every organic result in
the ``save_result_listclick`` table and clicks the result whose URL contains
the job's domain.

One run is always executed at start-up; pass ``--loop <any value>`` to keep
running on a schedule afterwards.
"""
import argparse
import codecs
import datetime
import logging
import random
import socket
import sys
import time
import traceback
import urllib
import urllib.parse  # explicit: ``import urllib`` alone does not load the submodule
from logging.handlers import SysLogHandler

import dataset
import pandas as pd
import schedule
from selenium import webdriver
from selenium.webdriver.common.by import By

_LOG_SERVER = ('hhh.ptt.cx', 514)

# NOTE(review): credentials are hard-coded in source; consider moving them to
# an environment variable or a config file that is not committed.
_DB_URL = 'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'

logger = logging.getLogger('clickbot_100')
handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
logger.addHandler(handler1)
# Without an explicit level the logger inherits WARNING from the root logger
# and every debug() call below would be dropped before reaching the handler.
logger.setLevel(logging.DEBUG)
logger.debug('[click_and_save][DB]begin')


def restart_browser():
    """Start a new headless Chrome session.

    Returns:
        The ``webdriver.Chrome`` instance, with its window resized to
        950x6000 pixels.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    # Very tall window -- presumably so the whole 100-result page is laid out
    # and elements can be reached without scrolling (TODO confirm).
    driver.set_window_size(950, 6000)
    return driver


def process_one():
    """Run one search-record-click cycle for a randomly chosen keyword job.

    Steps:
      1. Load the ``202204`` jobs from ``seo_clickjobs`` and pick one at random.
      2. Open the Google result page (100 results, ``hl=zh-TW``) for its keyword.
      3. Insert every organic result (title, url, keyword, timestamp, rank)
         into ``save_result_listclick``.
      4. Click the last result whose URL contains the job's domain, if any.

    Returns early (after reporting it) when there are no jobs at all.

    Raises:
        SystemExit: when no result could be read from the result page. This
            terminates the whole process, including ``--loop`` mode.
    """
    db = dataset.connect(_DB_URL)
    table = db['save_result_listclick']
    jobs = list(db.query(
        'select kw,page,domain from seo_clickjobs where category="202204" order by rand()'
    ))
    if not jobs:
        # random.choice() would raise IndexError on an empty list.
        print('no click jobs found')
        logger.warning('[clickbot_100]no click jobs found')
        return

    entry = random.choice(jobs)
    term = entry['kw']
    print(term)
    domain = entry['domain']
    logger.debug('[clickbot_100][' + term + ']')

    driver = restart_browser()
    try:
        escaped_search_term = urllib.parse.quote(term)
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            escaped_search_term, 100, 'zh-TW')
        print(googleurl)
        driver.get(googleurl)
        # Fixed wait for the result page to finish rendering.
        time.sleep(6)

        # find_elements(By.XPATH, ...) exists in both Selenium 3 and 4; the
        # find_elements_by_xpath shortcut was removed in Selenium 4.3.
        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
        clickelmt = None
        cnt = 1        # rank of the next successfully stored result
        recorded = 0   # results whose href and title could be read
        for elmt in elmts:
            try:
                href = elmt.get_attribute('href')
                if domain in href:
                    clickelmt = elmt
                    logger.debug('[clickbot_100][' + term + '][' + str(cnt) + ']')
                print(href)
                # Read the title once; every .text access is a WebDriver call.
                title = elmt.text
                print(title)
                recorded += 1
                # Every result is stored, not only the one matching the domain.
                table.insert({
                    'title': title,
                    'url': href,
                    'keyword': term,
                    'dt': datetime.datetime.now(),
                    'num': cnt,
                })
                cnt += 1
            except Exception:
                # Best effort per result: report the failure and carry on.
                # (Not a bare except, so Ctrl-C / SystemExit still propagate.)
                print('href2 exception')
                traceback.print_exc()

        if clickelmt:
            # Hover first, then hover-and-click, as two separate action chains.
            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()

        if recorded <= 0:
            print('None')
            # Deliberately stops the whole process; the finally clause below
            # still closes the browser first.
            sys.exit()
    finally:
        # Always release the Chrome process, even if navigation, recording or
        # clicking raised.
        driver.quit()


# One run is always performed at start-up, before the arguments are parsed.
process_one()

parser = argparse.ArgumentParser()
# --loop expects a value; any non-empty value enables the scheduler below.
parser.add_argument('--loop')
args = parser.parse_args()

if args.loop:
    # 0.4 minutes = one run every 24 seconds (an earlier setting was 6 minutes).
    schedule.every(0.4).minutes.do(process_one)
    while True:
        schedule.run_pending()
        time.sleep(1)