"""Google-rank click bot.

Picks a random keyword from the ``selected_kw`` table, searches Google for it
in headless Chrome, stores every result link in the ``google_rank`` table and
in an Excel sheet, and clicks the result that points at the target site when
one is present.  With ``--loop <value>`` the job is repeated on a schedule.
"""
import argparse
import codecs
import datetime
import logging
import os
import random
import socket
import sys
import time
import traceback
import urllib
import urllib.parse
from logging.handlers import SysLogHandler

import dataset
import pandas as pd
import schedule
from selenium import webdriver
from selenium.webdriver.common.by import By

_LOG_SERVER = ('hhh.ptt.cx', 514)

# NOTE(review): credentials are hard-coded in source; consider moving them to
# configuration or the environment.
_DB_URL = 'mysql://root:jondae350@localhost:3306/ig_tags?charset=utf8mb4'

# Keywords that have not produced a matching saved result today.
# NOTE(review): the exclusion subquery looks at ``save_result`` rows for
# "beastparadise.net", while this script inserts into ``google_rank`` and
# clicks "taroboba-yuan.com" -- confirm that this is intended.
_TERM_QUERY = (
    'select term from selected_kw where term not in '
    '(SELECT distinct(keyword) FROM ig_tags.save_result '
    'where url like "%beastparadise.net%" and datediff(now(),dt)=0)'
)

_TARGET_DOMAIN = 'taroboba-yuan.com'
_RESULT_XPATH = "//div[@class='yuRUbf']/a"
_EXPORT_DIR = '/Users/zooeytsai/'
_COLUMNS = ['搜尋詞', '結果標題', '結果網址', '結果名次']

logger = logging.getLogger('clickbot_100')
handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
logger.addHandler(handler1)

hname = socket.gethostname()
pid = str(os.getpid())
logger.fatal('[clickbot_100][' + hname + '][' + pid + ']begin')


def restart_browser():
    """Start a headless Chrome session with a 950x6000 window and return it."""
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    driver.set_window_size(950, 6000)
    return driver


def process_one():
    """Search Google for one random keyword, record the results, click the target.

    Every result link is inserted into the ``google_rank`` table and collected
    into an Excel file named after the keyword.  If a result URL contains the
    target domain, the last such result is clicked.

    Returns without doing anything when the keyword query yields no rows.
    Exits the process (``sys.exit``) when the result page contains no
    recordable result, as the original script did.
    """
    db = dataset.connect(_DB_URL)
    table = db['google_rank']

    terms = [row['term'] for row in db.query(_TERM_QUERY)]
    if not terms:
        # random.choice() raises IndexError on an empty list, which would also
        # kill the scheduler loop; skip this round instead.
        print('no term available')
        return

    term = random.choice(terms)
    print(term)
    logger.debug('[clickbot_100][' + term + ']')

    driver = restart_browser()
    try:
        escaped_search_term = urllib.parse.quote(term)
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            escaped_search_term, 100, 'zh-TW')
        print(googleurl)
        driver.get(googleurl)
        time.sleep(6)  # give the result page time to render
        fname = term.replace(' ', '_')

        elmts = driver.find_elements(By.XPATH, _RESULT_XPATH)
        clickelmt = None
        cnt = 1
        rows = []
        for elmt in elmts:
            try:
                # Read both values before recording anything so that a failure
                # here cannot leave a half-filled row behind.
                href = elmt.get_attribute('href')
                title = elmt.text
                if _TARGET_DOMAIN in href:
                    clickelmt = elmt
                    logger.debug(
                        '[clickbot_100][' + term + '][' + str(cnt) + ']')
                print(href)
                print(title)
                rows.append({
                    '搜尋詞': term,
                    '結果標題': title,
                    '結果網址': href,
                    '結果名次': str(cnt),
                })
                table.insert({
                    'title': title,
                    'url': href,
                    'keyword': term,
                    'dt': datetime.datetime.now(),
                    'num': cnt,
                })
                cnt += 1
            except Exception:
                # Best effort: one bad result must not abort the whole page.
                print('href2 exception')
                traceback.print_exc()

        if clickelmt:
            actions = webdriver.ActionChains
            actions(driver).move_to_element(clickelmt).perform()
            actions(driver).move_to_element(clickelmt).click().perform()

        if not rows:
            print('None')
            # SystemExit propagates; the finally clause still closes Chrome.
            sys.exit()

        df = pd.DataFrame(rows, columns=_COLUMNS)
        # NOTE(review): writing ".xls" needs an Excel writer that supports the
        # legacy format in the installed pandas -- confirm on the target host.
        df.to_excel(_EXPORT_DIR + fname + ".xls")
    finally:
        # Always release the browser, even when something above raised.
        driver.quit()


def main():
    """Run one pass, then keep running on a schedule when ``--loop`` is given."""
    process_one()

    parser = argparse.ArgumentParser()
    # ``--loop`` takes a value (e.g. ``--loop 1``); any non-empty value enables
    # the scheduler.
    parser.add_argument('--loop')
    args = parser.parse_args()
    if args.loop:
        schedule.every(0.4).minutes.do(process_one)
        while True:
            schedule.run_pending()
            time.sleep(1)


if __name__ == '__main__':
    main()