							- import random
 
- import sys
 
- import dataset
 
- from selenium import webdriver
 
- import traceback
 
- import datetime
 
- import codecs
 
- import time
 
- import urllib
 
- import argparse
 
- import schedule
 
- import logging
 
- import sys
 
- from logging.handlers import SysLogHandler
 
- import socket
 
- import pandas as pd
 
- import socket
 
- import os
 
# --- Remote logging --------------------------------------------------------
# Every record is shipped over UDP to the central syslog collector so runs
# on many machines can be monitored in one place.
_LOG_SERVER = ('hhh.ptt.cx', 514)

logger = logging.getLogger('clickbot_100')
handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
logger.addHandler(handler1)

# Tag the start-of-run record with host name and PID so concurrent bot
# instances can be told apart in the aggregated stream.
hname = socket.gethostname()
pid = str(os.getpid())
logger.fatal(f'[clickbot_100][{hname}][{pid}]begin')
 
def restart_browser():
    """Launch and return a fresh headless Chrome WebDriver.

    The window is made very tall (950x6000) so that a single screenshot /
    render can hold a full page of 100 Google results without scrolling.
    """
    opts = webdriver.ChromeOptions()
    opts.add_argument('--headless')
    browser = webdriver.Chrome(options=opts)
    browser.set_window_size(950, 6000)
    return browser
 
def process_one():
    """Run one ranking pass for a randomly chosen tracked keyword.

    Steps:
      1. Pull all keywords from ``seo.selected_kw`` and pick one at random.
      2. Fetch the Google SERP for it (100 results, zh-TW locale).
      3. Record every organic result (title/url/rank) into ``google_rank``.
      4. If our own domain appears, scroll to it and click it.
      5. Dump the collected ranking to an .xls file.

    Exits the interpreter via ``sys.exit()`` when no results were parsed
    (most likely a captcha / block page) — this intentionally also stops
    the ``--loop`` scheduler.
    """
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
    lst = []
    table = db['google_rank']
    cursor = db.query('select term from seo.selected_kw')
    for c in cursor:
        lst.append(c['term'])
    term = random.choice(lst)
    print(term)
    logger.debug('[clickbot_100][' + term + ']')

    driver = restart_browser()
    # try/finally: the original leaked the headless Chrome process whenever
    # anything between launch and the final quit() raised.
    try:
        escaped_search_term = urllib.parse.quote(term)
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(escaped_search_term, 100, 'zh-TW')
        print(googleurl)
        driver.get(googleurl)
        time.sleep(6)

        fname = term.replace(' ', '_')
        df = pd.DataFrame()
        # Anchor elements of the organic results on the SERP.
        # NOTE(review): relies on Google's 'yuRUbf' class name — fragile,
        # verify it still matches the current markup.
        elmts = driver.find_elements_by_xpath("//div[@class='yuRUbf']/a")
        clickelmt = None
        cnt = 1  # 1-based rank of the next result
        datadict = {'搜尋詞': [], '結果標題': [], '結果網址': [], '結果名次': []}

        for elmt in elmts:
            try:
                href = elmt.get_attribute('href')
                if 'taroboba-yuan.com' in href:
                    # Remember our own listing so we can click it afterwards.
                    clickelmt = elmt
                    logger.debug('[clickbot_100][' + term + '][' + str(cnt) + ']')
                print(href)
                print(elmt.text)
                datadict['搜尋詞'].append(term)
                datadict['結果標題'].append(elmt.text)
                datadict['結果網址'].append(href)
                datadict['結果名次'].append(str(cnt))
                table.insert({'title': elmt.text, 'url': href, 'keyword': term, 'dt': datetime.datetime.now(), 'num': cnt})
                cnt += 1
            except Exception:
                # Best-effort per-result handling, but no longer a bare
                # except: SystemExit/KeyboardInterrupt now propagate.
                print('href2 exception')
                traceback.print_exc()

        if clickelmt:
            # Scroll our listing into view, then click it.
            webdriver.ActionChains(driver).move_to_element(clickelmt).perform()
            webdriver.ActionChains(driver).move_to_element(clickelmt).click().perform()

        if len(datadict['結果標題']) <= 0:
            # Nothing parsed: bail out entirely (driver is quit in finally).
            print('None')
            sys.exit()

        df['搜尋詞'] = datadict['搜尋詞']
        df['結果標題'] = datadict['結果標題']
        df['結果網址'] = datadict['結果網址']
        df['結果名次'] = datadict['結果名次']
        df.to_excel('c:/tmp/'+fname+".xls")
    finally:
        # Always release the browser, even when scraping raises or we exit.
        driver.quit()
 
# Always perform one pass immediately on startup.
process_one()

# Daemon mode: with --loop, keep re-running the scrape roughly every
# 24 seconds instead of exiting after the single pass above.
parser = argparse.ArgumentParser()
parser.add_argument('--loop')
args = parser.parse_args()

if args.loop:
    schedule.every(0.4).minutes.do(process_one)
    while True:
        schedule.run_pending()
        time.sleep(1)
 
 
  |