|  | @@ -0,0 +1,108 @@
 | 
	
		
			
				|  |  | +from random import randint
 | 
	
		
			
				|  |  | +import sys
 | 
	
		
			
				|  |  | +import os
 | 
	
		
			
				|  |  | +import dataset
 | 
	
		
			
				|  |  | +from selenium import webdriver
 | 
	
		
			
				|  |  | +import traceback
 | 
	
		
			
				|  |  | +import datetime
 | 
	
		
			
				|  |  | +import codecs
 | 
	
		
			
				|  |  | +import time
 | 
	
		
			
				|  |  | +import urllib
 | 
	
		
			
				|  |  | +import argparse
 | 
	
		
			
				|  |  | +from selenium.webdriver.chrome.service import Service
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.by import By
 | 
	
		
			
				|  |  | +from selenium.webdriver.common.keys import Keys
 | 
	
		
			
				|  |  | +import logging
 | 
	
		
			
				|  |  | +import sys
 | 
	
		
			
				|  |  | +from logging.handlers import SysLogHandler
 | 
	
		
			
				|  |  | +import socket
 | 
	
		
			
				|  |  | +import pandas as pd
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +_LOG_SERVER = ('hhh.ptt.cx', 514)
 | 
	
		
			
				|  |  | +logger = logging.getLogger('clickbot_100')
 | 
	
		
			
				|  |  | +handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
 | 
	
		
			
				|  |  | +logger.addHandler(handler1)
 | 
	
		
			
				|  |  | +logger.debug('[clickbot_100][火柴星人]begin')
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def restart_browser(pport):
 | 
	
		
			
				|  |  | +    while True:
 | 
	
		
			
				|  |  | +        try:
 | 
	
		
			
				|  |  | +            os.system('docker container restart tiny4')
 | 
	
		
			
				|  |  | +            time.sleep(1)
 | 
	
		
			
				|  |  | +            break
 | 
	
		
			
				|  |  | +        except:
 | 
	
		
			
				|  |  | +            os.system('docker container restart tiny4')
 | 
	
		
			
				|  |  | +            time.sleep(15)
 | 
	
		
			
				|  |  | +    s = Service('/root/driver/chromedriver')
 | 
	
		
			
				|  |  | +    options = webdriver.ChromeOptions()
 | 
	
		
			
				|  |  | +    options.add_argument('--headless')
 | 
	
		
			
				|  |  | +    options.add_argument('--remote-debugging-port=9222')
 | 
	
		
			
				|  |  | +    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
 | 
	
		
			
				|  |  | +    options.add_argument(
 | 
	
		
			
				|  |  | +        "--user-agent=" + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0")
 | 
	
		
			
				|  |  | +    options.add_argument("--incognito")
 | 
	
		
			
				|  |  | +    driver = webdriver.Chrome(options=options, service=s)
 | 
	
		
			
				|  |  | +    driver.delete_all_cookies()
 | 
	
		
			
				|  |  | +    driver.set_window_size(950, 20000)
 | 
	
		
			
				|  |  | +    return driver
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def process_one(pport):
 | 
	
		
			
				|  |  | +    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
 | 
	
		
			
				|  |  | +    lst = ['裝潢預算','工程費','丈量費','裝修預算']
 | 
	
		
			
				|  |  | +    table = db['general_log']
 | 
	
		
			
				|  |  | +    main_url = 'https://hhh.com.tw/columns/detail/2094/'
 | 
	
		
			
				|  |  | +    for term in lst:
 | 
	
		
			
				|  |  | +        print(term)
 | 
	
		
			
				|  |  | +        logger.debug('[clickbot_100][' + term + ']')
 | 
	
		
			
				|  |  | +        driver = restart_browser(pport)
 | 
	
		
			
				|  |  | +        googleurl = 'https://www.google.com/?num=40'
 | 
	
		
			
				|  |  | +        driver.get(googleurl)
 | 
	
		
			
				|  |  | +        time.sleep(6)
 | 
	
		
			
				|  |  | +        send_kw_elmt = driver.find_element(By.XPATH,'/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
 | 
	
		
			
				|  |  | +        send_kw_elmt.send_keys(term)
 | 
	
		
			
				|  |  | +        time.sleep(3)
 | 
	
		
			
				|  |  | +        send_kw_elmt.send_keys(Keys.ENTER)
 | 
	
		
			
				|  |  | +        time.sleep(6)
 | 
	
		
			
				|  |  | +        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
 | 
	
		
			
				|  |  | +        idx = 1
 | 
	
		
			
				|  |  | +        print(len(elmts))
 | 
	
		
			
				|  |  | +        for elmt in elmts:
 | 
	
		
			
				|  |  | +            href = elmt.get_attribute('href')
 | 
	
		
			
				|  |  | +            txt = elmt.text
 | 
	
		
			
				|  |  | +            # print(txt)
 | 
	
		
			
				|  |  | +            if len(txt) > 10:
 | 
	
		
			
				|  |  | +                if href == main_url:
 | 
	
		
			
				|  |  | +                    print("ranking", idx)
 | 
	
		
			
				|  |  | +                    table.insert({'kw': term, 'domain':'hhh.com.tw','ranking': idx, 'title': txt, 'url': href,'dt': datetime.datetime.now()})
 | 
	
		
			
				|  |  | +                    print('clicked....')
 | 
	
		
			
				|  |  | +                    webdriver.ActionChains(driver).move_to_element(elmt).perform()
 | 
	
		
			
				|  |  | +                    webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
 | 
	
		
			
				|  |  | +                    time.sleep(5)
 | 
	
		
			
				|  |  | +                    break
 | 
	
		
			
				|  |  | +            idx += 1
 | 
	
		
			
				|  |  | +        
 | 
	
		
			
				|  |  | +        driver.quit()
 | 
	
		
			
				|  |  | +        print('中場休息')
 | 
	
		
			
				|  |  | +        time.sleep(randint(20,30))
 | 
	
		
			
				|  |  | +    db.close()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
# Script entry point: runs a single pass as soon as the module is executed.
# '9927' is passed as pport, which restart_browser uses as the port of the
# Chrome debugger address on 127.0.0.1.
process_one('9927')
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# parser = argparse.ArgumentParser()
 | 
	
		
			
				|  |  | +# parser.add_argument('--loop', action="store_true")
 | 
	
		
			
				|  |  | +# args = parser.parse_args()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# if args.loop:
 | 
	
		
			
				|  |  | +#     schedule.every(0.4).minutes.do(process_one)
 | 
	
		
			
				|  |  | +#     # print('今天開始')
 | 
	
		
			
				|  |  | +#     # schedule.every().day.at('9:30').do(process_one)
 | 
	
		
			
				|  |  | +#
 | 
	
		
			
				|  |  | +#     while True:
 | 
	
		
			
				|  |  | +#         schedule.run_pending()
 | 
	
		
			
				|  |  | +#         time.sleep(1)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# >> C:\tmp\seo_line.txt 2>&1
 |