123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114 |
- from random import randint
- import sys
- import os
- import dataset
- from selenium import webdriver
- import traceback
- import datetime
- import codecs
- import time
- import urllib
- import argparse
- from selenium.webdriver.chrome.service import Service
- from selenium.webdriver.common.by import By
- from selenium.webdriver.common.keys import Keys
- import logging
- import sys
- from logging.handlers import SysLogHandler
- import socket
- import pandas as pd
# Remote syslog endpoint (host, port); messages are sent over UDP (SOCK_DGRAM).
_LOG_SERVER = ('hhh.ptt.cx', 514)

logger = logging.getLogger('clickbot_100')
# Without an explicit level this logger inherits WARNING from the root logger,
# so every logger.debug() call in this file would be dropped before reaching
# the syslog handler.
logger.setLevel(logging.DEBUG)
handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
logger.addHandler(handler1)
logger.debug('[clickbot_100][火柴星人]begin')

# chromedriver locations; neither name is referenced by the code visible here.
# Raw string: '\p' and '\c' are not valid escape sequences, and relying on
# Python passing them through unchanged triggers a warning on newer versions.
path = r'C:\portable\chromedriver'
path_z = '/Users/zooeytsai/Downloads/chromedriver 4'
def restart_browser(pport):
    """Restart the ``tiny4`` Docker container and return a Chrome driver.

    The driver is attached to the Chrome debugger address
    ``127.0.0.1:<pport>`` rather than launching a new browser.

    Args:
        pport: Port of the Chrome remote-debugging endpoint on localhost.

    Returns:
        A ``webdriver.Chrome`` instance with all cookies deleted and the
        window resized to 950x20000.
    """
    restart_cmd = 'docker container restart tiny4'
    max_attempts = 5

    # os.system() signals failure through its exit status and does not raise,
    # so the original ``try/except`` retry could never trigger. Retry on a
    # non-zero status instead, a bounded number of times.
    # NOTE(review): the source indentation was lost; the 15 s pause is treated
    # as the back-off of the failure branch, the 1 s pause as the success path.
    for attempt in range(1, max_attempts + 1):
        if os.system(restart_cmd) == 0:
            time.sleep(1)
            break
        print(f'container restart failed (attempt {attempt}/{max_attempts})')
        time.sleep(15)
    # If every attempt failed we still fall through: connecting below will
    # raise a descriptive selenium error if the browser is unreachable.

    s = Service('/root/driver/chromedriver')
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--remote-debugging-port=9222')
    options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
    options.add_argument("--user-agent=" + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0")
    options.add_argument("--incognito")

    driver = webdriver.Chrome(options=options, service=s)
    driver.delete_all_cookies()
    # Very tall window (950x20000).
    # NOTE(review): presumably so all results render without scrolling - confirm.
    driver.set_window_size(950, 20000)
    return driver
def _search_and_click(driver, table, term):
    """Google *term*, then record and click the first result with the target title.

    Args:
        driver: Selenium driver used to run the search.
        table: ``dataset`` table that receives one row for the matched result.
        term: Keyword typed into the Google search box.
    """
    driver.get('https://www.google.com/?num=30')
    time.sleep(6)
    search_box = driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
    search_box.send_keys(term)
    time.sleep(3)
    search_box.send_keys(Keys.ENTER)
    time.sleep(6)

    results = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
    print(len(results))
    # NOTE(review): the source indentation was lost; the rank is counted over
    # every result link (1-based), not only those with long titles - confirm.
    for rank, result in enumerate(results, start=1):
        href = result.get_attribute('href')
        title = result.text
        print(title)
        if len(title) > 10 and '炎炎夏日易輾轉難眠' in title:
            print("ranking", rank)
            table.insert({'kw': term, 'ranking': rank, 'title': title, 'url': href,
                          'dt': datetime.datetime.now()})
            print('clicked....')
            # Hover over the link first, then hover and click.
            webdriver.ActionChains(driver).move_to_element(result).perform()
            webdriver.ActionChains(driver).move_to_element(result).click().perform()
            time.sleep(5)
            break


def process_one(pport):
    """Run one pass over every keyword in ``seo.news_kw``.

    For each keyword the browser is restarted, the keyword is searched on
    Google, and the first result whose title contains the target headline is
    logged to the ``news_log`` table and clicked. The function then pauses
    90-120 seconds before the next keyword.

    Args:
        pport: Chrome remote-debugging port, forwarded to ``restart_browser``.
    """
    # NOTE(review): database credentials are hard-coded in this URL; consider
    # loading them from configuration or the environment.
    db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
    try:
        table = db['news_log']
        # Collect plain strings. The original stored one-element lists, which
        # made the string concatenation in the log call raise TypeError.
        terms = [row['term'] for row in db.query("select * from seo.news_kw")]

        for term in terms:
            print(term)
            logger.debug('[clickbot_100][%s]', term)
            driver = restart_browser(pport)
            try:
                _search_and_click(driver, table, term)
            finally:
                # Always release the browser, even if the search step fails.
                driver.quit()
            print('中場休息')
            time.sleep(randint(90, 120))
    finally:
        # Close the connection on both the success and the error path.
        db.close()
if __name__ == '__main__':
    # Guarded so that importing this module (e.g. to reuse restart_browser)
    # does not start a crawl; running the file as a script behaves as before.
    process_one('9925')
- # parser = argparse.ArgumentParser()
- # parser.add_argument('--loop', action="store_true")
- # args = parser.parse_args()
- # if args.loop:
- # schedule.every(0.4).minutes.do(process_one)
- # # print('今天開始')
- # # schedule.every().day.at('9:30').do(process_one)
- #
- # while True:
- # schedule.run_pending()
- # time.sleep(1)
- # >> C:\tmp\seo_line.txt 2>&1
|