news_clickbot.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. from random import randint
  2. import sys
  3. import os
  4. import dataset
  5. from selenium import webdriver
  6. import traceback
  7. import datetime
  8. import codecs
  9. import time
  10. import urllib
  11. import argparse
  12. from selenium.webdriver.chrome.service import Service
  13. from selenium.webdriver.common.by import By
  14. from selenium.webdriver.common.keys import Keys
  15. import logging
  16. import sys
  17. from logging.handlers import SysLogHandler
  18. import socket
  19. import pandas as pd
  20. _LOG_SERVER = ('hhh.ptt.cx', 514)
  21. logger = logging.getLogger('clickbot_100')
  22. handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
  23. logger.addHandler(handler1)
  24. logger.debug('[clickbot_100][火柴星人]begin')
  25. path = 'C:\portable\chromedriver'
  26. path_z = '/Users/zooeytsai/Downloads/chromedriver 4'
  27. def restart_browser(pport):
  28. while True:
  29. try:
  30. os.system('docker container restart tiny4')
  31. time.sleep(1)
  32. break
  33. except:
  34. os.system('docker container restart tiny4')
  35. time.sleep(15)
  36. s = Service('/root/driver/chromedriver')
  37. options = webdriver.ChromeOptions()
  38. options.add_argument('--headless')
  39. options.add_argument('--remote-debugging-port=9222')
  40. options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
  41. options.add_argument("--user-agent=" + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0")
  42. options.add_argument("--incognito")
  43. driver = webdriver.Chrome(options=options, service=s)
  44. driver.delete_all_cookies()
  45. driver.set_window_size(950, 20000)
  46. return driver
  47. def process_one(pport):
  48. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  49. lst = []
  50. table = db['news_log']
  51. cursor = db.query("select * from seo.news_kw")
  52. for c in cursor:
  53. lst.append([c['term']])
  54. for term in lst:
  55. print(term)
  56. logger.debug('[clickbot_100][' + term + ']')
  57. driver = restart_browser(pport)
  58. googleurl = 'https://www.google.com/?num=30'
  59. driver.get(googleurl)
  60. time.sleep(6)
  61. send_kw_elmt = driver.find_element(By.XPATH,'/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
  62. send_kw_elmt.send_keys(term)
  63. time.sleep(3)
  64. send_kw_elmt.send_keys(Keys.ENTER)
  65. time.sleep(6)
  66. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  67. idx = 1
  68. print(len(elmts))
  69. for elmt in elmts:
  70. href = elmt.get_attribute('href')
  71. txt = elmt.text
  72. print(txt)
  73. if len(txt) > 10:
  74. if '炎炎夏日易輾轉難眠' in txt:
  75. print("ranking", idx)
  76. table.insert({'kw': term, 'ranking': idx, 'title': txt, 'url': href,
  77. 'dt': datetime.datetime.now()})
  78. print('clicked....')
  79. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  80. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  81. time.sleep(5)
  82. break
  83. idx += 1
  84. driver.quit()
  85. print('中場休息')
  86. time.sleep(randint(90, 120))
  87. db.close()
  88. process_one('9925')
  89. # parser = argparse.ArgumentParser()
  90. # parser.add_argument('--loop', action="store_true")
  91. # args = parser.parse_args()
  92. # if args.loop:
  93. # schedule.every(0.4).minutes.do(process_one)
  94. # # print('今天開始')
  95. # # schedule.every().day.at('9:30').do(process_one)
  96. #
  97. # while True:
  98. # schedule.run_pending()
  99. # time.sleep(1)
  100. # >> C:\tmp\seo_line.txt 2>&1