# news_clickbot.py
import argparse
import codecs
import datetime
import logging
import os
import socket
import subprocess
import sys
import time
import traceback
import urllib
from logging.handlers import SysLogHandler
from random import randint

import dataset
import pandas as pd
import pymysql
import schedule
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
  22. pymysql.install_as_MySQLdb()
  23. _LOG_SERVER = ('hhh.ptt.cx', 514)
  24. logger = logging.getLogger('clickbot_100')
  25. handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
  26. logger.addHandler(handler1)
  27. logger.debug('[clickbot_100][火柴星人]begin')
  28. path = 'C:\portable\chromedriver'
  29. path_z = '/Users/zooeytsai/Downloads/chromedriver 4'
  30. def restart_browser(pport):
  31. s = Service('/root/driver/chromedriver')
  32. options = webdriver.ChromeOptions()
  33. options.add_argument('--headless')
  34. options.add_argument('--remote-debugging-port=9222')
  35. options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
  36. options.add_argument("--user-agent=" + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0")
  37. options.add_argument("--incognito")
  38. driver = webdriver.Chrome(options=options, service=s)
  39. driver.delete_all_cookies()
  40. driver.set_window_size(950, 20000)
  41. return driver
  42. def process_one(pport):
  43. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  44. lst = ['好睡王 引新聞','好睡王 Yahoo','好睡王 HiNet','好睡王 PCHOME','好睡王 蕃新聞','好睡王 新浪','好睡王 台北郵報','好睡王 LIFE','好睡王 match生活網','好睡王 POPDAILY','好睡王 LINE TODAY']
  45. table = db['news_log']
  46. for term in lst:
  47. print(term)
  48. logger.debug('[clickbot_100][' + term + ']')
  49. driver = restart_browser(pport)
  50. googleurl = 'https://www.google.com/?num=30'
  51. driver.get(googleurl)
  52. time.sleep(6)
  53. send_kw_elmt = driver.find_element(By.XPATH,'/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
  54. send_kw_elmt.send_keys(term)
  55. time.sleep(3)
  56. send_kw_elmt.send_keys(Keys.ENTER)
  57. time.sleep(6)
  58. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  59. idx = 1
  60. print(len(elmts))
  61. for elmt in elmts:
  62. href = elmt.get_attribute('href')
  63. txt = elmt.text
  64. print(txt)
  65. if len(txt) > 10:
  66. if '炎炎夏日易輾轉難眠' in txt:
  67. print("ranking", idx)
  68. table.insert({'kw': term, 'ranking': idx, 'title': txt, 'url': href,
  69. 'dt': datetime.datetime.now()})
  70. print('clicked....')
  71. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  72. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  73. time.sleep(5)
  74. break
  75. idx += 1
  76. driver.quit()
  77. print('中場休息')
  78. time.sleep(randint(90, 120))
  79. db.close()
  80. while True:
  81. try:
  82. os.system('docker container restart tiny4')
  83. time.sleep(1)
  84. break
  85. except:
  86. os.system('docker container restart tiny4')
  87. time.sleep(15)
  88. process_one('9925')
# Disabled scheduler-based entry point, kept for reference.
# parser = argparse.ArgumentParser()
# parser.add_argument('--loop', action="store_true")
# args = parser.parse_args()
# if args.loop:
#     schedule.every(0.4).minutes.do(process_one)
# # print('今天開始')
# # schedule.every().day.at('9:30').do(process_one)
#
# while True:
#     schedule.run_pending()
#     time.sleep(1)
# >> C:\tmp\seo_line.txt 2>&1