single_page_clickbot.py 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. from random import randint
  2. import sys
  3. import os
  4. import dataset
  5. from selenium import webdriver
  6. import traceback
  7. import datetime
  8. import codecs
  9. import time
  10. import urllib
  11. import argparse
  12. from selenium.webdriver.chrome.service import Service
  13. from selenium.webdriver.common.by import By
  14. from selenium.webdriver.common.keys import Keys
  15. import logging
  16. import sys
  17. from logging.handlers import SysLogHandler
  18. import socket
  19. import pandas as pd
  # Remote syslog collector (host, UDP port) that receives this bot's records.
  _LOG_SERVER = ('hhh.ptt.cx', 514)

  logger = logging.getLogger('clickbot_100')
  # A freshly created logger has no level of its own and falls back to the
  # root logger's WARNING threshold, which silently discards every
  # logger.debug() call made by this script. Lower the threshold explicitly
  # so the debug records actually reach the syslog handler.
  logger.setLevel(logging.DEBUG)
  handler1 = SysLogHandler(address=_LOG_SERVER, socktype=socket.SOCK_DGRAM)
  logger.addHandler(handler1)
  logger.debug('[clickbot_100][火柴星人]begin')
  def restart_browser(pport, container='tiny4', max_attempts=5):
      """Restart the browser container and return a WebDriver attached to it.

      The docker container is restarted first; the restart is retried when the
      docker command reports failure. If every attempt fails, the function
      still goes on to attach (best effort), so a broken restart surfaces as a
      WebDriver error rather than as an endless loop.

      Args:
          pport: Port on 127.0.0.1 of the Chrome debugger to attach to.
          container: Name of the docker container to restart.
          max_attempts: Maximum number of restart attempts.

      Returns:
          A selenium Chrome WebDriver with all cookies deleted and the window
          sized to 950x20000.
      """
      # Local import: the file's import header does not pull in subprocess.
      import subprocess

      command = ['docker', 'container', 'restart', container]
      for attempt in range(1, max_attempts + 1):
          # os.system() never raises on a failing command, so the exit status
          # has to be inspected explicitly to know whether a retry is needed.
          try:
              status = subprocess.run(command, check=False).returncode
          except OSError as err:
              # e.g. the docker executable is not installed / not on PATH.
              logger.warning('[clickbot_100]docker restart failed: %s', err)
              status = -1
          if status == 0:
              time.sleep(1)
              break
          logger.warning('[clickbot_100]docker restart attempt %d/%d exited with %s',
                         attempt, max_attempts, status)
          if attempt < max_attempts:
              # Give the docker daemon time to settle before trying again.
              time.sleep(15)

      service = Service('/root/driver/chromedriver')
      options = webdriver.ChromeOptions()
      options.add_argument('--headless')
      options.add_argument('--remote-debugging-port=9222')
      # NOTE(review): with "debuggerAddress" set, chromedriver attaches to an
      # already running browser; the launch flags added here are presumably
      # not applied to that browser - confirm whether they are still needed.
      options.add_experimental_option("debuggerAddress", f"127.0.0.1:{pport}")
      options.add_argument(
          "--user-agent=" + "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0")
      options.add_argument("--incognito")

      driver = webdriver.Chrome(options=options, service=service)
      driver.delete_all_cookies()
      driver.set_window_size(950, 20000)
      return driver
  def process_one(pport):
      """Search Google for each tracked keyword, log the page's rank, click it.

      For every keyword a fresh browser session is obtained through
      restart_browser(), the keyword is searched on Google, and the result
      anchors are scanned for the tracked article URL. When it is found, a row
      is inserted into the ``general_log`` table and the result is clicked.
      The function then pauses 20-30 seconds before the next keyword.

      The browser session is always quit and the database connection always
      closed, even when a step raises.

      Args:
          pport: Chrome debugger port, forwarded to restart_browser().
      """
      # NOTE(review): the credentials are embedded in the connection URL;
      # consider moving them to configuration or the environment.
      db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
      try:
          lst = ['裝潢預算', '工程費', '丈量費', '裝修預算']
          table = db['general_log']
          main_url = 'https://hhh.com.tw/columns/detail/2094/'
          for term in lst:
              print(term)
              logger.debug('[clickbot_100][%s]', term)
              driver = restart_browser(pport)
              try:
                  _search_and_click(driver, table, term, main_url)
              finally:
                  # Release the browser session even if the search step failed.
                  driver.quit()
              print('中場休息')
              time.sleep(randint(20, 30))
      finally:
          db.close()


  def _search_and_click(driver, table, term, main_url):
      """Run one Google search for ``term`` and click ``main_url`` if listed."""
      googleurl = 'https://www.google.com/?num=40'
      driver.get(googleurl)
      time.sleep(6)

      # Locate the query box by its form-field name rather than by an absolute
      # DOM path, which stops matching as soon as the page layout shifts.
      send_kw_elmt = driver.find_element(By.NAME, 'q')
      send_kw_elmt.send_keys(term)
      time.sleep(3)
      send_kw_elmt.send_keys(Keys.ENTER)
      time.sleep(6)

      elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
      print(len(elmts))
      # NOTE(review): the rank counts every matched anchor, including ones
      # whose text is too short to pass the filter below - confirm this is the
      # intended numbering.
      for idx, elmt in enumerate(elmts, start=1):
          href = elmt.get_attribute('href')
          txt = elmt.text
          # Only anchors with a title longer than 10 characters are considered.
          if len(txt) > 10 and href == main_url:
              print("ranking", idx)
              table.insert({'kw': term, 'domain': 'hhh.com.tw', 'ranking': idx,
                            'title': txt, 'url': href,
                            'dt': datetime.datetime.now()})
              print('clicked....')
              # Move onto the result first, then click it in a second action.
              webdriver.ActionChains(driver).move_to_element(elmt).perform()
              webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
              time.sleep(5)
              break
  # Script entry: run a single pass against the browser whose debugger
  # listens on this port.
  _DEBUGGER_PORT = '9927'
  process_one(_DEBUGGER_PORT)
  85. # parser = argparse.ArgumentParser()
  86. # parser.add_argument('--loop', action="store_true")
  87. # args = parser.parse_args()
  88. # if args.loop:
  89. # schedule.every(0.4).minutes.do(process_one)
  90. # # print('今天開始')
  91. # # schedule.every().day.at('9:30').do(process_one)
  92. #
  93. # while True:
  94. # schedule.run_pending()
  95. # time.sleep(1)
  96. # >> C:\tmp\seo_line.txt 2>&1