# general_clickbot_hhh.py (4.5 KB)
  1. import time
  2. from selenium import webdriver
  3. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  4. import time
  5. import os
  6. import urllib.parse
  7. from selenium.webdriver.support.ui import WebDriverWait
  8. from selenium.webdriver.common.by import By
  9. from selenium.webdriver.chrome.service import Service
  10. from selenium.webdriver.support import expected_conditions as EC
  11. import codecs
  12. import random
  13. import datetime
  14. import dataset
  15. import time
  16. import traceback
  17. import sys
  18. import random
  19. import socket
  20. import requests
  21. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  22. table=db['general_log']
  23. driver = None
  24. headers = {
  25. "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
  26. "Content-Type": "application/x-www-form-urlencoded"
  27. }
  28. def rua():
  29. pool = [
  30. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
  31. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
  32. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
  33. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
  34. "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
  35. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
  36. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
  37. ]
  38. return random.choice(pool)
  39. def send_msg(kw):
  40. hname=socket.gethostname()
  41. params = {"message": hname+": "+kw}
  42. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  43. def empty_query(q):
  44. global driver
  45. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  46. driver.get(googleurl)
  47. time.sleep(3)
  48. def process_query(qs):
  49. q=qs[0]
  50. domain=qs[1]
  51. global driver
  52. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
  53. print(googleurl)
  54. driver.get(googleurl)
  55. time.sleep(6)
  56. while True:
  57. try:
  58. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  59. print('尋找')
  60. break
  61. except:
  62. pass
  63. idx=1
  64. ranking=-1
  65. print('搜尋結果數量',len(elmts))
  66. if len(elmts) <=0:
  67. send_msg('network failed...')
  68. # driver.save_screenshot('c:/tmp/test.png')
  69. for elmt in elmts:
  70. href=elmt.get_attribute('href')
  71. txt=elmt.text
  72. if len(txt)>10:
  73. if domain in href:
  74. print('clicked....')
  75. print(href)
  76. print(txt)
  77. print("ranking", idx)
  78. table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now(),'num':1})
  79. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  80. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  81. break
  82. idx+=1
  83. def run_once(q):
  84. global driver
  85. result=[]
  86. # s = Service('/root/driver/chromedriver')
  87. # s = Service('/Users/zooeytsai/Downloads/chromedriver 2')
  88. user_agent = rua()
  89. options = webdriver.ChromeOptions()
  90. options.add_argument('--headless')
  91. options.add_argument("--no-sandbox")
  92. options.add_argument("--disable-dev-shm-usage")
  93. ### options.add_argument("--proxy-server=socks5://192.168.192.156:1080")
  94. # options.add_argument("--proxy-server=socks5://192.168.192.119:1080")
  95. # options.add_argument("start-maximized")
  96. # options.add_argument('--remote-debugging-port='+str(q[2]))
  97. # options.add_argument('--remote-debugging-port=9222')
  98. # options.add_argument("--user-agent=" +user_agent)
  99. options.add_argument("--incognito")
  100. # driver = webdriver.Chrome(options=options,service=s)
  101. print('before init')
  102. driver = webdriver.Chrome(options=options)
  103. print('after init')
  104. driver.delete_all_cookies()
  105. # driver.set_window_size(1400,1000)
  106. print('到此')
  107. process_query(q)
  108. time.sleep(3)
  109. driver.quit()
  110. sys.exit()
  111. cursor = db.query('SELECT query FROM seo.hhh_gsc_imp where position >=2.5 and position <=8.5 order by rand() limit 1')
  112. query=None
  113. for c in cursor:
  114. query=c['query']
  115. print(query)
  116. break
  117. run_once((query,'hhh.com.tw'))