test_wd.py 4.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162
  1. import time
  2. import json
  3. from selenium import webdriver
  4. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  5. import time
  6. import os
  7. import urllib.parse
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.common.by import By
  10. from selenium.webdriver.chrome.service import Service
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import codecs
  13. import random
  14. import requests
  15. import datetime
  16. import dataset
  17. import time
  18. import traceback
  19. import sys
  20. import fire
  21. import random
  22. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  23. table=db['general_log']
  24. driver = None
  25. headers = {
  26. "Authorization": "Bearer " + "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi",
  27. "Content-Type": "application/x-www-form-urlencoded"
  28. }
  29. def rua():
  30. pool = [
  31. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
  32. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
  33. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
  34. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
  35. "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
  36. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
  37. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
  38. ]
  39. return random.choice(pool)
  40. def empty_query(q):
  41. global driver
  42. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  43. driver.get(googleurl)
  44. time.sleep(3)
  45. def process_query(qs):
  46. q=qs[0]
  47. domain=qs[1]
  48. global driver
  49. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
  50. print(googleurl)
  51. # stealth(driver,
  52. # languages=["en-US", "en"],
  53. # vendor="Google Inc.",
  54. # platform="Win32",
  55. # webgl_vendor="Intel Inc.",
  56. # renderer="Intel Iris OpenGL Engine",
  57. # fix_hairline=True,
  58. # )
  59. driver.get(googleurl)
  60. time.sleep(6)
  61. while True:
  62. try:
  63. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  64. print('尋找')
  65. break
  66. except:
  67. pass
  68. idx=1
  69. ranking=-1
  70. lenelmts=len(elmts)
  71. print('搜尋結果數量',lenelmts)
  72. if lenelmts <=0:
  73. os.system('python3 tplink_reboot.py')
  74. time.sleep(200)
  75. os.system('netsh wlan connect TP-Link_78E0')
  76. # os.system('docker container restart p4444')
  77. return
  78. # if len(elmts) <=0:
  79. # send_msg('network failed...')
  80. # driver.save_screenshot('c:/tmp/test.png')
  81. for elmt in elmts:
  82. href=elmt.get_attribute('href')
  83. txt=elmt.text
  84. if len(txt)>10:
  85. if domain in href:
  86. print('clicked....')
  87. print(href)
  88. print(txt)
  89. print("ranking", idx)
  90. table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now(),'num':1})
  91. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  92. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  93. break
  94. idx+=1
  95. def run_once(q):
  96. global driver
  97. result=[]
  98. os.system('docker container restart p4444')
  99. time.sleep(12)
  100. user_agent = rua()
  101. options = webdriver.ChromeOptions()
  102. options.add_argument('--headless')
  103. options.add_argument("--no-sandbox")
  104. options.add_argument("--disable-dev-shm-usage")
  105. options.add_argument('--disable-blink-features=AutomationControlled')
  106. # options.add_argument("--user-agent=" +user_agent)
  107. # options.add_argument("--incognito")
  108. driver = webdriver.Remote(
  109. command_executor='http://127.0.0.1:4444/wd/hub',
  110. options=options
  111. )
  112. driver.delete_all_cookies()
  113. driver.set_window_size(1400,1000)
  114. print('到此')
  115. process_query(q)
  116. time.sleep(3)
  117. driver.quit()
  118. # sys.exit()
  119. cursor = db.query('SELECT query FROM seo.hhh_gsc_imp where position >=2.5 and position <=8.5 order by rand() limit 500;')
  120. lst=[]
  121. for c in cursor:
  122. lst.append(c['query'])
  123. while True:
  124. kw=random.choice(lst)
  125. print(kw)
  126. run_once((kw, 'hhh.com.tw', 4444))
  127. #class JParams(object):
  128. #
  129. # def get(self, kw, domain, port):
  130. # print(kw)
  131. # print(domain)
  132. # run_once((kw, domain, port))
  133. #if __name__ == '__main__':
  134. # fire.Fire(JParams)