# general_clickbot_hhh.py
import codecs
import datetime
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
  19. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  20. table=db['general_log']
  21. driver = None
  22. def rua():
  23. pool = [
  24. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
  25. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
  26. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
  27. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
  28. "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
  29. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
  30. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
  31. ]
  32. return random.choice(pool)
  33. def empty_query(q):
  34. global driver
  35. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  36. driver.get(googleurl)
  37. time.sleep(3)
  38. def process_query(qs):
  39. q=qs[0]
  40. domain=qs[1]
  41. global driver
  42. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
  43. print(googleurl)
  44. driver.get(googleurl)
  45. time.sleep(6)
  46. while True:
  47. try:
  48. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  49. print('尋找')
  50. break
  51. except:
  52. pass
  53. idx=1
  54. ranking=-1
  55. print('搜尋結果數量',len(elmts))
  56. # driver.save_screenshot('c:/tmp/test.png')
  57. for elmt in elmts:
  58. href=elmt.get_attribute('href')
  59. txt=elmt.text
  60. if len(txt)>10:
  61. if domain in href:
  62. print('clicked....')
  63. print(href)
  64. print(txt)
  65. print("ranking", idx)
  66. table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now(),'num':1})
  67. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  68. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  69. break
  70. idx+=1
  71. def run_once(q):
  72. global driver
  73. result=[]
  74. # s = Service('/root/driver/chromedriver')
  75. # s = Service('/Users/zooeytsai/Downloads/chromedriver 2')
  76. user_agent = rua()
  77. options = webdriver.ChromeOptions()
  78. options.add_argument('--headless')
  79. options.add_argument("--no-sandbox")
  80. options.add_argument("--disable-dev-shm-usage")
  81. # options.add_argument("start-maximized")
  82. # options.add_argument('--remote-debugging-port='+str(q[2]))
  83. # options.add_argument('--remote-debugging-port=9222')
  84. # options.add_argument("--user-agent=" +user_agent)
  85. options.add_argument("--incognito")
  86. # driver = webdriver.Chrome(options=options,service=s)
  87. print('before init')
  88. driver = webdriver.Chrome(options=options)
  89. print('after init')
  90. driver.delete_all_cookies()
  91. # driver.set_window_size(1400,1000)
  92. print('到此')
  93. process_query(q)
  94. time.sleep(3)
  95. driver.quit()
  96. sys.exit()
  97. cursor = db.query('SELECT query FROM seo.hhh_gsc_imp where position >=2.5 and position <=8.5 order by rand() limit 1')
  98. query=None
  99. for c in cursor:
  100. query=c['query']
  101. print(query)
  102. break
  103. run_once((query,'hhh.com.tw'))