general_clickbot_local.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. import time
  2. import json
  3. from selenium import webdriver
  4. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  5. import time
  6. import os
  7. import urllib.parse
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.common.by import By
  10. from selenium.webdriver.chrome.service import Service
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import codecs
  13. import random
  14. import datetime
  15. import dataset
  16. import pymysql
  17. pymysql.install_as_MySQLdb()
  18. import time
  19. import traceback
  20. import sys
  21. import fire
# Shared Selenium WebDriver handle. It stays None until run_once() assigns a
# Chrome instance to it; empty_query() and process_query() read it.
driver = None
  23. def empty_query(q):
  24. global driver
  25. googleurl = 'https://www.google.com/search?q=' + urllib.parse.quote(q)
  26. driver.get(googleurl)
  27. time.sleep(3)
  28. def process_query(qs):
  29. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  30. table = db['general_log']
  31. q = qs[0]
  32. domain = qs[1]
  33. client='引新聞'
  34. global driver
  35. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100, 'zh-TW')
  36. print(googleurl)
  37. driver.get(googleurl)
  38. time.sleep(6)
  39. elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
  40. idx = 1
  41. ranking = -1
  42. print(len(elmts))
  43. # driver.save_screenshot('c:/tmp/test.png')
  44. for elmt in elmts:
  45. href = elmt.get_attribute('href')
  46. txt = elmt.text
  47. # print(txt)
  48. if domain in href:
  49. print(href)
  50. print(txt)
  51. print("ranking", idx)
  52. table.insert({'kw':q,'client':client,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now(),'domain':domain})
  53. db.close()
  54. print('clicked....')
  55. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  56. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  57. time.sleep(5)
  58. break
  59. idx += 1
  60. def run_once(q):
  61. global driver
  62. result = []
  63. user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
  64. s = Service('C:\/Users\/s1301\/Downloads\/chromedriver_110\/chromedriver')
  65. options = webdriver.ChromeOptions()
  66. options.add_argument('--headless')
  67. # options.add_argument('--remote-debugging-port=9222')
  68. # options.add_experimental_option("debuggerAddress", "192.168.192.45:9922")
  69. options.add_argument("--user-agent=" +user_agent)
  70. options.add_argument("--incognito")
  71. # options.add_argument('--proxy-server=socks5://172.104.93.163:41800')
  72. driver = webdriver.Chrome(
  73. options=options, service=s)
  74. str1 = driver.capabilities['browserVersion']
  75. print('版本', str1)
  76. driver.delete_all_cookies()
  77. driver.set_window_size(1400, 1000)
  78. print(q)
  79. process_query(q)
  80. time.sleep(3)
  81. driver.quit()
  82. # for c in lst:
  83. # while True:
  84. # try:
  85. # c=random.choice(lst)
  86. # except:
  87. # traceback.print_exc()
  88. # sleepint=random.randint(320,520)
  89. # time.sleep(sleepint)
  90. class JParams(object):
  91. def get(self, kw, domain):
  92. print(kw)
  93. print(domain)
  94. run_once((kw, domain))
  95. if __name__ == '__main__':
  96. fire.Fire(JParams)