general_clickbot_proxy.py.save 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. import time
  2. import json
  3. from selenium import webdriver
  4. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  5. import time
  6. import os
  7. import urllib.parse
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.common.by import By
  10. from selenium.webdriver.chrome.service import Service
  11. from selenium.webdriver.common.keys import Keys
  12. from selenium.webdriver.support import expected_conditions as EC
  13. import codecs
  14. import random
  15. import requests
  16. import datetime
  17. import dataset
  18. import time
  19. import traceback
  20. import sys
  21. import fire
  22. import redis
  23. driver = None
  24. def rua():
  25. pool = [
  26. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:73.0) Gecko/20100101 Firefox/73.0",
  27. "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:76.0) Gecko/20100101 Firefox/76.0",
  28. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
  29. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36",
  30. "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
  31. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36",
  32. "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36 OPR/68.0.3618.125",
  33. ]
  34. return random.choice(pool)
  35. def empty_query(q):
  36. global driver
  37. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  38. driver.get(googleurl)
  39. time.sleep(3)
  40. def process_query(qs):
  41. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  42. table=db['general_log']
  43. q=qs[0]
  44. domain=qs[1]
  45. global driver
  46. googleurl = 'https://www.google.com/?num=100'
  47. driver.get(googleurl)
  48. time.sleep(6)
  49. send_kw_elmt = driver.find_element(By.XPATH, '/html/body/div[1]/div[3]/form/div[1]/div[1]/div[1]/div/div[2]/input')
  50. send_kw_elmt.send_keys(q)
  51. time.sleep(3)
  52. send_kw_elmt.send_keys(Keys.ENTER)
  53. time.sleep(6)
  54. print(driver.current_url)
  55. elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
  56. idx=1
  57. ranking=-1
  58. print('網頁數量',len(elmts))
  59. # driver.save_screenshot('c:/tmp/test.png')
  60. if 'site' in q:
  61. href = elmts[0].get_attribute('href')
  62. txt = elmts[0].text
  63. print('clicked....')
  64. print(href)
  65. print(txt)
  66. print("ranking", idx)
  67. table.insert(
  68. {'kw': q, 'domain': domain, 'ranking': idx, 'title': txt, 'url': href, 'dt': datetime.datetime.now()})
  69. webdriver.ActionChains(driver).move_to_element(elmts[0]).perform()
  70. webdriver.ActionChains(driver).move_to_element(elmts[0]).click().perform()
  71. time.sleep(5)
  72. for elmt in elmts:
  73. href=elmt.get_attribute('href')
  74. txt=elmt.text
  75. if len(txt)>10:
  76. if domain in href:
  77. print('clicked....')
  78. print('點擊網址',href)
  79. print('標題',txt)
  80. print("ranking", idx)
  81. table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
  82. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  83. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  84. time.sleep(5)
  85. break
  86. idx+=1
  87. db.close()
  88. def run_once(q):
  89. global driver
  90. result=[]
  91. s = Service('/root/driver/chromedriver')
  92. user_agent = rua()
  93. options = webdriver.ChromeOptions()
  94. options.add_argument('--headless')
  95. options.add_argument('--remote-debugging-port=9222')
  96. options.add_experimental_option("debuggerAddress", "127.0.0.1:9922")
  97. # options.add_argument("--user-agent=" +user_agent)
  98. options.add_argument("--incognito")
  99. r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
  100. data=r.get('google_proxy')
  101. jstext=data.decode('utf-8')
  102. jsobj=json.loads(jstext)
  103. proxy=random.choice(jsobj)
  104. print('Freeproxy',proxy)
  105. change_ip = ["'--proxy-server='+proxy","--proxy-server=socks5://127.0.0.1:9050","--proxy-server=socks5://192.53.174.202:8180"]
  106. options.add_argument('--proxy-server=socks5://192.53.174.202:8180')
  107. driver = webdriver.Chrome(
  108. options=options,service=s)
  109. if 'sorry' in driver.current_url:
  110. r = redis.Redis(host='db.ptt.cx', port=6379, db=2,password='choozmo9')
  111. data=r.get('google_proxy')
  112. jstext=data.decode('utf-8')
  113. jsobj=json.loads(jstext)
  114. print('Free proxy',jsobj)
  115. proxy=random.choice(jsobj)
  116. change_ip_list = ['--proxy-server=%s' % proxy,"--proxy-server=socks5://127.0.0.1:9050","--proxy-server=socks5://192.53.174.202:8180"]
  117. change_ip = random.choice(change_ip_list)
  118. options.add_argument(change_ip)
  119. print('使用代理ip',change_ip)
  120. driver = webdriver.Chrome(options=options,service=s)
  121. driver.delete_all_cookies()
  122. driver.set_window_size(1400,1000)
  123. process_query(q)
  124. time.sleep(3)
  125. driver.quit()
  126. #for c in lst:
  127. #while True:
  128. # try:
  129. # c=random.choice(lst)
  130. # except:
  131. # traceback.print_exc()
  132. # sleepint=random.randint(320,520)
  133. # time.sleep(sleepint)
  134. class JParams(object):
  135. def get(self, kw,domain,port):
  136. run_once( (kw,domain,port) )
  137. if __name__ == '__main__':
  138. fire.Fire(JParams)