# local_general_clickbot.py
import codecs
import datetime
import json
import os
import random
import sys
import time
import traceback
import urllib.parse

import dataset
import fire
import requests
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
  21. db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4')
  22. table=db['general_log']
  23. driver = None
  24. def empty_query(q):
  25. global driver
  26. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  27. driver.get(googleurl)
  28. time.sleep(3)
  29. def process_query(qs):
  30. q=qs[0]
  31. domain=qs[1]
  32. global driver
  33. googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(urllib.parse.quote(q), 100,'zh-TW')
  34. print(googleurl)
  35. driver.get(googleurl)
  36. time.sleep(6)
  37. # driver.save_screenshot('c:/tmp/test.png')
  38. elmts=driver.find_elements(By.XPATH,"//div[@class='yuRUbf']/a")
  39. idx=1
  40. ranking=-1
  41. print(len(elmts))
  42. # if len(elmts) <=0:
  43. # driver.save_screenshot('c:/tmp/test.png')
  44. clicked=False
  45. for elmt in elmts:
  46. href=elmt.get_attribute('href')
  47. txt=elmt.text
  48. if len(txt)>10:
  49. if domain in href:
  50. print('clicked....')
  51. print(href)
  52. print(txt)
  53. print("ranking", idx)
  54. # table.insert({'kw':q,'domain':domain,'ranking':idx,'title':txt,'url':href,'dt':datetime.datetime.now()})
  55. clicked=True
  56. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  57. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  58. time.sleep(5)
  59. break
  60. idx+=1
  61. if not clicked:
  62. print('quit')
  63. driver.quit()
  64. os.exit(-1)
  65. def run_once(q):
  66. global driver
  67. result=[]
  68. options = webdriver.ChromeOptions()
  69. options.add_argument('--headless')
  70. # options.add_argument('--remote-debugging-port=9922')
  71. options.add_argument('--remote-debugging-port='+str(q[2]))
  72. # options.add_experimental_option("debuggerAddress", "127.00:9922")
  73. # options.add_argument("--user-agent=" +user_agent)
  74. options.add_argument("--incognito")
  75. # driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
  76. if os.name=='nt':
  77. driver = webdriver.Chrome(executable_path=r'C:\portable\webdriver\chrome98\chromedriver.exe',options=options)
  78. else:
  79. driver = webdriver.Chrome(executable_path='/opt/webdriver/98/chromedriver',options=options)
  80. driver.delete_all_cookies()
  81. driver.set_window_size(1400,1000)
  82. print('到此')
  83. process_query(q)
  84. time.sleep(3)
  85. driver.quit()
#for c in lst:
#while True:
# try:
# c=random.choice(lst)
# except:
# traceback.print_exc()
# sleepint=random.randint(320,520)
# time.sleep(sleepint)
  94. class JParams(object):
  95. def get(self, kw,domain,port):
  96. print(kw)
  97. print(domain)
  98. try:
  99. run_once( (kw,domain,port) )
  100. except:
  101. print('exception, restarting.....')
  102. return -1
  103. # os.system('docker container restart tiny1')
  104. # time.sleep(10)
  105. if __name__ == '__main__':
  106. val=fire.Fire(JParams)