general_remote_click.py 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. import time
  2. import json
  3. from selenium import webdriver
  4. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  5. import time
  6. import os
  7. import urllib.parse
  8. from selenium.webdriver.support.ui import WebDriverWait
  9. from selenium.webdriver.common.by import By
  10. from selenium.webdriver.support import expected_conditions as EC
  11. import codecs
  12. import random
  13. import requests
  14. import datetime
  15. import dataset
  16. import time
  17. import traceback
  18. import sys
  19. import fire
  20. #from INNNews import notifytest
  # Module-level configuration shared by every function below.
  #
  # Both secrets can be supplied through the environment (SEO_DB_URL and
  # LINE_NOTIFY_TOKEN) so that credentials do not have to live in the source.
  # NOTE(review): the literal fallbacks are kept only so existing deployments
  # keep working unchanged; they look like live credentials committed to the
  # repository -- rotate them and drop the fallbacks once the environment
  # variables are set everywhere.
  db = dataset.connect(os.environ.get(
      'SEO_DB_URL',
      'mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb4'))
  # Table that process_query() inserts one row into per matched result.
  table = db['general_log']
  # Shared Selenium WebDriver handle; run_once() assigns it before use.
  driver = None
  # HTTP headers sent by send_msg() to the LINE Notify endpoint.
  headers = {
      "Authorization": "Bearer " + os.environ.get(
          'LINE_NOTIFY_TOKEN', "6SDULL1Ebklduc6TFxa97AFto5Sj21kyJ30CxiLiSoi"),
      "Content-Type": "application/x-www-form-urlencoded",
  }
  def send_msg(kw, timeout=10):
      """Push an error notification through the LINE Notify API.

      Posts the text ``"error: " + kw`` to the LINE Notify endpoint using the
      module-level ``headers``.

      Args:
          kw: Text appended to the ``"error: "`` prefix.
          timeout: Seconds to wait for the HTTP call. Without a timeout
              ``requests`` can block indefinitely on a stalled connection.

      Raises:
          requests.RequestException: On connection failure or timeout.
      """
      params = {"message": "error: " + kw}
      resp = requests.post(
          "https://notify-api.line.me/api/notify",
          headers=headers,
          params=params,
          timeout=timeout,
      )
      if not resp.ok:
          # A rejected notification would otherwise go completely unnoticed.
          print('send_msg failed:', resp.status_code, resp.text)
  def empty_query(q):
      """Open the Google results page for *q* and pause for three seconds.

      Navigates the module-level ``driver``; nothing is read back from the
      page and nothing is returned.
      """
      encoded = urllib.parse.quote(q)
      driver.get('https://www.google.com/search?q=' + encoded)
      time.sleep(3)
  def process_query(qs):
      """Search Google for a keyword and click the first link on a domain.

      Loads the results page (``num=100``, ``hl=zh-TW``) in the module-level
      ``driver``, walks the links found under ``div.yuRUbf`` and, for the
      first one whose URL contains the domain and whose text is longer than
      10 characters, inserts a row into ``table`` and clicks the link.

      Args:
          qs: Sequence whose first item is the search keyword and whose
              second item is the domain substring to look for.
      """
      q = qs[0]
      domain = qs[1]
      googleurl = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
          urllib.parse.quote(q), 100, 'zh-TW')
      print(googleurl)
      driver.get(googleurl)
      # Fixed pause so the results page can finish rendering.
      time.sleep(6)
      # NOTE(review): 'yuRUbf' is presumably Google's wrapper class for a
      # result link; it is not a stable identifier -- verify when this
      # stops matching anything.
      elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']/a")
      print(len(elmts))
      # NOTE(review): the position counter advances once per link, including
      # links that are skipped below -- confirm this is the intended ranking.
      for idx, elmt in enumerate(elmts, start=1):
          href = elmt.get_attribute('href')
          txt = elmt.text
          # get_attribute() yields None when the attribute is missing; skip
          # such links instead of raising TypeError on the ``in`` test.
          if href is None or len(txt) <= 10 or domain not in href:
              continue
          print('clicked....')
          print(href)
          print(txt)
          print("ranking", idx)
          table.insert({
              'kw': q,
              'domain': domain,
              'ranking': idx,
              'title': txt,
              'url': href,
              'dt': datetime.datetime.now(),
          })
          # Hover first, then hover-and-click, as two separate action chains.
          webdriver.ActionChains(driver).move_to_element(elmt).perform()
          webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
          break
  def run_once(q):
      """Attach to a running Chrome, process one query and quit the driver.

      Builds the Chrome options, starts chromedriver from the path held in the
      ``WEBDRIVER`` environment variable, stores the session in the
      module-level ``driver`` and hands ``q`` to ``process_query``.

      Args:
          q: ``(keyword, domain, debugger_address)``; ``q[2]`` is passed as
              Chrome's ``debuggerAddress`` option and the whole tuple is
              forwarded to ``process_query``.

      Raises:
          KeyError: If the ``WEBDRIVER`` environment variable is not set.
      """
      global driver
      options = webdriver.ChromeOptions()
      options.add_argument('--headless')
      options.add_argument('--disable-dev-shm-usage')
      options.add_argument('--no-sandbox')
      epath = os.environ['WEBDRIVER']
      options.add_experimental_option("debuggerAddress", q[2])
      options.add_argument("--incognito")
      driver = webdriver.Chrome(executable_path=epath, options=options)
      try:
          driver.delete_all_cookies()
          driver.set_window_size(1400, 1000)
          print(q)
          process_query(q)
          time.sleep(3)
      finally:
          # Always release the chromedriver session, even when the query
          # raised; otherwise every failed run leaks a driver process.
          driver.quit()
  class JParams(object):
      """Command set handed to ``fire.Fire`` by the script entry point."""

      def get(self, kw, domain, address):
          """Run a single keyword/domain lookup and report failures.

          Args:
              kw: Search keyword.
              domain: Domain substring to look for among the results.
              address: Chrome debugger address, forwarded to ``run_once``.

          Any ``Exception`` raised by the run is printed with its traceback
          and reported through ``send_msg``; it is not re-raised.
          """
          try:
              print(kw)
              print(domain)
              run_once((kw, domain, address))
          except Exception:
              # ``Exception`` rather than a bare ``except`` so that Ctrl-C
              # and SystemExit still terminate the process.
              traceback.print_exc()
              send_msg('SEO docker exception... stop')
  if __name__ == '__main__':
      # Script entry point: hand the JParams class to python-fire, which
      # builds the command-line interface from it.
      fire.Fire(JParams)