# hhh_selenium_cases.py (3.0 KB)
  1. #import redis
  2. import time
  3. #import json
  4. from selenium import webdriver
  5. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  6. import time
  7. import os
  8. import urllib.parse
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import codecs
  13. import random
  14. import requests
  15. import time
  16. import sys
  17. import docker
  18. import codecs
  19. import random
  20. import os
  21. import time
  22. driver=None
  23. headers = {
  24. "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
  25. "Content-Type": "application/x-www-form-urlencoded"
  26. }
  27. def send_msg(kw):
  28. params = {"message": "處理關鍵字: "+kw}
  29. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  30. def empty_query(q):
  31. global driver
  32. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  33. driver.get(googleurl)
  34. time.sleep(3)
  35. def process_query(qs):
  36. q=qs[0]
  37. domain=qs[2]
  38. cnt=qs[1]
  39. global driver
  40. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  41. driver.get(googleurl)
  42. time.sleep(3)
  43. if cnt > 0:
  44. for i in range(cnt):
  45. elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
  46. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  47. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  48. time.sleep(3)
  49. elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
  50. idx=1
  51. ranking=-1
  52. for elmt in elmts:
  53. href=elmt.get_attribute('href')
  54. txt=elmt.text
  55. if len(txt)>10:
  56. # if 'hhh.com.tw' in href:
  57. # if 'hhh.com.tw' in href:
  58. # if 'ai.choozmo.com' in href:
  59. if domain in href:
  60. # if 'searchome.net' in href:
  61. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  62. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  63. break
  64. def re_get_webdriver():
  65. global driver
  66. result=[]
  67. client = docker.from_env()
  68. ls=client.containers.list()
  69. print(ls)
  70. ls[0].restart()
  71. time.sleep(8)
  72. options = webdriver.ChromeOptions()
  73. driver = webdriver.Remote(
  74. command_executor='http://127.0.0.1:4444/wd/hub',
  75. desired_capabilities=options.to_capabilities())
  76. driver.set_window_size(1400,1000)
  77. def run_once(url):
  78. global driver
  79. i=random.randint(0,13)
  80. # if i==0 or driver is None:
  81. if True:
  82. re_get_webdriver()
  83. driver.get(url)
  84. time.sleep(2)
  85. fpath=__file__
  86. # fpath=fpath.replace('hhh_click_selenium.py','cases.csv')
  87. fpath=fpath.replace('hhh_selenium_cases.py','cases.csv')
  88. print(fpath)
  89. lst=[]
  90. fr=codecs.open(fpath,'r','utf-8')
  91. lines=fr.readlines()
  92. for l in lines:
  93. elmts=l.split(',')
  94. lst.append('https://www.hhh.com.tw'+elmts[0])
  95. fr.close()
  96. #for i in range(20):
  97. while True:
  98. l=random.choice(lst)
  99. print(l)
  100. # driver.get(l)
  101. run_once(l)
  102. # time.sleep(2)