# phantom_18888.py (3.2 KB)
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. import urllib.parse
  10. from selenium.webdriver.support.ui import WebDriverWait
  11. from selenium.webdriver.common.by import By
  12. from selenium.webdriver.support import expected_conditions as EC
  13. import codecs
  14. import random
  15. import requests
  16. import time
  17. import sys
  18. import docker
  19. import codecs
  20. import random
  21. import os
  22. import time
  23. driver=None
  24. headers = {
  25. "Authorization": "Bearer " + "t35vhZtWNgvDNWHc3DJh0OKll3mcB9GvC8K2EAkBug2",
  26. "Content-Type": "application/x-www-form-urlencoded"
  27. }
  28. def send_msg(kw):
  29. params = {"message": "處理關鍵字: "+kw}
  30. r = requests.post("https://notify-api.line.me/api/notify",headers=headers, params=params)
  31. def empty_query(q):
  32. global driver
  33. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  34. driver.get(googleurl)
  35. time.sleep(3)
  36. def process_query(qs):
  37. q=qs[0]
  38. domain=qs[2]
  39. cnt=qs[1]
  40. global driver
  41. googleurl='https://www.google.com/search?q='+urllib.parse.quote(q)
  42. driver.get(googleurl)
  43. time.sleep(3)
  44. if cnt > 0:
  45. for i in range(cnt):
  46. elmt=driver.find_element_by_xpath("//a[@id='pnnext']")
  47. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  48. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  49. time.sleep(3)
  50. elmts=driver.find_elements_by_xpath("//div[@class='g']//div[@class='yuRUbf']//a")
  51. idx=1
  52. ranking=-1
  53. for elmt in elmts:
  54. href=elmt.get_attribute('href')
  55. txt=elmt.text
  56. if len(txt)>10:
  57. # if 'hhh.com.tw' in href:
  58. # if 'hhh.com.tw' in href:
  59. # if 'ai.choozmo.com' in href:
  60. if domain in href:
  61. # if 'searchome.net' in href:
  62. webdriver.ActionChains(driver).move_to_element(elmt).perform()
  63. webdriver.ActionChains(driver).move_to_element(elmt).click().perform()
  64. break
  65. def re_get_webdriver():
  66. global driver
  67. result=[]
  68. # client = docker.from_env()
  69. # ls=client.containers.list()
  70. # print(ls)
  71. # for l in ls:
  72. # if 'p17777' in l.name:
  73. # ls[0].restart()
  74. # time.sleep(4)
  75. if driver is not None:
  76. print('closing....')
  77. driver.quit()
  78. # options = webdriver.EdgeOptions()
  79. try:
  80. os.system('docker run -it corbinu/alpine-w3m '+url)
  81. return
  82. except:
  83. import traceback
  84. traceback.print_exc()
  85. driver=None
  86. return None
  87. driver=None
  88. def run_once(url):
  89. try:
  90. os.system('docker run -it corbinu/alpine-w3m '+url)
  91. except:
  92. print('exception')
  93. fpath=__file__
  94. elmts=fpath.split('\\')
  95. fpath=fpath.replace(elmts[-1],'urls.csv')
  96. os.system('docker restart p17777')
  97. print(fpath)
  98. lst=[]
  99. fr=codecs.open(fpath,'r','utf-8')
  100. lines=fr.readlines()
  101. for l in lines:
  102. elmts=l.split(',')
  103. lst.append('https://www.hhh.com.tw'+elmts[0])
  104. fr.close()
  105. #lst=['https://www.hhh.com.tw/columns/detail/3427/index.php']
  106. #for i in range(20):
  107. while True:
  108. l=random.choice(lst)
  109. print(l)
  110. # driver.get(l)
  111. run_once(l)
  112. # time.sleep(2)