# redis_1777.py (2.5 KB)
  1. #import redis
  2. import time
  3. import traceback
  4. #import json
  5. from selenium import webdriver
  6. from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
  7. import time
  8. import os
  9. from selenium.webdriver.support.ui import WebDriverWait
  10. from selenium.webdriver.common.by import By
  11. from selenium.webdriver.support import expected_conditions as EC
  12. import dataset
  13. import json
  14. import random
  15. import time
  16. import sys
  17. import codecs
  18. import redis
  19. import random
  20. import os
  21. import time
  22. from userAgentRandomizer import userAgents
  # Shared Selenium driver; None whenever no browser session is available.
  driver = None


  def re_get_webdriver():
      """(Re)create the module-level headless Chrome driver.

      An existing session is shut down first (``driver.quit()`` followed by
      ``killall chrome``).  A new Chrome instance is then started headless and
      incognito, using the user agent returned by ``userAgents().random()``,
      and its window is resized to 1400x1000.

      On success the global ``driver`` refers to the new session.  If building
      the options or resizing the window fails, the traceback is printed and
      ``driver`` is left as ``None``.

      Returns:
          None in every case; callers inspect the global ``driver``.

      Raises:
          SystemExit: if ``webdriver.Chrome`` cannot be started.  The process
              is also sent a ``kill`` signal before exiting.
      """
      global driver

      if driver is not None:
          print('closing....')
          try:
              driver.quit()
          except Exception:
              # A session that is already dead must not block the restart.
              traceback.print_exc()
          os.system('killall chrome')
          print('quit....')
          driver = None

      try:
          user_agent = userAgents().random()
          options = webdriver.ChromeOptions()
          options.add_argument("--no-sandbox")
          options.add_argument("--disable-dev-shm-usage")
          options.add_argument("--headless")
          print(user_agent)
          options.add_argument("--user-agent=" + user_agent)
          options.add_argument("--incognito")

          try:
              driver = webdriver.Chrome(options=options)
          except Exception:
              # Uses the module-level ``traceback``: a function-local import
              # here would shadow it and turn this handler into an
              # UnboundLocalError.
              traceback.print_exc()
              os.system('kill %d' % os.getpid())
              sys.exit()

          driver.set_window_size(1400, 1000)
      except Exception:
          # ``except Exception`` (not a bare except) so that the SystemExit
          # raised above is not swallowed.
          traceback.print_exc()
          driver = None
      return None
  def run_once(url):
      """Open ``url`` in a new browser tab, then scroll the current window.

      A random integer in 0..7 is drawn on every call; when it is 0, or when
      no driver exists, the function sleeps 8 seconds and calls
      ``re_get_webdriver()`` to restart the browser.  If there is still no
      driver afterwards the call is a no-op.

      The scroll script runs in whatever window the driver is currently
      attached to; this function never switches to the newly opened tab.

      Args:
          url: Address to open.  It is handed to the page script as an
              argument, so quotes or backslashes in it cannot break or alter
              the JavaScript.

      Returns:
          None.  Errors raised by the browser calls are printed, not raised.
      """
      # ``driver`` is only read here, so no ``global`` declaration is needed.
      if random.randint(0, 7) == 0 or driver is None:
          time.sleep(8)
          re_get_webdriver()

      if driver is None:
          return

      try:
          # Pass the URL via ``arguments[0]`` instead of splicing it into the
          # script source.
          driver.execute_script('window.open(arguments[0], "_blank");', url)
          driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
          time.sleep(0.5)
      except Exception:
          # Not a bare except: Ctrl-C and SystemExit must still propagate.
          traceback.print_exc()
          print('exception')
  # Script body: load the list of targets from Redis, then visit 500 randomly
  # chosen entries through run_once().

  # NOTE(review): the connection details, including the password, are
  # hard-coded; consider loading them from configuration or the environment.
  r = redis.Redis(host='db.ptt.cx', port=6379, db=1, password='choozmo9')
  data = r.get('innews_five')
  if data is None:
      # r.get() returns None for a missing key; json.loads(None) would only
      # produce an unhelpful TypeError.
      sys.exit("redis key 'innews_five' is missing; nothing to visit")

  # Presumably a JSON array of page URLs (e.g. 'https://innews.com.tw/62183/')
  # -- verify against whatever writes this key.
  lst = list(json.loads(data))
  if not lst:
      # random.choice() raises IndexError on an empty sequence.
      sys.exit("redis key 'innews_five' holds no entries; nothing to visit")

  try:
      for _ in range(500):
          target = random.choice(lst)
          print(target)
          run_once(target)
  finally:
      # Do not leave a headless Chrome running after the script ends.
      if driver is not None:
          try:
              driver.quit()
          except Exception:
              traceback.print_exc()