|
@@ -0,0 +1,104 @@
|
|
|
+#import redis
|
|
|
+import time
|
|
|
+import traceback
|
|
|
+#import json
|
|
|
+from selenium import webdriver
|
|
|
+from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
|
|
+import time
|
|
|
+import os
|
|
|
+from selenium.webdriver.support.ui import WebDriverWait
|
|
|
+from selenium.webdriver.common.by import By
|
|
|
+from selenium.webdriver.support import expected_conditions as EC
|
|
|
+import dataset
|
|
|
+import json
|
|
|
+import random
|
|
|
+import time
|
|
|
+import sys
|
|
|
+import codecs
|
|
|
+import redis
|
|
|
+import random
|
|
|
+import os
|
|
|
+import time
|
|
|
+from userAgentRandomizer import userAgents
|
|
|
+
|
|
|
# Shared Selenium driver used by the whole script; None until
# re_get_webdriver() has successfully started Chrome.
driver = None


def re_get_webdriver():
    """(Re)create the module-level headless Chrome ``driver``.

    If a driver already exists it is quit and every process named ``chrome``
    is killed with ``killall`` before a new one is started.  The new browser
    runs headless and incognito with a random user agent taken from
    ``userAgents`` and a 1400x1000 window.

    Returns:
        None.  The result is published through the global ``driver``, which
        is left as ``None`` when set-up fails.

    Raises:
        SystemExit: when Chrome itself cannot be started and the ``kill``
            sent to this process did not already terminate it.
    """
    global driver

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception:
            # A session that is already dead must not prevent the clean-up
            # below or crash the caller.
            traceback.print_exc()
        os.system('killall chrome')
        print('quit....')
    driver = None

    try:
        user_agent = userAgents().random()

        options = webdriver.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--headless")

        print(user_agent)
        options.add_argument("--user-agent=" + user_agent)
        options.add_argument("--incognito")
    except Exception:
        traceback.print_exc()
        return None

    try:
        new_driver = webdriver.Chrome(options=options)
    except Exception:
        # Without a browser the script cannot do anything useful: terminate.
        traceback.print_exc()
        os.system('kill %d' % os.getpid())
        # Fallback in case the signal above did not end the process.  This is
        # no longer swallowed by an enclosing bare ``except``.
        sys.exit(1)

    try:
        new_driver.set_window_size(1400, 1000)
    except Exception:
        traceback.print_exc()
        # Do not leak the half-initialised browser when giving up on it.
        try:
            new_driver.quit()
        except Exception:
            traceback.print_exc()
        return None

    driver = new_driver
    return None
|
|
|
+
|
|
|
def run_once(url):
    """Open ``url`` in a new browser tab and scroll the page by 400 px.

    Roughly one call in eight (``random.randint(0, 7) == 0``), and whenever
    no driver exists, the function first sleeps 8 seconds and rebuilds the
    browser through ``re_get_webdriver()``.  If no driver is available after
    that, the call is a no-op.

    Args:
        url: Address to open.  It is handed to the page script as an
            argument rather than spliced into the JavaScript source, so
            quotes or other special characters in it cannot break or alter
            the script.

    Returns:
        None.  Errors raised while driving the browser are printed and
        swallowed so that the caller's loop keeps going.
    """
    global driver

    i = random.randint(0, 7)
    if i == 0 or driver is None:
        time.sleep(8)
        re_get_webdriver()
    if driver is None:
        return

    try:
        driver.execute_script('window.open(arguments[0], "_blank");', url)
        # NOTE(review): no switch_to.window() happens here, so this scroll
        # presumably runs in the window the driver is currently focused on,
        # not in the tab opened above - confirm that this is intended.
        driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
        time.sleep(0.5)
    except Exception:
        print('exception')
        # Show the cause instead of hiding it behind a bare marker line.
        traceback.print_exc()
|
|
|
+
|
|
|
# URLs to visit.  Filled from the JSON list stored under the Redis key
# 'innews_five'; entries look like 'https://innews.com.tw/62183/'.
lst = []

# NOTE(review): the connection settings (including the password) are
# hard-coded in source.  They remain the defaults for backward compatibility
# but can now be overridden through the environment; consider removing the
# literal password from the repository.
r = redis.Redis(
    host=os.environ.get('REDIS_HOST', 'db.ptt.cx'),
    port=int(os.environ.get('REDIS_PORT', '6379')),
    db=1,
    password=os.environ.get('REDIS_PASSWORD', 'choozmo9'),
)

data = r.get('innews_five')
if data is None:
    # A missing key would otherwise surface as an opaque TypeError from
    # json.loads(None).
    sys.exit("redis key 'innews_five' not found; nothing to visit")

lst.extend(json.loads(data))
if not lst:
    # random.choice() raises IndexError on an empty sequence.
    sys.exit("redis key 'innews_five' holds no URLs; nothing to visit")

try:
    for _ in range(500):
        l = random.choice(lst)
        print(l)
        run_once(l)
finally:
    # Shut the browser down when the run finishes or is interrupted instead
    # of leaving a headless Chrome behind.
    if driver is not None:
        try:
            driver.quit()
        except Exception:
            traceback.print_exc()
|
|
|
+
|