|
@@ -10,7 +10,8 @@ import urllib.parse
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
-
|
|
|
|
|
|
+from userAgentRandomizer import userAgents
|
|
|
|
+from selenium.webdriver.chrome.options import Options
|
|
import codecs
|
|
import codecs
|
|
import random
|
|
import random
|
|
import requests
|
|
import requests
|
|
@@ -88,17 +89,31 @@ def re_get_webdriver():
|
|
# if 'p17777' in l.name:
|
|
# if 'p17777' in l.name:
|
|
# ls[0].restart()
|
|
# ls[0].restart()
|
|
|
|
|
|
-# time.sleep(4)
|
|
|
|
|
|
+ time.sleep(4)
|
|
if driver is not None:
|
|
if driver is not None:
|
|
print('closing....')
|
|
print('closing....')
|
|
# driver.quit()
|
|
# driver.quit()
|
|
os.system('docker container restart p'+portnum)
|
|
os.system('docker container restart p'+portnum)
|
|
- time.sleep(11)
|
|
|
|
|
|
+ time.sleep(10)
|
|
|
|
|
|
# options = webdriver.EdgeOptions()
|
|
# options = webdriver.EdgeOptions()
|
|
try:
|
|
try:
|
|
|
|
+ ua = userAgents()
|
|
|
|
+
|
|
|
|
+ user_agent = ua.random()
|
|
|
|
+# options = webdriver.ChromeOptions()
|
|
|
|
+ options = Options()
|
|
|
|
+ # options.add_argument("--headless") # run in the background (no visible browser window)
|
|
|
|
+ print(user_agent)
|
|
|
|
+ options.add_argument("--user-agent=" +user_agent)
|
|
|
|
+ options.add_argument("--incognito")
|
|
|
|
+
|
|
driver = webdriver.Remote(
|
|
driver = webdriver.Remote(
|
|
- command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub')
|
|
|
|
|
|
+ command_executor='http://127.0.0.1:'+str(portnum)+'/wd/hub',
|
|
|
|
+ options=options
|
|
|
|
+# desired_capabilities=options.to_capabilities(),
|
|
|
|
+ )
|
|
|
|
+ driver.delete_all_cookies()
|
|
driver.set_window_size(1400,1000)
|
|
driver.set_window_size(1400,1000)
|
|
return
|
|
return
|
|
except:
|
|
except:
|
|
@@ -110,7 +125,7 @@ def re_get_webdriver():
|
|
|
|
|
|
def run_once(url):
|
|
def run_once(url):
|
|
global driver
|
|
global driver
|
|
- i=random.randint(0,25)
|
|
|
|
|
|
+ i=random.randint(0,5)
|
|
if i==0 or driver is None:
|
|
if i==0 or driver is None:
|
|
# if True:
|
|
# if True:
|
|
re_get_webdriver()
|
|
re_get_webdriver()
|
|
@@ -145,7 +160,7 @@ db = dataset.connect('mysql://choozmo:pAssw0rd@db.ptt.cx:3306/seo?charset=utf8mb
|
|
|
|
|
|
cursor=db.query('SELECT * FROM columnids order by rand()')
|
|
cursor=db.query('SELECT * FROM columnids order by rand()')
|
|
for c in cursor:
|
|
for c in cursor:
|
|
- lst.append('https://www.hhh.com.tw/columns/detail/'+str(c['cid'])+'/index.php')
|
|
|
|
|
|
+ lst.append('https://www.hhh.com.tw/columns/detail/'+str(c['cid'])+'/')
|
|
|
|
|
|
|
|
|
|
#lst=['https://www.hhh.com.tw/columns/detail/3427/index.php']
|
|
#lst=['https://www.hhh.com.tw/columns/detail/3427/index.php']
|