|
@@ -1,8 +1,24 @@
|
|
|
from selenium import webdriver
|
|
|
import time
|
|
|
import random
|
|
|
+import codecs
|
|
|
|
|
|
-lst=['13781','3649','2116']
|
|
|
+
|
|
|
+
|
|
|
BASE_URL = 'https://www.hhh.com.tw'


def load_urls(path, base_url=BASE_URL):
    """Build the list of absolute URLs from the first column of a CSV file.

    Each line of *path* is split on ',' and the first field is appended to
    *base_url*.

    Args:
        path: Path of the UTF-8 encoded CSV file to read.
        base_url: Prefix prepended to the first field of every row.

    Returns:
        A list of URL strings, one per non-blank row, in file order.

    Raises:
        IOError/OSError: if *path* cannot be opened.
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    urls = []
    # ``with`` guarantees the handle is closed even if decoding fails midway.
    with codecs.open(path, 'r', 'utf-8') as csv_file:
        for line in csv_file:
            # Strip so a single-column row does not keep its trailing
            # '\n' / '\r\n' inside the URL.
            first_field = line.split(',')[0].strip()
            if not first_field:
                # Blank line (or empty first column): nothing to visit.
                continue
            urls.append(base_url + first_field)
    return urls


# urls.csv is looked up by swapping the script's file name inside its own path.
# NOTE(review): this only works while the script is named 'hhh_phantom.py';
# if it is renamed, fpath stays pointing at the script itself - confirm.
fpath = __file__
fpath = fpath.replace('hhh_phantom.py', 'urls.csv')

print(fpath)

lst = load_urls(fpath)
|
|
|
+
|
|
|
+#lst=['13781','3649','2116']
|
|
|
for i in range(9999):
|
|
|
options = webdriver.ChromeOptions()
|
|
|
options.add_argument('--headless')
|
|
@@ -14,7 +30,8 @@ for i in range(9999):
|
|
|
desired_capabilities=options.to_capabilities())
|
|
|
driver.set_window_size(1400,1000)
|
|
|
l=random.choice(lst)
|
|
|
- driver.get("https://www.hhh.com.tw/cases/detail/"+l+"/index.php")
|
|
|
+# driver.get("https://www.hhh.com.tw/cases/detail/"+l+"/index.php")
|
|
|
+ driver.get(l)
|
|
|
print(driver.current_url)
|
|
|
driver.implicitly_wait (6)
|
|
|
time.sleep(6)
|