# hhh_phantom.py 1.4 KB
import codecs
import csv
import random
import time
from pathlib import Path

from selenium import webdriver
  5. fpath=__file__
  6. fpath=fpath.replace('hhh_phantom.py','urls.csv')
  7. print(fpath)
  8. lst=[]
  9. fr=codecs.open(fpath,'r','utf-8')
  10. lines=fr.readlines()
  11. for l in lines:
  12. elmts=l.split(',')
  13. lst.append('https://www.hhh.com.tw'+elmts[0])
  14. fr.close()
  15. #lst=['https://www.hhh.com.tw/cases/detail/d/13051/index.php','https://www.hhh.com.tw/columns/detail/5878/index.php']
  16. #lst=['13781','3649','2116']
  17. #for i in range(9999):
  18. if True:
  19. options = webdriver.ChromeOptions()
  20. # options.add_argument('--headless')
  21. options.add_argument('--disable-gpu') # Last I checked this was necessary.
  22. # options.add_argument('--incognito')
  23. options.add_argument('--no-sandbox')
  24. options.add_argument('--disable-dev-shm-usage')
  25. options.add_argument('--user-data-dir=C:\\tmp\\user')
  26. driver = webdriver.Chrome(
  27. desired_capabilities=options.to_capabilities())
  28. driver.set_window_size(1400,1000)
  29. for i in range(10):
  30. l=random.choice(lst)
  31. # driver.get("https://www.hhh.com.tw/cases/detail/"+l+"/index.php")
  32. # driver.get(l)
  33. driver.execute_script('window.open("'+l+'","_blank");')
  34. print(driver.current_url)
  35. time.sleep(4)
  36. driver.execute_script("window.scrollTo(0, window.scrollY + 400)")
  37. time.sleep(2)
  38. # driver.implicitly_wait (2)
  39. time.sleep(5)
  40. driver.quit()