# page_screen_long.py (2.3 KB)
import os
import time
from io import BytesIO

import pandas as pd
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
  8. # 讀取 CSV
  9. df = pd.read_csv("")
  10. urls = df["url"].tolist()
  11. # 設定 Selenium 無痕模式
  12. chrome_options = Options()
  13. # chrome_options.add_argument("--headless") # 無頭模式
  14. # chrome_options.add_argument("--incognito") # 無痕模式
  15. chrome_options.add_argument("--disable-gpu") # 禁用 GPU 加速
  16. chrome_options.add_argument("--window-size=1200,800") # 視窗大小
  17. # 啟動 Selenium 瀏覽器
  18. driver = webdriver.Chrome(options=chrome_options)
  19. # 截圖 & 轉 PDF
  20. for index, url in enumerate(urls[3:5]):
  21. try:
  22. print(f"[{index+4}/{len(urls)}] 擷取網頁: {url}")
  23. driver.get(url)
  24. time.sleep(20) # 等待網頁載入
  25. # 取得完整網頁高度
  26. total_height = driver.execute_script("return document.body.scrollHeight")
  27. viewport_height = driver.execute_script("return window.innerHeight")
  28. # 存儲所有截圖
  29. stitched_images = []
  30. scroll_position = 0
  31. while scroll_position < total_height:
  32. driver.execute_script(f"window.scrollTo(0, {scroll_position});")
  33. time.sleep(0.5) # 等待畫面刷新
  34. screenshot = driver.get_screenshot_as_png()
  35. stitched_images.append(Image.open(BytesIO(screenshot)))
  36. scroll_position += viewport_height
  37. # 拼接整張長截圖
  38. total_width = stitched_images[0].size[0]
  39. final_height = sum(img.size[1] for img in stitched_images)
  40. final_image = Image.new("RGB", (total_width, final_height))
  41. y_offset = 0
  42. for img in stitched_images:
  43. final_image.paste(img, (0, y_offset))
  44. y_offset += img.size[1]
  45. # 儲存為 PNG
  46. image_filename = f"choozmo/screenshot_{index+4}.png"
  47. final_image.save(image_filename)
  48. # 轉成 PDF
  49. # pdf_filename = f"sing_screenshot_{index+15}.pdf"
  50. # final_image.convert("RGB").save(pdf_filename, "PDF", resolution=100.0)
  51. print(f"✅ 已儲存: {image_filename}")
  52. except Exception as e:
  53. print(f"❌ 擷取失敗: {url},錯誤: {e}")
  54. # 關閉瀏覽器
  55. driver.quit()
  56. print("🎉 所有網頁已擷取完成!")