|
@@ -0,0 +1,70 @@
|
|
|
import os
import time
from io import BytesIO

import pandas as pd
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
|
|
|
+
|
|
|
# Path of the input CSV. It must contain a "url" column.
# NOTE(review): the path is empty in the original script -- fill it in before running.
CSV_PATH = ""

# Directory that receives the stitched PNG files.
OUTPUT_DIR = "choozmo"

# Zero-based, half-open slice of the URL list that is captured in one run.
START_INDEX = 3
STOP_INDEX = 5

# Fixed pause after navigation so the page can finish loading.
PAGE_LOAD_WAIT_SECONDS = 20
# Short pause after every scroll so the viewport can repaint.
SCROLL_SETTLE_SECONDS = 0.5


def overlap_rows(requested_y, actual_y, viewport_height, image_height):
    """Return how many pixel rows at the top of a screenshot were already captured.

    The browser clamps ``window.scrollTo`` at the bottom of the page, so the
    last viewport usually starts above the requested offset and repeats part
    of the previous screenshot.

    Args:
        requested_y: Scroll offset that was requested, in CSS pixels.
        actual_y: Scroll offset the browser actually reached, in CSS pixels.
        viewport_height: Height of the viewport in CSS pixels (must be > 0).
        image_height: Height of the screenshot in image pixels. It differs
            from ``viewport_height`` when the device pixel ratio is not 1.

    Returns:
        The number of image rows to drop, between 0 and ``image_height``.

    Raises:
        ValueError: If ``viewport_height`` is not positive.
    """
    if viewport_height <= 0:
        raise ValueError("viewport_height must be positive")
    overlap_css = min(max(requested_y - actual_y, 0), viewport_height)
    # Convert CSS pixels into screenshot pixels.
    scale = image_height / viewport_height
    return min(int(round(overlap_css * scale)), image_height)


def build_driver():
    """Start a Chrome WebDriver with a fixed 1200x800 window."""
    chrome_options = Options()
    # Optional switches, left disabled as in the original script:
    # chrome_options.add_argument("--headless")   # headless mode
    # chrome_options.add_argument("--incognito")  # incognito mode
    chrome_options.add_argument("--disable-gpu")  # disable GPU acceleration
    chrome_options.add_argument("--window-size=1200,800")  # window size
    return webdriver.Chrome(options=chrome_options)


def capture_full_page(driver):
    """Scroll through the current page and stitch the viewports into one image.

    Args:
        driver: A Selenium WebDriver that has already loaded the target page.

    Returns:
        A new RGB ``PIL.Image.Image`` containing the stacked screenshots.

    Raises:
        RuntimeError: If the browser reports a non-positive viewport height.
    """
    total_height = driver.execute_script(
        "return Math.max(document.body.scrollHeight, "
        "document.documentElement.scrollHeight)"
    )
    viewport_height = driver.execute_script("return window.innerHeight")
    if not viewport_height or viewport_height <= 0:
        # A zero step would make the scroll loop below spin forever.
        raise RuntimeError("window.innerHeight is not positive")

    strips = []
    scroll_position = 0
    while True:
        driver.execute_script(f"window.scrollTo(0, {scroll_position});")
        time.sleep(SCROLL_SETTLE_SECONDS)  # wait for the viewport to repaint
        actual_y = driver.execute_script("return window.pageYOffset")

        strip = Image.open(BytesIO(driver.get_screenshot_as_png()))
        width, height = strip.size
        drop = overlap_rows(scroll_position, actual_y, viewport_height, height)
        if drop < height:
            if drop:
                # Remove the rows that the previous screenshot already covers.
                strip = strip.crop((0, drop, width, height))
            strips.append(strip)

        scroll_position += viewport_height
        if scroll_position >= total_height:
            break

    # Stack the strips vertically into a single long image.
    total_width = strips[0].size[0]
    final_height = sum(strip.size[1] for strip in strips)
    final_image = Image.new("RGB", (total_width, final_height))
    y_offset = 0
    for strip in strips:
        final_image.paste(strip, (0, y_offset))
        y_offset += strip.size[1]
    return final_image


def main():
    """Capture a full-page PNG for each selected URL listed in the CSV."""
    df = pd.read_csv(CSV_PATH)
    urls = df["url"].tolist()

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    driver = build_driver()
    try:
        selected = urls[START_INDEX:STOP_INDEX]
        # `number` is the 1-based position of the URL in the whole list.
        for number, url in enumerate(selected, start=START_INDEX + 1):
            try:
                print(f"[{number}/{len(urls)}] 擷取網頁: {url}")
                driver.get(url)
                time.sleep(PAGE_LOAD_WAIT_SECONDS)  # wait for the page to load

                final_image = capture_full_page(driver)

                # Save as PNG.
                image_filename = f"{OUTPUT_DIR}/screenshot_{number}.png"
                final_image.save(image_filename)

                # PDF export was disabled in the original script; to enable it:
                # final_image.convert("RGB").save(pdf_filename, "PDF", resolution=100.0)

                print(f"✅ 已儲存: {image_filename}")
            except Exception as e:
                # Deliberate best effort: one failing page must not stop the batch.
                print(f"❌ 擷取失敗: {url},錯誤: {e}")
    finally:
        # Always close the browser, even on Ctrl-C or an unexpected error.
        driver.quit()

    print("🎉 所有網頁已擷取完成!")


if __name__ == "__main__":
    main()
|