zooey 1 bulan lalu
melakukan
9dec4e75d7
1 mengubah file dengan 70 tambahan dan 0 penghapusan
  1. 70 0
      page_screen_long.py

+ 70 - 0
page_screen_long.py

@@ -0,0 +1,70 @@
+import time
+import pandas as pd
+from selenium import webdriver
+from selenium.webdriver.chrome.options import Options
+from selenium.webdriver.common.by import By
+from PIL import Image
+from io import BytesIO
+
+# Load the list of page URLs from the "url" column of a CSV file.
+df = pd.read_csv("")  # NOTE(review): the path is an empty string here — supply the real CSV path
+urls = df["url"].tolist()
+
+# Configure Chrome options (the headless and incognito flags below are commented out).
+chrome_options = Options()
+# chrome_options.add_argument("--headless")  # headless mode
+# chrome_options.add_argument("--incognito")  # incognito mode
+chrome_options.add_argument("--disable-gpu")  # disable GPU acceleration
+chrome_options.add_argument("--window-size=1200,800")  # window size
+
+# Start the Selenium-driven Chrome browser.
+driver = webdriver.Chrome(options=chrome_options)
+
+# Capture a stitched full-page screenshot per URL (the PDF export further down is commented out).
+for index, url in enumerate(urls[3:5]):  # only the slice urls[3:5] is processed
+    try:
+        print(f"[{index+4}/{len(urls)}] 擷取網頁: {url}")  # index+4 is the 1-based position within the full list
+        driver.get(url)
+        time.sleep(20)  # fixed wait for the page to load
+
+        # Read the full page height and the visible viewport height from the browser.
+        total_height = driver.execute_script("return document.body.scrollHeight")
+        viewport_height = driver.execute_script("return window.innerHeight")
+
+        # Collect one screenshot per viewport-sized scroll step.
+        stitched_images = []
+        scroll_position = 0
+
+        while scroll_position < total_height:
+            driver.execute_script(f"window.scrollTo(0, {scroll_position});")
+            time.sleep(0.5)  # give the page time to repaint after scrolling
+            screenshot = driver.get_screenshot_as_png()
+            stitched_images.append(Image.open(BytesIO(screenshot)))
+            scroll_position += viewport_height
+
+        # Stitch the captures vertically into one tall image.
+        total_width = stitched_images[0].size[0]
+        final_height = sum(img.size[1] for img in stitched_images)  # NOTE(review): if the browser clamps the last scroll step, the final tile repeats content — confirm
+        final_image = Image.new("RGB", (total_width, final_height))
+
+        y_offset = 0
+        for img in stitched_images:
+            final_image.paste(img, (0, y_offset))
+            y_offset += img.size[1]
+
+        # Save as PNG. NOTE(review): presumably the "choozmo" directory must already exist — confirm
+        image_filename = f"choozmo/screenshot_{index+4}.png"
+        final_image.save(image_filename)
+
+        # Convert to PDF (currently disabled).
+        # pdf_filename = f"sing_screenshot_{index+15}.pdf"
+        # final_image.convert("RGB").save(pdf_filename, "PDF", resolution=100.0)
+
+        print(f"✅ 已儲存: {image_filename}")
+
+    except Exception as e:  # a failure on one URL is reported and the loop moves on to the next
+        print(f"❌ 擷取失敗: {url},錯誤: {e}")
+
+# Close the browser.
+driver.quit()
+print("🎉 所有網頁已擷取完成!")