"""Capture full-page screenshots of web pages listed in a CSV file.

The script reads a column of URLs from a CSV file, opens each selected URL in
Chrome through Selenium, scrolls through the page one viewport at a time,
stitches the viewport screenshots into one tall image and saves it as a PNG.
"""

import time
from io import BytesIO
from pathlib import Path

import pandas as pd
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By

# NOTE(review): the CSV path is empty in the original script, so read_csv
# will fail until a real path is filled in here.
CSV_PATH = ""
URL_COLUMN = "url"

# Half-open slice [START_INDEX, END_INDEX) of the URL list to process.
START_INDEX = 3
END_INDEX = 5

OUTPUT_DIR = "choozmo"
PAGE_LOAD_WAIT_SECONDS = 20  # fixed wait for the page to finish loading
SCROLL_SETTLE_SECONDS = 0.5  # wait for the viewport to repaint after a scroll


def _build_driver() -> webdriver.Chrome:
    """Create the Chrome driver with the options used for capturing."""
    chrome_options = Options()
    # Optional modes, disabled in the original script:
    # chrome_options.add_argument("--headless")   # headless mode
    # chrome_options.add_argument("--incognito")  # incognito mode
    chrome_options.add_argument("--disable-gpu")  # disable GPU acceleration
    chrome_options.add_argument("--window-size=1200,800")  # window size
    return webdriver.Chrome(options=chrome_options)


def _stitch_vertically(tiles: list) -> Image.Image:
    """Paste the given images top to bottom into one new RGB image."""
    total_width = tiles[0].size[0]
    final_height = sum(tile.size[1] for tile in tiles)
    canvas = Image.new("RGB", (total_width, final_height))
    y_offset = 0
    for tile in tiles:
        canvas.paste(tile, (0, y_offset))
        y_offset += tile.size[1]
    return canvas


def _capture_full_page(driver: webdriver.Chrome) -> Image.Image:
    """Scroll through the currently loaded page and return one stitched image.

    Args:
        driver: A driver that has already navigated to the page.

    Returns:
        A single RGB image made of the viewport screenshots, top to bottom.

    Raises:
        RuntimeError: If the page or viewport reports a non-positive height,
            which would otherwise yield no screenshots or an endless loop.
    """
    total_height = driver.execute_script("return document.body.scrollHeight")
    viewport_height = driver.execute_script("return window.innerHeight")
    if not total_height or not viewport_height or viewport_height <= 0:
        raise RuntimeError(
            f"invalid page metrics: scrollHeight={total_height}, "
            f"innerHeight={viewport_height}"
        )

    tiles = []
    covered = 0  # CSS pixels of the page, from the top, captured so far
    while covered < total_height:
        driver.execute_script(f"window.scrollTo(0, {covered});")
        time.sleep(SCROLL_SETTLE_SECONDS)

        # The browser clamps the scroll at the bottom of the page, so the
        # viewport may start above the requested position. Read the real
        # offset back instead of trusting the requested one.
        actual_top = int(round(driver.execute_script("return window.pageYOffset")))
        if actual_top + viewport_height <= covered:
            # Scrolling exposed nothing new (e.g. the document itself is not
            # scrollable); stop rather than append duplicates forever.
            break

        shot = Image.open(BytesIO(driver.get_screenshot_as_png()))

        overlap_css = max(0, covered - actual_top)
        if overlap_css:
            # Screenshot pixels can differ from CSS pixels (device pixel
            # ratio), so scale the overlap before cropping it off the top.
            scale = shot.size[1] / viewport_height
            overlap_px = min(int(round(overlap_css * scale)), shot.size[1] - 1)
            shot = shot.crop((0, overlap_px, shot.size[0], shot.size[1]))

        tiles.append(shot)
        covered = actual_top + viewport_height

    return _stitch_vertically(tiles)


def main() -> None:
    """Capture every selected URL from the CSV and save each one as a PNG."""
    # Read the CSV.
    df = pd.read_csv(CSV_PATH)
    urls = df[URL_COLUMN].tolist()

    # Start the Selenium browser.
    driver = _build_driver()
    try:
        # page_number is the 1-based position of the URL in the full list, so
        # progress output and file names stay aligned with the CSV rows.
        for page_number, url in enumerate(
            urls[START_INDEX:END_INDEX], start=START_INDEX + 1
        ):
            try:
                print(f"[{page_number}/{len(urls)}] 擷取網頁: {url}")
                driver.get(url)
                time.sleep(PAGE_LOAD_WAIT_SECONDS)  # wait for the page to load

                final_image = _capture_full_page(driver)

                # Save as PNG, creating the output directory on first use.
                image_filename = f"{OUTPUT_DIR}/screenshot_{page_number}.png"
                Path(image_filename).parent.mkdir(parents=True, exist_ok=True)
                final_image.save(image_filename)

                # Optional PDF export, disabled in the original script (which
                # used a different numbering offset for the PDF name):
                # pdf_filename = f"sing_screenshot_{page_number}.pdf"
                # final_image.convert("RGB").save(pdf_filename, "PDF", resolution=100.0)

                print(f"✅ 已儲存: {image_filename}")
            except Exception as e:
                # Best effort per URL: report the failure and move on to the
                # next page instead of aborting the whole run.
                print(f"❌ 擷取失敗: {url},錯誤: {e}")
    finally:
        # Always close the browser, even on Ctrl-C or an unexpected error.
        driver.quit()

    print("🎉 所有網頁已擷取完成!")


if __name__ == "__main__":
    main()