12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
import time
from io import BytesIO
from pathlib import Path

import pandas as pd
from PIL import Image
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
# --- Configuration ---------------------------------------------------------

# CSV file to read; it must contain a "url" column.
# NOTE(review): the path is empty in the source as found - set it before running.
CSV_PATH = ""

# Directory the PNG files are written to (created on demand).
OUTPUT_DIR = "choozmo"

# Zero-based, half-open range of CSV rows to capture (rows 4 and 5, 1-based).
FIRST_INDEX = 3
STOP_INDEX = 5

# Seconds to wait after navigation so the page can finish loading.
PAGE_LOAD_WAIT = 20
# Seconds to wait after each scroll so the viewport can repaint.
SCROLL_SETTLE_WAIT = 0.5


def scroll_offsets(total_height: int, viewport_height: int) -> list:
    """Return the vertical scroll positions needed to cover a page.

    Positions start at 0 and advance by one viewport height while they stay
    below ``total_height``. At least one position is always returned, so a
    page reporting zero height still yields a single frame.

    Raises:
        ValueError: if ``viewport_height`` is not positive (stepping by it
            would never terminate).
    """
    if viewport_height <= 0:
        raise ValueError(f"viewport_height must be positive, got {viewport_height}")
    return list(range(0, total_height, viewport_height)) or [0]


def overlap_pixels(requested_y: float, actual_y: float, scale: float) -> int:
    """Return how many top rows of a frame repeat already-captured content.

    The browser clamps scrolling at the bottom of the page, so the position
    actually reached (``actual_y``) can be smaller than the one asked for
    (``requested_y``). The difference, converted from CSS pixels to
    screenshot pixels via ``scale``, is the duplicated strip.
    """
    return max(0, round((requested_y - actual_y) * scale))


def stitch_vertically(frames: list) -> "Image.Image":
    """Paste ``frames`` top to bottom into one RGB image.

    The canvas takes its width from the first frame and its height from the
    sum of all frame heights. ``frames`` must not be empty.
    """
    canvas_width = frames[0].size[0]
    canvas_height = sum(frame.size[1] for frame in frames)
    canvas = Image.new("RGB", (canvas_width, canvas_height))

    y_offset = 0
    for frame in frames:
        canvas.paste(frame, (0, y_offset))
        y_offset += frame.size[1]
    return canvas


def capture_full_page(driver: "webdriver.Chrome", url: str) -> "Image.Image":
    """Load ``url`` and return a single tall screenshot of the whole page.

    The page is scrolled one viewport at a time and each viewport is
    captured. Frames are then stitched vertically; rows that a clamped final
    scroll would otherwise duplicate are cropped away first.
    """
    driver.get(url)
    time.sleep(PAGE_LOAD_WAIT)  # fixed wait for the page to load

    total_height = int(driver.execute_script("return document.body.scrollHeight"))
    viewport_height = int(driver.execute_script("return window.innerHeight"))

    frames = []
    for requested_y in scroll_offsets(total_height, viewport_height):
        driver.execute_script(f"window.scrollTo(0, {requested_y});")
        time.sleep(SCROLL_SETTLE_WAIT)  # let the viewport repaint

        # Where the browser really ended up; smaller than requested when the
        # scroll was clamped at the bottom of the page.
        actual_y = driver.execute_script("return window.pageYOffset")

        frame = Image.open(BytesIO(driver.get_screenshot_as_png()))
        frame_width, frame_height = frame.size

        # Screenshot pixels per CSS pixel (presumably != 1 on high-DPI displays).
        scale = frame_height / viewport_height
        overlap = overlap_pixels(requested_y, actual_y, scale)
        if overlap >= frame_height:
            continue  # the whole frame repeats content we already have
        if overlap:
            frame = frame.crop((0, overlap, frame_width, frame_height))
        frames.append(frame)

    return stitch_vertically(frames)


def main() -> None:
    """Capture full-page PNG screenshots for the configured CSV rows."""
    # Read the list of URLs from the CSV.
    urls = pd.read_csv(CSV_PATH)["url"].tolist()

    # Saving fails if the target directory is missing, so create it up front.
    Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

    # Configure Chrome.
    chrome_options = Options()
    # chrome_options.add_argument("--headless")  # headless mode
    # chrome_options.add_argument("--incognito")  # incognito mode
    chrome_options.add_argument("--disable-gpu")  # disable GPU acceleration
    chrome_options.add_argument("--window-size=1200,800")  # window size

    driver = webdriver.Chrome(options=chrome_options)
    try:
        selected = urls[FIRST_INDEX:STOP_INDEX]
        # Number pages by their 1-based CSV row so labels and file names stay
        # in sync with the selected range.
        for number, url in enumerate(selected, start=FIRST_INDEX + 1):
            try:
                print(f"[{number}/{len(urls)}] 擷取網頁: {url}")
                final_image = capture_full_page(driver, url)

                # Save as PNG.
                image_filename = f"{OUTPUT_DIR}/screenshot_{number}.png"
                final_image.save(image_filename)
                # To also export a PDF:
                # final_image.convert("RGB").save(pdf_filename, "PDF", resolution=100.0)
                print(f"✅ 已儲存: {image_filename}")
            except Exception as e:
                # Best effort per URL: report the failure and move on.
                print(f"❌ 擷取失敗: {url},錯誤: {e}")
    finally:
        # Always close the browser, even on Ctrl-C or an unexpected error.
        driver.quit()

    print("🎉 所有網頁已擷取完成!")


if __name__ == "__main__":
    main()
|