|
@@ -3,19 +3,23 @@ import time
|
|
|
import os
|
|
|
import urllib
|
|
|
from selenium.webdriver.common.by import By
|
|
|
+from pyvirtualdisplay import Display
|
|
|
import sys
|
|
|
import urlToarticle
|
|
|
-from fastapi import FastAPI
|
|
|
+from fastapi import FastAPI, Form, Request
|
|
|
+from fastapi.responses import FileResponse, HTMLResponse
|
|
|
+from fastapi.templating import Jinja2Templates
|
|
|
from pydantic import BaseModel
|
|
|
from typing import List
|
|
|
-
|
|
|
+import undetected_chromedriver as uc
|
|
|
|
|
|
app = FastAPI()
|
|
|
+templates = Jinja2Templates(directory="templates")
|
|
|
driver = None
|
|
|
|
|
|
-driver_path = 'C:\/Users\/s1301\/Downloads\/132\/chromedriver-win32\/chromedriver.exe'
|
|
|
-user_data_dir = 'C:/Users/s1301/AppData/Local/Google/Chrome/User Data'
|
|
|
-profile_directory = 'Profile 1'
|
|
|
+driver_path = '/usr/local/bin/chromedriver'
|
|
|
+user_data_dir = '/home/ling/.config/google-chrome'
|
|
|
+profile_directory = 'Default'
|
|
|
|
|
|
|
|
|
def re_get_webdriver():
|
|
@@ -30,29 +34,37 @@ def re_get_webdriver():
|
|
|
print('quit....')
|
|
|
driver = None
|
|
|
try:
|
|
|
+ display = Display(visible=0, size=(1920, 1080))
|
|
|
+ display.start()
|
|
|
options = uc.ChromeOptions()
|
|
|
- # options.add_argument("--user-agent=" + "Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 5 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19")
|
|
|
-
|
|
|
- options.add_argument("--window-size=200,100") # 縮小視窗
|
|
|
- options.add_argument("--window-position=-32000,-32000") # 移到螢幕外
|
|
|
- # for window in gw.getWindowsWithTitle("Chrome"):
|
|
|
- # window.minimize()
|
|
|
- driver = uc.Chrome(options=options)
|
|
|
- driver.delete_all_cookies()
|
|
|
- except:
|
|
|
+ #options.add_argument("--window-size=200,100") # 縮小視窗
|
|
|
+ #options.add_argument("--window-position=-32000,-32000") # 移到螢幕外
|
|
|
+ options.add_argument("--no-sandbox")
|
|
|
+ options.add_argument("--disable-dev-shm-usage")
|
|
|
+ options.add_argument("--disable-blink-features=AutomationControlled")
|
|
|
+
|
|
|
+ driver = uc.Chrome(options=options,version_main=132)
|
|
|
+ driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
|
|
|
+
|
|
|
+ except Exception as e:
|
|
|
+ print(f"Chrome 啟動失敗: {str(e)}")
|
|
|
driver = None
|
|
|
+
|
|
|
return None
|
|
|
|
|
|
return driver
|
|
|
|
|
|
|
|
|
def get_resource(kw):
|
|
|
- while True:
|
|
|
+ max_attempts = 2
|
|
|
+ attempts = 0
|
|
|
+ while attempts<max_attempts:
|
|
|
driver = re_get_webdriver()
|
|
|
print('re_get_webdriver')
|
|
|
if driver is not None:
|
|
|
break
|
|
|
time.sleep(3)
|
|
|
+ attempts+=1
|
|
|
try:
|
|
|
googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(urllib.parse.quote(kw), 100, 'zh-TW')
|
|
|
# googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw&tbm=vid&tbs=vd:m'.format(urllib.parse.quote(kw), 100, 'zh-TW')
|
|
@@ -83,12 +95,13 @@ def get_resource(kw):
|
|
|
|
|
|
driver.quit()
|
|
|
|
|
|
-# urls = get_resource('書房 設計 北歐') # 取得搜尋結果第一頁網址來源
|
|
|
-# urls = ['https://www.100.com.tw/article/4359', 'https://yes-99.com/news-info.asp?id=278', 'https://www.seec.com.tw/content/Goods/List.aspx?SiteID=10&MmmID=655575436061073254&CatId=2015120918304527132']
|
|
|
-# article_generator = urlToarticle.ArticleGenerator(user_data_dir, profile_directory, driver_path)
|
|
|
-# is_succesed = article_generator.generate_article(urls)
|
|
|
-# if is_succesed:
|
|
|
-# print(is_succesed)
|
|
|
+if __name__ == "__main__":
|
|
|
+ urls = get_resource('書房 設計 北歐') # 取得搜尋結果第一頁網址來源
|
|
|
+#urls = ['https://www.100.com.tw/article/4359', 'https://yes-99.com/news-info.asp?id=278']
|
|
|
+#article_generator = urlToarticle.ArticleGenerator(user_data_dir, profile_directory)
|
|
|
+#is_succesed = article_generator.generate_article(urls)
|
|
|
+#if is_succesed:
|
|
|
+# print(is_succesed)
|
|
|
|
|
|
|
|
|
class SearchRequest(BaseModel):
|
|
@@ -96,17 +109,29 @@ class SearchRequest(BaseModel):
|
|
|
|
|
|
class GenerateRequest(BaseModel):
|
|
|
urls: List[str]
|
|
|
+ keyword: str
|
|
|
+
|
|
|
+@app.post("/generate")
|
|
|
+async def generate_article(keyword: str = Form(...)):
|
|
|
+ urls = get_resource(keyword)
|
|
|
+ # 確保搜尋成功
|
|
|
+ if not urls:
|
|
|
+ return HTMLResponse(content="<h2>搜尋失敗,請回上一頁重試!</h2>", status_code=400)
|
|
|
+
|
|
|
+ article_generator = urlToarticle.ArticleGenerator(user_data_dir, profile_directory, keyword)
|
|
|
+ is_success = article_generator.generate_article(urls)
|
|
|
|
|
|
-@app.post("/search/")
|
|
|
-async def search_resource(request: SearchRequest):
|
|
|
- urls = get_resource(request.keyword)
|
|
|
- return {"urls":urls}
|
|
|
+ if not is_success:
|
|
|
+ return HTMLResponse(content="<h2>文章生成失敗!</h2>", status_code=500)
|
|
|
|
|
|
+ file_path = "./article.md"
|
|
|
+ if not os.path.exists(file_path):
|
|
|
+ return HTMLResponse(content="<h2>找不到生成的 Markdown 檔案!</h2>", status_code=404)
|
|
|
|
|
|
-@app.post("/generate/")
|
|
|
-async def generate_article(request: GenerateRequest):
|
|
|
- article_generator = urlToarticle.ArticleGenerator(user_data_dir, profile_directory, driver_path)
|
|
|
- is_success = article_generator.generate_article(request.urls)
|
|
|
- return {"success": is_success}
|
|
|
+ # 回傳 Markdown 檔案
|
|
|
+ return FileResponse(file_path, media_type="text/markdown", filename="article.md")
|
|
|
|
|
|
|
|
|
+@app.get("/search", response_class=HTMLResponse)
|
|
|
+async def search_page(request: Request):
|
|
|
+ return templates.TemplateResponse("search.html", {"request": request})
|