123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137 |
- import undetected_chromedriver as uc
- import time
- import os
- import urllib
- from selenium.webdriver.common.by import By
- from pyvirtualdisplay import Display
- import sys
- import urlToarticle
- from fastapi import FastAPI, Form, Request
- from fastapi.responses import FileResponse, HTMLResponse
- from fastapi.templating import Jinja2Templates
- from pydantic import BaseModel
- from typing import List
- import undetected_chromedriver as uc
# Web application object and the Jinja2 template loader (templates are read
# from the local "templates" directory).
app = FastAPI()
templates = Jinja2Templates(directory="templates")

# Shared Selenium driver handle; re_get_webdriver() replaces it on every call.
driver = None

# Local Chrome settings. user_data_dir and profile_directory are handed to
# urlToarticle.ArticleGenerator by the /generate handler; driver_path is not
# referenced anywhere in the visible part of this file.
driver_path = "/usr/local/bin/chromedriver"
user_data_dir = "/home/ling/.config/google-chrome"
profile_directory = "Default"
def re_get_webdriver(version_main=132):
    """Discard any existing browser session and start a fresh one.

    Quits the module-level ``driver`` if one exists, stops the virtual
    display left running by the previous call, then starts a new virtual
    display and an undetected-chromedriver Chrome instance.

    Args:
        version_main: Major Chrome version passed to ``uc.Chrome``.
            Defaults to 132, the value that used to be hard-coded here.

    Returns:
        The new driver (also stored in the module-level ``driver``), or
        ``None`` when the display or Chrome could not be started.
    """
    global driver, _virtual_display

    if driver is not None:
        print('closing....')
        try:
            driver.quit()
        except Exception as e:
            # A browser that already died must not prevent starting a new one.
            print(f"driver.quit() failed: {e}")
        print('quit....')
        driver = None

    # Each call used to start a new virtual display and never stop it, so
    # display processes piled up. Stop the one from the previous call first.
    stale_display = globals().get("_virtual_display")
    if stale_display is not None:
        try:
            stale_display.stop()
        except Exception as e:
            print(f"display.stop() failed: {e}")
    _virtual_display = None

    display = None
    try:
        display = Display(visible=0, size=(1920, 1080))
        display.start()
        options = uc.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-blink-features=AutomationControlled")
        driver = uc.Chrome(options=options, version_main=version_main)
        # Hide the navigator.webdriver flag on the currently loaded document.
        driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    except Exception as e:
        print(f"Chrome 啟動失敗: {str(e)}")
        # Chrome may have started before the failure (e.g. execute_script
        # raised); quit it instead of abandoning the process.
        if driver is not None:
            try:
                driver.quit()
            except Exception as quit_error:
                print(f"driver.quit() failed: {quit_error}")
            driver = None
        # Do not leave the display running when no browser is using it.
        if display is not None:
            try:
                display.stop()
            except Exception as stop_error:
                print(f"display.stop() failed: {stop_error}")
        return None

    # Remember the display so the next call can stop it.
    _virtual_display = display
    return driver
def get_resource(kw, max_results=11):
    """Search Google for ``kw`` and return the result links.

    Starts a browser through ``re_get_webdriver`` (up to two attempts),
    loads the Google results page for the keyword, and collects the
    ``href`` of the anchors inside ``div.yuRUbf`` result containers.

    Args:
        kw: Search keyword(s); URL-quoted before being put in the query.
        max_results: Maximum number of anchors to read. Defaults to 11,
            the slice size that used to be hard-coded here.

    Returns:
        A list of URL strings, or ``None`` when the browser could not be
        started, no results were found, or an error occurred.
    """
    # A bare ``import urllib`` does not guarantee the ``parse`` submodule is
    # loaded, so import the function that is actually needed.
    from urllib.parse import quote

    global driver

    max_attempts = 2
    browser = None
    for attempt in range(1, max_attempts + 1):
        browser = re_get_webdriver()
        print('re_get_webdriver')
        if browser is not None:
            break
        if attempt < max_attempts:
            time.sleep(3)

    if browser is None:
        # Previously this fell through to ``None.get(...)`` and relied on the
        # resulting AttributeError being swallowed below.
        print('exception', 'webdriver could not be started')
        return None

    try:
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(quote(kw), 100, 'zh-TW')
        browser.get(googleurl)
        # Fixed wait for the results page to finish rendering.
        time.sleep(6)
        print(browser.current_url)
        elmts = browser.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            print(browser.current_url)
            print(browser.title)
            # Was sys.exit(): SystemExit is not an Exception subclass, so it
            # escaped the handler below and propagated into the web server.
            # Callers already treat a falsy return value as "search failed".
            return None
        hrefs = (elmt.get_attribute('href') for elmt in elmts[:max_results])
        # Skip anchors without an href so callers only receive usable URLs.
        resources_list = [href for href in hrefs if href]
        print(resources_list)
        return resources_list
    except Exception as e:
        print('exception', e)
        return None
    finally:
        # The original ``driver.quit()`` sat after the return statements and
        # never ran, leaving one Chrome process behind per call.
        try:
            browser.quit()
        except Exception as e:
            print(f"driver.quit() failed: {e}")
        # Clear the shared handle so nothing tries to reuse the closed session.
        driver = None
if __name__ == "__main__":
    # Manual run: fetch the source URLs from the first page of search results.
    urls = get_resource('書房 設計 北歐')
    # Article generation is disabled in script mode. The steps that were
    # commented out here are kept for reference (note that the /generate
    # handler passes an additional keyword argument to ArticleGenerator):
    #   urls = ['https://www.100.com.tw/article/4359', 'https://yes-99.com/news-info.asp?id=278']
    #   article_generator = urlToarticle.ArticleGenerator(user_data_dir, profile_directory)
    #   is_succesed = article_generator.generate_article(urls)
    #   if is_succesed:
    #       print(is_succesed)
# Pydantic model with a single required string field. No route in the visible
# part of this file references it.
class SearchRequest(BaseModel):
    keyword: str  # the search keyword
# Pydantic model pairing a list of URL strings with a keyword. No route in
# the visible part of this file references it.
class GenerateRequest(BaseModel):
    urls: List[str]  # list of URL strings
    keyword: str  # the keyword associated with those URLs
@app.post("/generate")
async def generate_article(keyword: str = Form(...)):
    """Handle POST /generate: search for ``keyword`` and return an article.

    Looks up source URLs with ``get_resource``, passes them to
    ``urlToarticle.ArticleGenerator.generate_article``, and on success
    returns ``./article.md`` as a Markdown download.

    Returns:
        A ``FileResponse`` for the Markdown file, or an ``HTMLResponse``
        with status 400 (search failed), 500 (generation failed) or
        404 (the Markdown file does not exist).
    """
    # NOTE(review): get_resource() sleeps and drives a browser synchronously,
    # so this coroutine blocks while it runs. The article generator is
    # presumably synchronous as well; confirm before changing this.
    source_urls = get_resource(keyword)

    # Make sure the search produced something to work with.
    if not source_urls:
        return HTMLResponse(content="<h2>搜尋失敗,請回上一頁重試!</h2>", status_code=400)

    generator = urlToarticle.ArticleGenerator(user_data_dir, profile_directory, keyword)
    succeeded = generator.generate_article(source_urls)
    if not succeeded:
        return HTMLResponse(content="<h2>文章生成失敗!</h2>", status_code=500)

    markdown_path = "./article.md"
    if os.path.exists(markdown_path):
        # Send the generated Markdown file back as a download.
        return FileResponse(markdown_path, media_type="text/markdown", filename="article.md")
    return HTMLResponse(content="<h2>找不到生成的 Markdown 檔案!</h2>", status_code=404)
@app.get("/search", response_class=HTMLResponse)
async def search_page(request: Request):
    """Handle GET /search by rendering the ``search.html`` template."""
    # The template context carries only the incoming request object.
    context = {"request": request}
    return templates.TemplateResponse("search.html", context)
|