# zooey / article_generate
import os
import sys
import time
import urllib
import urllib.parse
from typing import List

import undetected_chromedriver as uc
from fastapi import FastAPI, Form, Request
from fastapi.responses import FileResponse, HTMLResponse
from fastapi.templating import Jinja2Templates
from pydantic import BaseModel
from pyvirtualdisplay import Display
from selenium.webdriver.common.by import By

import urlToarticle

# FastAPI application object; the route handlers in this file are registered on it.
app = FastAPI()
# Jinja2 template loader pointed at the "templates" directory (search_page renders search.html).
templates = Jinja2Templates(directory="templates")
# Shared browser handle; re_get_webdriver() replaces it on every call.
driver = None

# NOTE(review): not referenced anywhere else in the visible part of this file.
driver_path = '/usr/local/bin/chromedriver'
# Chrome user-data directory and profile name passed to urlToarticle.ArticleGenerator.
user_data_dir = '/home/ling/.config/google-chrome'
profile_directory = 'Default'


def _run_cleanup(action, label):
    """Call a teardown callable, printing any failure instead of raising it."""
    try:
        action()
    except Exception as e:
        print(f"{label} cleanup failed: {e}")


def re_get_webdriver(version_main=132):
    """Quit the current browser session, if any, and start a fresh Chrome.

    Chrome is launched through undetected_chromedriver inside a
    pyvirtualdisplay virtual display. The new driver is stored in the
    module-level ``driver`` global and returned. The running display is
    remembered on ``re_get_webdriver._display`` so the next call can stop it
    instead of leaving one more display running per call.

    Args:
        version_main: Chrome major version passed to ``uc.Chrome``.

    Returns:
        The new driver, or ``None`` if the display or Chrome failed to start
        (in which case everything started so far is torn down again).
    """
    global driver

    # Tear down the previous session. A failing quit() on a dead session
    # must not prevent the restart, so it is reported and ignored.
    if driver is not None:
        print('closing....')
        _run_cleanup(driver.quit, "previous driver")
        print('quit....')
        driver = None

    # Stop the display of the previous session only after its browser is gone.
    previous_display = getattr(re_get_webdriver, "_display", None)
    if previous_display is not None:
        _run_cleanup(previous_display.stop, "previous display")
        re_get_webdriver._display = None

    started_display = None
    new_driver = None
    try:
        display = Display(visible=0, size=(1920, 1080))
        display.start()
        started_display = display

        options = uc.ChromeOptions()
        options.add_argument("--no-sandbox")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--disable-blink-features=AutomationControlled")

        new_driver = uc.Chrome(options=options, version_main=version_main)
        # NOTE(review): this runs against whatever document is loaded right
        # now; confirm the override is still in effect after driver.get().
        new_driver.execute_script("Object.defineProperty(navigator, 'webdriver', {get: () => undefined})")
    except Exception as e:
        print(f"Chrome 啟動失敗: {str(e)}")
        # Do not leave a half-started browser or an orphaned display behind.
        if new_driver is not None:
            _run_cleanup(new_driver.quit, "new driver")
        if started_display is not None:
            _run_cleanup(started_display.stop, "new display")
        driver = None
        return None

    re_get_webdriver._display = started_display
    driver = new_driver
    return driver


def get_resource(kw: str):
    """Search Google for *kw* and return the links of the first results.

    A fresh browser is requested from ``re_get_webdriver`` (two attempts at
    most). The zh-TW / ``gl=tw`` results page is loaded and the ``href`` of
    the first 11 anchors found under ``div.yuRUbf`` is collected. The browser
    is always quit before returning.

    Args:
        kw: Search keyword; it is URL-quoted before being placed in the query.

    Returns:
        A list of hrefs; an empty list when the page contained no matching
        anchors; ``None`` when the browser could not be started or an error
        occurred while searching. Callers can treat any falsy value as failure.
    """
    # Shares the module-level handle with re_get_webdriver so that it can be
    # reset after quitting and is never quit a second time on the next call.
    global driver

    max_attempts = 2
    for attempt in range(max_attempts):
        driver = re_get_webdriver()
        print('re_get_webdriver')
        if driver is not None:
            break
        if attempt + 1 < max_attempts:
            time.sleep(3)

    if driver is None:
        print('exception', 'webdriver could not be started')
        return None

    try:
        googleurl = 'https://www.google.com/search?q={}&num={}&hl={}&gl=tw'.format(urllib.parse.quote(kw), 100, 'zh-TW')
        driver.get(googleurl)

        # Fixed wait for the results page to finish rendering.
        time.sleep(6)
        print(driver.current_url)
        elmts = driver.find_elements(By.XPATH, "//div[@class='yuRUbf']//a")
        numresults = len(elmts)
        print('搜尋結果數量', numresults)
        if numresults == 0:
            # Print where the browser ended up to help diagnose the empty
            # page. Returning (rather than sys.exit()) keeps a web worker
            # that calls this function alive.
            print(driver.current_url)
            print(driver.title)
            return []

        resources_list = [elmt.get_attribute('href') for elmt in elmts[:11]]
        print(resources_list)
        return resources_list

    except Exception as e:
        print('exception', repr(e))
        return None

    finally:
        # Runs on every exit path above, so the browser is always released.
        try:
            driver.quit()
        except Exception as e:
            print('driver.quit failed', repr(e))
        driver = None

if __name__ == "__main__":
    # When run as a script: fetch the source URLs from the first page of search results.
    urls = get_resource('書房 設計 北歐')
# Disabled follow-up step: pass the URLs (or the hard-coded pair) to the article generator.
# NOTE(review): the /generate handler constructs ArticleGenerator with a third
# `keyword` argument, so this snippet looks stale — update it before re-enabling.
#urls = ['https://www.100.com.tw/article/4359', 'https://yes-99.com/news-info.asp?id=278']
#article_generator = urlToarticle.ArticleGenerator(user_data_dir, profile_directory)
#is_succesed = article_generator.generate_article(urls)
#if is_succesed:
#    print(is_succesed)


class SearchRequest(BaseModel):
    """Pydantic model with a single ``keyword`` string field.

    NOTE(review): no route in the visible part of this file references it;
    presumably intended as the JSON body of a search endpoint — confirm or remove.
    """
    keyword: str

class GenerateRequest(BaseModel):
    """Pydantic model holding a list of URL strings and a ``keyword`` string.

    NOTE(review): the ``/generate`` handler in this file reads ``keyword`` from
    form data and does not use this model — confirm whether it is still needed.
    """
    urls: List[str]
    keyword: str

@app.post("/generate")
async def generate_article(keyword: str = Form(...)):
    """Handle ``POST /generate``: search for *keyword* and return the article file.

    Responses:
        400 HTML — ``get_resource`` returned nothing usable.
        500 HTML — the article generator reported failure.
        404 HTML — ``./article.md`` does not exist after generation.
        otherwise — ``./article.md`` as a ``text/markdown`` download.

    NOTE(review): ``get_resource`` sleeps and drives a browser synchronously and
    the generator is called without ``await``, so this coroutine holds the
    event loop for the whole run — confirm that is acceptable.
    """
    markdown_path = "./article.md"

    # Step 1: collect source URLs; bail out when the search produced none.
    source_urls = get_resource(keyword)
    if not source_urls:
        search_failed = "<h2>搜尋失敗，請回上一頁重試！</h2>"
        return HTMLResponse(content=search_failed, status_code=400)

    # Step 2: build the article from those URLs.
    generated = urlToarticle.ArticleGenerator(
        user_data_dir, profile_directory, keyword
    ).generate_article(source_urls)
    if not generated:
        return HTMLResponse(status_code=500, content="<h2>文章生成失敗！</h2>")

    # Step 3: send the Markdown file back if it is actually on disk.
    if os.path.exists(markdown_path):
        return FileResponse(markdown_path, media_type="text/markdown", filename="article.md")
    return HTMLResponse(content="<h2>找不到生成的 Markdown 檔案！</h2>", status_code=404)


@app.get("/search", response_class=HTMLResponse)
async def search_page(request: Request):
    """Handle ``GET /search`` by rendering the ``search.html`` template."""
    # The template context carries only the incoming request object.
    context = {"request": request}
    return templates.TemplateResponse("search.html", context)