# -*- coding: utf-8 -*-
"""Build a Markdown article from a list of web pages.

The script drives the NotebookLM web UI with Selenium (through
``undetected_chromedriver``): it creates a notebook, adds every URL as a
website source, asks the notebook chat for an article, passes the answer
text through an OpenAI chat model to normalise it into Markdown, and writes
the result to a ``.md`` file.
"""
import logging
import os
import time

import undetected_chromedriver as uc
from dotenv import load_dotenv
from langchain_community.callbacks import get_openai_callback
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Load environment variables from a .env file, if there is one.
load_dotenv()
# Record how the program is running.
logging.basicConfig(level=logging.INFO)

# Number of attempts for starting the browser and for the whole workflow.
_MAX_ATTEMPTS = 3
# Upper bound, in seconds, for every explicit wait on a page element.
_WAIT_SECONDS = 20
# Index of the chat message that is read back as the answer.
_ANSWER_INDEX = 1

_NOTEBOOKLM_URL = "https://notebooklm.google.com/"
_NEW_NOTEBOOK_XPATH = (
    '/html/body/labs-tailwind-root/div/welcome-page/div/div[2]/div[1]/div'
    '/button/span[2]'
)
_UPLOAD_ICON_CSS = (
    '.mat-icon.notranslate.upload-icon.google-symbols.mat-icon-no-color'
)
_FIRST_WEBSITE_CHIP_XPATH = '//*[@id="mat-mdc-chip-2"]/span[2]/span/span[2]'
_SOURCE_PICKER_CHECK_JS = (
    "return document.querySelector('button #mat-tab-group-0-content-0 > div"
    " > div > div > source-picker > div >"
    " div.ng-tns-c2551705568-5.ng-star-inserted > button >"
    " span.mat-mdc-button-persistent-ripple.mdc-button__ripple') != null"
)
_SET_VALUE_JS = "arguments[0].value = arguments[1];"
_FIRE_INPUT_JS = "arguments[0].dispatchEvent(new Event('input'));"
_SCROLL_INTO_VIEW_JS = "arguments[0].scrollIntoView(true);"

# Prompt typed into the NotebookLM chat box (sent verbatim).
_ARTICLE_PROMPT = (
    "彙整並生成一篇以常見問題為主的 MarkDown 格式的 專業文章。"
    "在各個分類或重要段落中加入一些具體的案例或數據。"
    "5. 若內容不是中文,要精準翻譯成中文。"
)

# System prompt for the Markdown clean-up step. ``{language}`` is a template
# variable so that braces inside the language name cannot break the template.
_MD_SYSTEM_PROMPT = (
    "你是一個專門做 md 格式文章的AI助理. 可以從一大串文字整理出高質量的 md 格式文章. "
    "多餘的內容拿掉,只需要回傳文章內容即可. 輸出的語言為{language}."
)


class ArticleGenerator:
    """Generate a Markdown article from URLs via NotebookLM and OpenAI."""

    def __init__(self, user_data_dir, profile_directory, version_main=132):
        """Store the Chrome profile settings and read the OpenAI API key.

        Args:
            user_data_dir: Chrome "User Data" directory passed to the browser.
            profile_directory: Name of the profile directory inside it.
            version_main: Major Chrome version handed to
                ``undetected_chromedriver``; defaults to 132.
        """
        self.user_data_dir = user_data_dir
        self.profile_directory = profile_directory
        self.version_main = version_main
        self.driver = None
        # ``os.getenv`` returns None when the variable is not set.
        self.OPEN_API_KEY = os.getenv('OPEN_API_KEY')

    def get_webdriver(self):
        """Start Chrome and return the driver, trying up to three times.

        Returns:
            The started ``undetected_chromedriver`` Chrome instance, which is
            also stored on ``self.driver``.

        Raises:
            Exception: Whatever the last failed start attempt raised.
        """
        for attempt in range(_MAX_ATTEMPTS):
            try:
                options = uc.ChromeOptions()
                options.add_argument('--ignore-certificate-errors')
                options.add_argument("--disable-gpu")
                options.add_argument("--disable-dev-shm-usage")
                options.add_argument(f"user-data-dir={self.user_data_dir}")
                options.add_argument(
                    f'--profile-directory={self.profile_directory}'
                )
                self.driver = uc.Chrome(
                    options=options,
                    version_main=self.version_main,
                    use_subprocess=True,
                )
                self.driver.execute_script(
                    "Object.defineProperty(navigator, 'webdriver', "
                    "{get: () => undefined})"
                )
                return self.driver
            except Exception as e:
                print(f'Error: {e}')
                print(f"WebDriver 啟動失敗,第 {attempt + 1} 次嘗試...")
                # Do not leave a half-started browser behind before retrying.
                self._quit_driver()
                if attempt == _MAX_ATTEMPTS - 1:
                    raise
                time.sleep(2)  # wait 2 seconds before retrying

    def ttm(self, article, language='繁體中文'):
        """Ask the OpenAI chat model to turn ``article`` into Markdown.

        Token usage and cost reported by the OpenAI callback are printed.

        Args:
            article: Raw text to convert.
            language: Output language named in the system prompt.

        Returns:
            The model's reply as a string.
        """
        with get_openai_callback() as cb:
            llm = ChatOpenAI(
                model_name="gpt-4o",
                temperature=0.7,
                api_key=self.OPEN_API_KEY,
                max_tokens=4096,
            )
            qa_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", _MD_SYSTEM_PROMPT),
                    ("human", "{question}"),
                ]
            )
            chain = qa_prompt | llm | StrOutputParser()
            text = chain.invoke(
                {"question": article + "請轉成 md 格式", "language": language}
            )
            print(f"Total Tokens: {cb.total_tokens}")
            print(f"Prompt Tokens: {cb.prompt_tokens}")
            print(f"Completion Tokens: {cb.completion_tokens}")
            print(f"Total Cost (USD): ${cb.total_cost}")
        return text

    def generate_article(self, urls):
        """Run the whole workflow and save the article to ``article.md``.

        The browser workflow, the OpenAI call and the file write are retried
        together up to three times; the browser is always closed after each
        attempt, whether it succeeded or failed.

        Args:
            urls: Iterable of page URLs to add as NotebookLM sources.

        Returns:
            ``'article.md 生成成功'`` when the file was written,
            ``'存檔失敗'`` when writing the file failed, and ``False`` when
            ``urls`` is empty or every attempt raised an error.
        """
        urls = list(urls)
        if not urls:
            print("出現錯誤: ", "no urls given")
            return False

        for _ in range(_MAX_ATTEMPTS):
            try:
                driver = self.get_webdriver()
                answer = self._ask_notebooklm(driver, urls)
                text = self.ttm(answer)
                if self.save_article_as_md(text):
                    return 'article.md 生成成功'
                return '存檔失敗'
            except Exception as e:
                print("出現錯誤: ", str(e))
            finally:
                self._quit_driver()
        return False

    def save_article_as_md(self, content, filename="./article.md"):
        """Write ``content`` to ``filename`` (UTF-8 with BOM).

        Args:
            content: Article text to write.
            filename: Target path; defaults to ``./article.md``.

        Returns:
            ``True`` on success, ``False`` if the file could not be written.
        """
        try:
            with open(filename, 'w', encoding='utf-8-sig') as file:
                file.write(content)
        except (OSError, TypeError, ValueError) as e:
            print(f"保存文章時發生錯誤: {str(e)}")
            return False
        print(f"文章已成功保存為 {filename}")
        return True

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _quit_driver(self):
        """Close the browser if one is open and forget the driver."""
        driver, self.driver = self.driver, None
        if driver is None:
            return
        try:
            driver.quit()
        except Exception as e:
            # Cleanup is best-effort; a failing quit must not mask the result.
            print(f'Error: {e}')

    @staticmethod
    def _wait_present(driver, by, value, timeout=_WAIT_SECONDS):
        """Return the element once it is present in the DOM."""
        return WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((by, value))
        )

    @staticmethod
    def _wait_ready(driver, finder, timeout=_WAIT_SECONDS):
        """Return ``finder()``'s element once it is displayed and enabled.

        ``finder`` may return ``None`` (or raise ``NoSuchElementException``,
        which ``WebDriverWait`` ignores) while the element does not exist yet.
        """
        def _ready(_driver):
            element = finder()
            if (
                element is not None
                and element.is_displayed()
                and element.is_enabled()
            ):
                return element
            return False

        return WebDriverWait(driver, timeout).until(_ready)

    @staticmethod
    def _type_with_js_fallback(driver, element, text):
        """Type ``text`` and press Enter; on failure set the value via JS."""
        try:
            element.send_keys(text)
            element.send_keys(Keys.RETURN)
        except Exception as e:
            print(f'Error: {e}')
            # Force the value in and fire an ``input`` event instead.
            driver.execute_script(_SET_VALUE_JS, element, text)
            driver.execute_script(_FIRE_INPUT_JS, element)

    def _ask_notebooklm(self, driver, urls):
        """Create a notebook, add ``urls`` as sources and return the answer."""
        driver.get(_NOTEBOOKLM_URL)
        time.sleep(3)

        # Create a new notebook.
        self._wait_present(driver, By.XPATH, _NEW_NOTEBOOK_XPATH).click()
        time.sleep(3)

        for index, source_url in enumerate(urls):
            time.sleep(5)
            if index == 0:
                self._add_first_source(driver, source_url)
            else:
                self._add_next_source(driver, source_url)
            print(f'成功輸入網址: {source_url}')
            time.sleep(7)

        time.sleep(5)
        self._send_prompt(driver)

        print('等候答案')
        # The answer element can exist while its text is still being
        # generated, so give it a fixed amount of time before reading it.
        time.sleep(40)
        return self._wait_ready(
            driver, lambda: self._find_answer(driver)
        ).text

    def _add_first_source(self, driver, source_url):
        """Add the first URL through the upload dialog of a new notebook."""
        upload_icon = self._wait_present(
            driver, By.CSS_SELECTOR, _UPLOAD_ICON_CSS
        )
        driver.execute_script(_SCROLL_INTO_VIEW_JS, upload_icon)
        upload_icon.click()
        time.sleep(3)

        # "Website" chip.
        website_chip = self._wait_present(
            driver, By.XPATH, _FIRST_WEBSITE_CHIP_XPATH
        )
        driver.execute_script(_SCROLL_INTO_VIEW_JS, website_chip)
        website_chip.click()
        time.sleep(1)

        # URL input field.
        website_input = self._wait_present(driver, By.ID, 'mat-input-0')
        website_input.send_keys(source_url)
        website_input.send_keys(Keys.RETURN)

    def _add_next_source(self, driver, source_url):
        """Add a further URL through the "add source" button."""
        self._open_add_source_dialog(driver)
        time.sleep(3)
        self._click_website_chip(driver)
        time.sleep(2)

        def _find_input():
            upload = driver.find_element(By.TAG_NAME, 'website-upload')
            field = upload.find_element(By.TAG_NAME, 'mat-form-field')
            return field.find_element(By.TAG_NAME, 'input')

        website_input = self._wait_ready(driver, _find_input)
        self._type_with_js_fallback(driver, website_input, source_url)

    @staticmethod
    def _open_add_source_dialog(driver):
        """Click the third ``mdc-button__label`` element (best effort)."""
        def _labels(_driver):
            found = _driver.find_elements(By.CLASS_NAME, "mdc-button__label")
            # The button that is clicked is the third label on the page.
            return found if len(found) > 2 else False

        try:
            labels = WebDriverWait(driver, _WAIT_SECONDS).until(_labels)
            labels[2].click()
        except Exception as e:
            print(f'Error: {e}')
            # Report whether the source picker button exists at all.
            present = driver.execute_script(_SOURCE_PICKER_CHECK_JS)
            print(f'source picker button present: {present}')

    @staticmethod
    def _click_website_chip(driver):
        """Click the chip in the second chip group, via JS if needed."""
        chip = None
        try:
            groups = driver.find_element(
                By.CLASS_NAME, 'chip-groups'
            ).find_elements(By.CSS_SELECTOR, '.chip-group.ng-star-inserted')
            chip = groups[1].find_element(By.TAG_NAME, 'mat-chip')
            chip.click()
        except Exception as e:
            print(f'Error: {e}')
            if chip is None:
                # Nothing was located, so there is nothing to click with JS.
                return
            try:
                driver.execute_script("arguments[0].click();", chip)
                print("使用 JavaScript 成功點擊元素")
            except Exception as js_error:
                print(f'JavaScript 點擊錯誤: {js_error}')

    def _send_prompt(self, driver):
        """Type the article prompt into the notebook chat box."""
        def _find_box():
            panel = driver.find_element(By.TAG_NAME, 'chat-panel')
            omnibar = panel.find_element(By.TAG_NAME, 'omnibar')
            query_box = omnibar.find_element(By.TAG_NAME, 'query-box')
            return query_box.find_element(By.TAG_NAME, 'textarea')

        prompt_input = self._wait_ready(driver, _find_box)
        self._type_with_js_fallback(driver, prompt_input, _ARTICLE_PROMPT)

    @staticmethod
    def _find_answer(driver):
        """Return the answer chat message, or ``None`` if not there yet."""
        messages = driver.find_element(
            By.TAG_NAME, 'chat-panel'
        ).find_elements(By.TAG_NAME, 'chat-message')
        if len(messages) > _ANSWER_INDEX:
            return messages[_ANSWER_INDEX]
        return None


# Manual test area (runs only when this module is executed directly).
if __name__ == "__main__":
    user_data_dir = 'C:/Users/wangy/AppData/Local/Google/Chrome/User Data'
    profile_directory = 'Profile 20'

    # Example usage.
    article_generator = ArticleGenerator(user_data_dir, profile_directory)
    urls = [
        "https://zh.wikipedia.org/zh-tw/%E8%99%9B%E6%93%AC%E4%BA%BA",
        "https://www.naipo.com/Portals/1/web_tw/Knowledge_Center/Industry_Insight/IPNC_240515_1501.htm",
        "https://money.udn.com/money/story/11162/8333646",
        "https://gitmind.com/tw/digital-human-creator.html",
        "https://support.microsoft.com/zh-tw/office/%E5%9C%A8-microsoft-teams-%E4%B8%AD%E4%BB%A5%E8%99%9B%E6%93%AC%E4%BA%BA%E5%81%B6%E7%9A%84%E5%BD%A2%E5%BC%8F%E5%8A%A0%E5%85%A5%E6%9C%83%E8%AD%B0-5384e7b7-30c7-4bcb-8065-0c9e830cc8ad",
        "https://digitaldomain.com/%E8%99%9B%E6%93%AC%E4%BA%BA%E5%AF%A6%E9%A9%97%E5%AE%A4/?lang=zh-hant",
        "https://www.cdri.org.tw/xcdoc/cont?xsmsid=0H270572678476094046&sid=0N149542836021459905",
        "https://learn.microsoft.com/zh-tw/azure/ai-services/speech-service/text-to-speech-avatar/what-is-text-to-speech-avatar",
        "https://www.xfyun.cn/services/VirtualHumans",
        "https://www.bnext.com.tw/article/65449/virtual-human-subculture",
    ]
    result = article_generator.generate_article(urls)
    if result:
        print(result)