8 달 전 · e084a98e47
--- a/urlToarticle.py
+++ b/urlToarticle.py
@@ -3,14 +3,27 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.common.keys import Keys
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				+from langchain_openai import ChatOpenAI
			
 
				+from langchain_core.output_parsers import StrOutputParser
			
 
				+from langchain_community.callbacks import get_openai_callback
			
 
				+from langchain_core.prompts import ChatPromptTemplate
			
 
				 import time
			
 
				 import undetected_chromedriver as uc
			
 
				+from dotenv import load_dotenv
			
 
				+import os
			
 
				+import logging
			
 
				+
			
 
				+# 加载环境变量
			
 
				+load_dotenv()
			
 
				+# 紀錄程式執行狀況
			
 
				+logging.basicConfig(level=logging.INFO)
			
 
				 
			
 
				 class ArticleGenerator:
			
 
				     def __init__(self, user_data_dir, profile_directory):
				         """Store the browser profile settings and read the API key from the environment.

				         Args:
				             user_data_dir: Stored as-is on the instance; presumably the
				                 browser user-data directory consumed by get_webdriver
				                 (TODO confirm against that method).
				             profile_directory: Stored as-is on the instance; presumably the
				                 profile folder name inside user_data_dir (TODO confirm).
				         """
			
 
				         self.user_data_dir = user_data_dir
			
 
				         self.profile_directory = profile_directory
			
 
				         # No browser session exists yet; the attribute starts out empty.
				         self.driver = None
			
 
				         # Read from the 'OPEN_API_KEY' environment variable (note: not
				         # 'OPENAI_API_KEY'). os.getenv returns None when the variable is
				         # unset, so this can be None despite the str annotation.
				+        self.OPEN_API_KEY: str = os.getenv('OPEN_API_KEY')
			
 
				 
			
 
				     def get_webdriver(self):
			
 
				         for attempt in range(3):  # 嘗試最多 3 次
			
@@ -37,6 +50,39 @@ class ArticleGenerator:
 
				                     raise e
			
 
				                 time.sleep(2)  # 等待 2 秒後重試
			
 
				 
			
 
				+    # AI問答 - 取得 openai 回應
			
 
				+    def ttm(self, article, language='繁體中文'):
				+        """Rewrite raw text as a Markdown article using an OpenAI chat model.
				+
				+        The name is presumably short for "text to markdown".
				+
				+        Args:
				+            article: Raw text to reformat. Must be a non-blank string.
				+            language: Output language named in the system prompt.
				+
				+        Returns:
				+            The model's reply as a plain string.
				+
				+        Raises:
				+            ValueError: If ``article`` is not a string or contains only
				+                whitespace.
				+        """
				+        # Reject unusable input up front: None would fail later with an
				+        # opaque TypeError on concatenation, and blank text would spend a
				+        # paid API call on nothing.
				+        if not isinstance(article, str) or not article.strip():
				+            raise ValueError("article must be a non-empty string")
				+
				+        # This text is a prompt *template*. The language is supplied as a
				+        # template variable instead of being baked in with an f-string, so
				+        # a value containing "{" or "}" is not re-parsed as a placeholder.
				+        qa_system_prompt = """你是一個專門做 md 格式文章的AI助理.
				+可以從一大串文字整理出高質量的 md 格式文章. 多餘的內容拿掉，只需要回傳文章內容即可.
				+輸出的語言為{language}."""
				+
				+        qa_prompt = ChatPromptTemplate.from_messages(
				+            [
				+                ("system", qa_system_prompt),
				+                ("human", "{question}"),
				+            ]
				+        )
				+
				+        llm = ChatOpenAI(
				+            model_name="gpt-4o",
				+            temperature=0.7,
				+            api_key=self.OPEN_API_KEY,
				+            max_tokens=4096,
				+        )
				+
				+        # prompt -> chat model -> plain string
				+        chain = qa_prompt | llm | StrOutputParser()
				+
				+        # The callback records token usage and cost for calls made inside
				+        # the with-block; the totals stay readable on cb afterwards.
				+        with get_openai_callback() as cb:
				+            text = chain.invoke(
				+                {"question": article + "請轉成 md 格式", "language": language}
				+            )
				+
				+        print(f"Total Tokens: {cb.total_tokens}")
				+        print(f"Prompt Tokens: {cb.prompt_tokens}")
				+        print(f"Completion Tokens: {cb.completion_tokens}")
				+        print(f"Total Cost (USD): ${cb.total_cost}")
				+        return text
			
 
				+
			
 
				     def generate_article(self, urls):
			
 
				         for attempt in range(3):
			
 
				             try:
			
@@ -162,66 +208,93 @@ class ArticleGenerator:
 
				                 
			
 
				                 j = 1
			
 
				                 fore_content = ''
			
 
				-                while '##' not in fore_content:
			
 
				+                # while '##' not in fore_content:
			
 
				+                    # if j > 5:
			
 
				+                    #     break
			
 
				                     # 輸入生成文章的 prompt
			
 
				-                    complete_prompt = "1. 彙整並生成一篇以常見問題為主的 MarkDown 格式的 專業文章(給客戶看的，最後一句「希望這篇文章能解答...」整句直接刪掉)。2. 文章必須為 MarkDown 格式，也就是必須包含#、##等等。3. 在各個分類或重要段落中加入一些具體的案例或數據。4. 必須使用段落格式，不要條列式。5. 若內容不是中文，要精準翻譯成中文。6. 生成 MarkDown 的 md 檔(須包含大標題及各個段落的標題)。"
			
 
				-                    try:
			
 
				-                        prompt_input.send_keys(complete_prompt)
			
 
				-                        prompt_input.send_keys(Keys.RETURN)
			
 
				-                    except Exception as e:
			
 
				-                        print(f'Error: {e}')
			
 
				-                        # 使用 JavaScript 強制發送鍵盤事件
			
 
				-                        driver.execute_script("arguments[0].value = arguments[1];", prompt_input, complete_prompt)  # 將文字輸入到 input
			
 
				-                        driver.execute_script("arguments[0].dispatchEvent(new Event('input'));", prompt_input)  # 觸發 input 事件
			
 
				+                    # complete_prompt = "1. 彙整並生成一篇以常見問題為主的 MarkDown 格式的 專業文章(給客戶看的，最後一句「希望這篇文章能解答...」整句直接刪掉)。2. 文章必須為 MarkDown 格式，也就是必須包含#、##等等。3. 在各個分類或重要段落中加入一些具體的案例或數據。4. 必須使用段落格式，不要條列式。5. 若內容不是中文，要精準翻譯成中文。6. 生成 MarkDown 的 md 檔(須包含大標題及各個段落的標題)。"
			
 
				+                complete_prompt = "彙整並生成一篇以常見問題為主的 MarkDown 格式的 專業文章。在各個分類或重要段落中加入一些具體的案例或數據。5. 若內容不是中文，要精準翻譯成中文。"
			
 
				+                try:
			
 
				+                    prompt_input.send_keys(complete_prompt)
			
 
				+                    prompt_input.send_keys(Keys.RETURN)
			
 
				+                except Exception as e:
			
 
				+                    print(f'Error: {e}')
			
 
				+                    # 使用 JavaScript 強制發送鍵盤事件
			
 
				+                    driver.execute_script("arguments[0].value = arguments[1];", prompt_input, complete_prompt)  # 將文字輸入到 input
			
 
				+                    driver.execute_script("arguments[0].dispatchEvent(new Event('input'));", prompt_input)  # 觸發 input 事件
			
 
				 
			
 
				 
			
 
				-                    print('等候答案')
			
 
				-                    time.sleep(45)  # 等待答案生成
			
 
				+                print('等候答案')
			
 
				+                time.sleep(40)  # 等待答案生成
			
 
				+                message = driver.find_element(By.TAG_NAME, 'chat-panel').find_elements(By.TAG_NAME, 'chat-message')[j]
			
 
				+                while not message.is_displayed() or not message.is_enabled():
			
 
				+                    time.sleep(0.1)
			
 
				                     message = driver.find_element(By.TAG_NAME, 'chat-panel').find_elements(By.TAG_NAME, 'chat-message')[j]
			
 
				-                    answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
			
 
				-                    while not answers[1].is_displayed() or not answers[1].is_enabled():
			
 
				-                        time.sleep(0.1)
			
 
				-                        answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
			
 
				-                    fore_content = message.text
			
 
				-                    j+=2
			
 
				+                # answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
			
 
				+                # while not answers[1].is_displayed() or not answers[1].is_enabled():
			
 
				+                #     time.sleep(0.1)
			
 
				+                #     answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
			
 
				+                # fore_content = message.text
			
 
				+                    # j+=2
			
 
				                 
			
 
				                 # 直接生成 md 檔 或是 生成文字?
			
 
				-                texts = []
			
 
				-                for i, answer in enumerate(answers[1:]):
			
 
				-                    if '###' not in fore_content:
			
 
				-                        if i == 0:
			
 
				-                            texts.append('# ' + answer.text.strip())
			
 
				-                        elif i % 2 == 1 and i != len(answers[1:])-1:
			
 
				-                            texts.append('## ' + answer.text.strip())
			
 
				-                        elif i == len(answers[1:])-1:
			
 
				-                            continue
			
 
				-                        else:  
			
 
				-                            texts.append(answer.text.strip())
			
 
				-                    else:
			
 
				-                        if '# # ' in answer.text:
			
 
				-                            t = answer.text.replace('# # ', '# ').strip()
			
 
				-                            texts.append(t)
			
 
				-                        else:
			
 
				-                            texts.append(answer.text.strip())
			
 
				-
			
 
				-                print(texts)
			
 
				-                if len(texts) > 1:
			
 
				-                    content = '\n'.join(texts)
			
 
				-                else:
			
 
				-                    content = texts[0].replace('# # ', '# ')
			
 
				-                print(content)
			
 
				-                is_succesed = self.save_article_as_md(content)
			
 
				+                # texts = []
			
 
				+                # if '#' not in answers[0].text:
			
 
				+                #     for i, answer in enumerate(answers[1:]):
			
 
				+                #         if '##' not in fore_content:
			
 
				+                #             if i == 0:
			
 
				+                #                 texts.append('# ' + answer.text.strip())
			
 
				+                #             elif i % 2 == 1 and i != len(answers[1:])-1:
			
 
				+                #                 texts.append('## ' + answer.text.strip())
			
 
				+                #             elif i == len(answers[1:])-1:
			
 
				+                #                 continue
			
 
				+                #             else:  
			
 
				+                #                 texts.append(answer.text.strip())
			
 
				+                #         else:
			
 
				+                #             if '# # ' in answer.text:
			
 
				+                #                 t = answer.text.replace('# # ', '# ').strip()
			
 
				+                #                 texts.append(t)
			
 
				+                #             else:
			
 
				+                #                 texts.append(answer.text.strip())
			
 
				+                # else:
			
 
				+                #     for i, answer in enumerate(answers):
			
 
				+                #         if '##' not in fore_content:
			
 
				+                #             if i == 0:
			
 
				+                #                 texts.append('# ' + answer.text.strip())
			
 
				+                #             elif i % 2 == 1 and i != len(answers[1:])-1:
			
 
				+                #                 texts.append('## ' + answer.text.strip())
			
 
				+                #             elif i == len(answers[1:])-1:
			
 
				+                #                 continue
			
 
				+                #             else:  
			
 
				+                #                 texts.append(answer.text.strip())
			
 
				+                #         else:
			
 
				+                #             if '# # ' in answer.text:
			
 
				+                #                 t = answer.text.replace('# # ', '# ').strip()
			
 
				+                #                 texts.append(t)
			
 
				+                #             else:
			
 
				+                #                 texts.append(answer.text.strip())
			
 
				+                
			
 
				+                # print(texts)
			
 
				+                # if len(texts) > 1:
			
 
				+                #     content = '\n'.join(texts)
			
 
				+                # else:
			
 
				+                #     content = texts[0].replace('# # ', '# ')
			
 
				+                # print(content)
			
 
				+                text = self.ttm(message.text)
			
 
				+                is_succesed = self.save_article_as_md(text)
			
 
				                 if is_succesed:
			
 
				                     return 'article.md 生成成功'
			
 
				                 else:
			
 
				                     return '存檔失敗'
			
 
				             except Exception as e:
			
 
				                 print("出現錯誤: ", str(e))
			
 
				-                return False
			
 
				+                self.driver.quit()
			
 
				+                self.driver = None
			
 
				 
			
 
				         if self.driver:
			
 
				             time.sleep(5)
			
 
				             self.driver.quit()
			
 
				+            return False
			
 
				     
			
 
				     def save_article_as_md(self, content, filename="./article.md"):
			
 
				         # 打開或創建一個 .md 文件
			
@@ -261,3 +334,13 @@ if __name__ == "__main__":
 
				     if is_succesed:
			
 
				         print(is_succesed)
			
 
				 
			
 
				+    # import notebookllm
			
 
				+    # # 設置包含網址的 prompt
			
 
				+    # prompt = "請分析這個網址內容並轉成 md 格式文章： https://zh.wikipedia.org/zh-tw/%E8%99%9B%E6%93%AC%E4%BA%BA"
			
 
				+
			
 
				+    # # 發送 prompt 給模型
			
 
				+    # response = notebookllm.Notebook()
			
 
				+
			
 
				+    # # 顯示模型的回應
			
 
				+    # print(response)
			
 
				+