|
@@ -3,14 +3,27 @@ from selenium.webdriver.common.by import By
|
|
|
from selenium.webdriver.common.keys import Keys
|
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
|
+from langchain_openai import ChatOpenAI
|
|
|
+from langchain_core.output_parsers import StrOutputParser
|
|
|
+from langchain_community.callbacks import get_openai_callback
|
|
|
+from langchain_core.prompts import ChatPromptTemplate
|
|
|
import time
|
|
|
import undetected_chromedriver as uc
|
|
|
+from dotenv import load_dotenv
|
|
|
+import os
|
|
|
+import logging
|
|
|
+
|
|
|
+# Load environment variables from a .env file
|
|
|
+load_dotenv()
|
|
|
+# Log program execution status: configure the root logger at INFO level
|
|
|
+logging.basicConfig(level=logging.INFO)
|
|
|
|
|
|
class ArticleGenerator:
|
|
|
def __init__(self, user_data_dir, profile_directory):  # Store the two constructor arguments, start with no driver, and read OPEN_API_KEY from the environment.
|
|
|
self.user_data_dir = user_data_dir  # presumably the browser user-data directory used when the driver is created - confirm against get_webdriver
|
|
|
self.profile_directory = profile_directory  # presumably the browser profile name inside user_data_dir - confirm against get_webdriver
|
|
|
self.driver = None  # No driver object is held until one is assigned elsewhere in the class.
|
|
|
+ self.OPEN_API_KEY: str = os.getenv('OPEN_API_KEY')  # NOTE(review): os.getenv returns None when OPEN_API_KEY is unset, so this is not guaranteed to be a str; ttm passes it to ChatOpenAI as api_key.
|
|
|
|
|
|
def get_webdriver(self):
|
|
|
for attempt in range(3): # 嘗試最多 3 次
|
|
@@ -37,6 +50,39 @@ class ArticleGenerator:
|
|
|
raise e
|
|
|
time.sleep(2) # 等待 2 秒後重試
|
|
|
|
|
|
+ # AI Q&A - get the OpenAI response: send `article` to the gpt-4o chat model through a LangChain prompt and return the model's Markdown-formatted text in `language`.
|
|
|
+ def ttm(self, article, language='繁體中文'):  # article: source text (concatenated with a str below, so presumably a str); language: output language named in the system prompt.
|
|
|
+ with get_openai_callback() as cb:  # cb supplies the token/cost figures printed below. NOTE(review): indentation is lost in this patch view, so which statements sit inside this `with` cannot be confirmed here.
|
|
|
+ model_name = "gpt-4o"
|
|
|
+ llm = ChatOpenAI(model_name=model_name, temperature=0.7, api_key=self.OPEN_API_KEY, max_tokens=4096)  # The API key is the value __init__ read from the OPEN_API_KEY environment variable.
|
|
|
+ qa_system_prompt = f"""你是一個專門做 md 格式文章的AI助理.
|
|
|
+可以從一大串文字整理出高質量的 md 格式文章. 多餘的內容拿掉,只需要回傳文章內容即可.
|
|
|
+輸出的語言為{language}."""
|
|
|
+
|
|
|
+ qa_prompt = ChatPromptTemplate.from_messages(  # System message (built above with `language` already interpolated) plus a human message filled from the "question" variable.
|
|
|
+ [
|
|
|
+ ("system", qa_system_prompt),
|
|
|
+ ("human", "{question}"),
|
|
|
+ ]
|
|
|
+ )
|
|
|
+
|
|
|
+ rag_chain = (  # Pipeline: prompt -> chat model -> string output parser. NOTE(review): named rag_chain, but no retrieval step appears in this chain.
|
|
|
+ qa_prompt
|
|
|
+ | llm
|
|
|
+ | StrOutputParser()
|
|
|
+ )
|
|
|
+
|
|
|
+ text = rag_chain.invoke(
|
|
|
+ {"question": article+"請轉成 md 格式"}  # The appended literal asks the model to convert the text to md format.
|
|
|
+ )
|
|
|
+
|
|
|
+
|
|
|
+ print(f"Total Tokens: {cb.total_tokens}")  # Usage figures are printed to stdout rather than sent through the logging module.
|
|
|
+ print(f"Prompt Tokens: {cb.prompt_tokens}")
|
|
|
+ print(f"Completion Tokens: {cb.completion_tokens}")
|
|
|
+ print(f"Total Cost (USD): ${cb.total_cost}")
|
|
|
+ return text  # The value returned by rag_chain.invoke, i.e. the output of StrOutputParser.
|
|
|
+
|
|
|
def generate_article(self, urls):
|
|
|
for attempt in range(3):
|
|
|
try:
|
|
@@ -162,66 +208,93 @@ class ArticleGenerator:
|
|
|
|
|
|
j = 1
|
|
|
fore_content = ''
|
|
|
- while '##' not in fore_content:
|
|
|
+ # while '##' not in fore_content:
|
|
|
+ # if j > 5:
|
|
|
+ # break
|
|
|
# 輸入生成文章的 prompt
|
|
|
- complete_prompt = "1. 彙整並生成一篇以常見問題為主的 MarkDown 格式的 專業文章(給客戶看的,最後一句「希望這篇文章能解答...」整句直接刪掉)。2. 文章必須為 MarkDown 格式,也就是必須包含#、##等等。3. 在各個分類或重要段落中加入一些具體的案例或數據。4. 必須使用段落格式,不要條列式。5. 若內容不是中文,要精準翻譯成中文。6. 生成 MarkDown 的 md 檔(須包含大標題及各個段落的標題)。"
|
|
|
- try:
|
|
|
- prompt_input.send_keys(complete_prompt)
|
|
|
- prompt_input.send_keys(Keys.RETURN)
|
|
|
- except Exception as e:
|
|
|
- print(f'Error: {e}')
|
|
|
- # 使用 JavaScript 強制發送鍵盤事件
|
|
|
- driver.execute_script("arguments[0].value = arguments[1];", prompt_input, complete_prompt) # 將文字輸入到 input
|
|
|
- driver.execute_script("arguments[0].dispatchEvent(new Event('input'));", prompt_input) # 觸發 input 事件
|
|
|
+ # complete_prompt = "1. 彙整並生成一篇以常見問題為主的 MarkDown 格式的 專業文章(給客戶看的,最後一句「希望這篇文章能解答...」整句直接刪掉)。2. 文章必須為 MarkDown 格式,也就是必須包含#、##等等。3. 在各個分類或重要段落中加入一些具體的案例或數據。4. 必須使用段落格式,不要條列式。5. 若內容不是中文,要精準翻譯成中文。6. 生成 MarkDown 的 md 檔(須包含大標題及各個段落的標題)。"
|
|
|
+ complete_prompt = "彙整並生成一篇以常見問題為主的 MarkDown 格式的 專業文章。在各個分類或重要段落中加入一些具體的案例或數據。5. 若內容不是中文,要精準翻譯成中文。"
|
|
|
+ try:
|
|
|
+ prompt_input.send_keys(complete_prompt)
|
|
|
+ prompt_input.send_keys(Keys.RETURN)
|
|
|
+ except Exception as e:
|
|
|
+ print(f'Error: {e}')
|
|
|
+ # 使用 JavaScript 強制發送鍵盤事件
|
|
|
+ driver.execute_script("arguments[0].value = arguments[1];", prompt_input, complete_prompt) # 將文字輸入到 input
|
|
|
+ driver.execute_script("arguments[0].dispatchEvent(new Event('input'));", prompt_input) # 觸發 input 事件
|
|
|
|
|
|
|
|
|
- print('等候答案')
|
|
|
- time.sleep(45) # 等待答案生成
|
|
|
+ print('等候答案')
|
|
|
+ time.sleep(40) # 等待答案生成
|
|
|
+ message = driver.find_element(By.TAG_NAME, 'chat-panel').find_elements(By.TAG_NAME, 'chat-message')[j]
|
|
|
+ while not message.is_displayed() or not message.is_enabled():
|
|
|
+ time.sleep(0.1)
|
|
|
message = driver.find_element(By.TAG_NAME, 'chat-panel').find_elements(By.TAG_NAME, 'chat-message')[j]
|
|
|
- answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
|
|
|
- while not answers[1].is_displayed() or not answers[1].is_enabled():
|
|
|
- time.sleep(0.1)
|
|
|
- answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
|
|
|
- fore_content = message.text
|
|
|
- j+=2
|
|
|
+ # answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
|
|
|
+ # while not answers[1].is_displayed() or not answers[1].is_enabled():
|
|
|
+ # time.sleep(0.1)
|
|
|
+ # answers = message.find_elements(By.TAG_NAME, 'labs-tailwind-structural-element-view-v2')
|
|
|
+ # fore_content = message.text
|
|
|
+ # j+=2
|
|
|
|
|
|
# 直接生成 md 檔 或是 生成文字?
|
|
|
- texts = []
|
|
|
- for i, answer in enumerate(answers[1:]):
|
|
|
- if '###' not in fore_content:
|
|
|
- if i == 0:
|
|
|
- texts.append('# ' + answer.text.strip())
|
|
|
- elif i % 2 == 1 and i != len(answers[1:])-1:
|
|
|
- texts.append('## ' + answer.text.strip())
|
|
|
- elif i == len(answers[1:])-1:
|
|
|
- continue
|
|
|
- else:
|
|
|
- texts.append(answer.text.strip())
|
|
|
- else:
|
|
|
- if '# # ' in answer.text:
|
|
|
- t = answer.text.replace('# # ', '# ').strip()
|
|
|
- texts.append(t)
|
|
|
- else:
|
|
|
- texts.append(answer.text.strip())
|
|
|
-
|
|
|
- print(texts)
|
|
|
- if len(texts) > 1:
|
|
|
- content = '\n'.join(texts)
|
|
|
- else:
|
|
|
- content = texts[0].replace('# # ', '# ')
|
|
|
- print(content)
|
|
|
- is_succesed = self.save_article_as_md(content)
|
|
|
+ # texts = []
|
|
|
+ # if '#' not in answers[0].text:
|
|
|
+ # for i, answer in enumerate(answers[1:]):
|
|
|
+ # if '##' not in fore_content:
|
|
|
+ # if i == 0:
|
|
|
+ # texts.append('# ' + answer.text.strip())
|
|
|
+ # elif i % 2 == 1 and i != len(answers[1:])-1:
|
|
|
+ # texts.append('## ' + answer.text.strip())
|
|
|
+ # elif i == len(answers[1:])-1:
|
|
|
+ # continue
|
|
|
+ # else:
|
|
|
+ # texts.append(answer.text.strip())
|
|
|
+ # else:
|
|
|
+ # if '# # ' in answer.text:
|
|
|
+ # t = answer.text.replace('# # ', '# ').strip()
|
|
|
+ # texts.append(t)
|
|
|
+ # else:
|
|
|
+ # texts.append(answer.text.strip())
|
|
|
+ # else:
|
|
|
+ # for i, answer in enumerate(answers):
|
|
|
+ # if '##' not in fore_content:
|
|
|
+ # if i == 0:
|
|
|
+ # texts.append('# ' + answer.text.strip())
|
|
|
+ # elif i % 2 == 1 and i != len(answers[1:])-1:
|
|
|
+ # texts.append('## ' + answer.text.strip())
|
|
|
+ # elif i == len(answers[1:])-1:
|
|
|
+ # continue
|
|
|
+ # else:
|
|
|
+ # texts.append(answer.text.strip())
|
|
|
+ # else:
|
|
|
+ # if '# # ' in answer.text:
|
|
|
+ # t = answer.text.replace('# # ', '# ').strip()
|
|
|
+ # texts.append(t)
|
|
|
+ # else:
|
|
|
+ # texts.append(answer.text.strip())
|
|
|
+
|
|
|
+ # print(texts)
|
|
|
+ # if len(texts) > 1:
|
|
|
+ # content = '\n'.join(texts)
|
|
|
+ # else:
|
|
|
+ # content = texts[0].replace('# # ', '# ')
|
|
|
+ # print(content)
|
|
|
+ text = self.ttm(message.text)
|
|
|
+ is_succesed = self.save_article_as_md(text)
|
|
|
if is_succesed:
|
|
|
return 'article.md 生成成功'
|
|
|
else:
|
|
|
return '存檔失敗'
|
|
|
except Exception as e:
|
|
|
print("出現錯誤: ", str(e))
|
|
|
- return False
|
|
|
+ self.driver.quit()
|
|
|
+ self.driver = None
|
|
|
|
|
|
if self.driver:
|
|
|
time.sleep(5)
|
|
|
self.driver.quit()
|
|
|
+ return False
|
|
|
|
|
|
def save_article_as_md(self, content, filename="./article.md"):
|
|
|
# 打開或創建一個 .md 文件
|
|
@@ -261,3 +334,13 @@ if __name__ == "__main__":
|
|
|
if is_succesed:
|
|
|
print(is_succesed)
|
|
|
|
|
|
+ # import notebookllm
|
|
|
+ # # 設置包含網址的 prompt
|
|
|
+ # prompt = "請分析這個網址內容並轉成 md 格式文章: https://zh.wikipedia.org/zh-tw/%E8%99%9B%E6%93%AC%E4%BA%BA"
|
|
|
+
|
|
|
+ # # 發送 prompt 給模型
|
|
|
+ # response = notebookllm.Notebook()
|
|
|
+
|
|
|
+ # # 顯示模型的回應
|
|
|
+ # print(response)
|
|
|
+
|