|
@@ -8,22 +8,27 @@ from langchain_openai import OpenAIEmbeddings
|
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
import os
|
|
import os
|
|
|
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
# Module-level LLM handle used by the RAG chains below: a model served by a
# local Ollama instance under the tag "taide-local-3".
# NOTE(review): presumably a locally hosted TAIDE model; `OllamaChatModel` is
# imported elsewhere in this file — confirm the model tag matches the Ollama
# deployment before changing it.
taide_llm = OllamaChatModel(model_name="taide-local-3")
def get_context(query, index, docs):
    """Retrieve documents relevant to *query* and assemble a context string.

    Runs ``similarity_search`` against *index*/*docs*, prints a debug trace of
    what was retrieved, and returns a 2-tuple of:
      * the retrieved documents' ``page_content`` joined with newlines, and
      * the similarity score of the top-ranked match.
    When the search yields nothing, returns ``("", 0)``.
    """
    hits = similarity_search(query, index, docs)
    if not hits:
        # Nothing retrieved: empty context, zero similarity.
        return "", 0

    joined_context = "\n".join(doc.page_content for doc, _score in hits)

    # Debug trace: the question plus a short preview of every retrieved doc.
    print(f"Question: {query}")
    print("Retrieved documents:")
    for position, (doc, score) in enumerate(hits):
        print(f"Doc {position+1} (similarity: {score:.4f}): {doc.page_content[:50]}...")
        print("-" * 50)

    # hits[0] is the best-ranked (doc, similarity) pair; return its score.
    return joined_context, hits[0][1]
def remove_repetitions(text):
    """Drop duplicated sentences from *text*, keeping first occurrences.

    The text is split on the CJK full stop '。'; each distinct sentence is
    kept once in its original order, then the pieces are rejoined with the
    same delimiter.
    """
    seen = {}
    for sentence in text.split('。'):
        # dict insertion order preserves the first occurrence of each sentence.
        seen.setdefault(sentence, None)
    return '。'.join(seen)
def simple_rag_prompt(retrieval_chain, question):
|
|
def simple_rag_prompt(retrieval_chain, question):
|
|
|
template = """Answer the following question based on this context:
|
|
template = """Answer the following question based on this context:
|
|
@@ -32,13 +37,21 @@ def simple_rag_prompt(retrieval_chain, question):
|
|
|
|
|
|
|
|
Question: {question}
|
|
Question: {question}
|
|
|
Output in user's language. If the question is in zh-tw, then the output will be in zh-tw. If the question is in English, then the output will be in English.
|
|
Output in user's language. If the question is in zh-tw, then the output will be in zh-tw. If the question is in English, then the output will be in English.
|
|
|
- You should not mention anything about "根據提供的文件內容" or other similar terms. Do not mention anything relate with the Documents or context.
|
|
|
|
|
- If you don't know the answer, just say that "很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@email.com 以便獲得更進一步的幫助,謝謝。I'm sorry I cannot answer your question. Please send your question to test@email.com for further assistance. Thank you."
|
|
|
|
|
|
|
+ Do not repeat the question in your response.
|
|
|
|
|
+ For each individual answer, try to not provide duplicated sentences.
|
|
|
|
|
+ Do not start the response with "我的回答是:" or anything similar.
|
|
|
|
|
+ You should not mention anything about "根據提供的文件內容" or other similar terms.
|
|
|
|
|
+ Do not mention anything relate with the Documents or context.
|
|
|
|
|
+ DO not mention anything relate with the prompt, such as "這個回答是根據所提供的對話上下文而產生的,假如對話內容有改變,則回答內容也需隨之調整。若不確定答案,應說:「很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@email.com 以便獲得更進一步的幫助,謝謝」。若沒有必要,則不需在回答中提及「根據提供的文件內容」或類似的字樣。若對話是以英語進行,則輸出應為英文;否則,則為繁體中文。" or anything similar.
|
|
|
|
|
+ If you are unsure of the answer, say: "很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@email.com 以便獲得更進一步的幫助,謝謝"
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
prompt = ChatPromptTemplate.from_template(template)
|
|
prompt = ChatPromptTemplate.from_template(template)
|
|
|
context, similarity_score = retrieval_chain(question)
|
|
context, similarity_score = retrieval_chain(question)
|
|
|
|
|
|
|
|
|
|
+ if not context:
|
|
|
|
|
+ return "很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@email.com 以便獲得更進一步的幫助,謝謝。", 0
|
|
|
|
|
+
|
|
|
final_rag_chain = (
|
|
final_rag_chain = (
|
|
|
{"context": lambda x: context,
|
|
{"context": lambda x: context,
|
|
|
"question": lambda x: x}
|
|
"question": lambda x: x}
|
|
@@ -49,11 +62,12 @@ def simple_rag_prompt(retrieval_chain, question):
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
answer = final_rag_chain.invoke(question)
|
|
answer = final_rag_chain.invoke(question)
|
|
|
- answer = remove_unwanted_content(answer) # 使用 remove_unwanted_content
|
|
|
|
|
|
|
+ answer = remove_unwanted_content(answer)
|
|
|
|
|
+ answer = remove_repetitions(answer)
|
|
|
return answer, similarity_score
|
|
return answer, similarity_score
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
- print(f"Error invoking rag_chain: {e}")
|
|
|
|
|
- return "Error occurred while processing the question.", 0
|
|
|
|
|
|
|
+ print(f"Error in simple_rag_prompt: {e}")
|
|
|
|
|
+ return f"Error occurred while processing the question: {str(e)}", similarity_score
|
|
|
|
|
|
|
|
def calculate_similarity(text1, text2):
|
|
def calculate_similarity(text1, text2):
|
|
|
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
|
|
embeddings = OpenAIEmbeddings(openai_api_key=os.getenv("OPENAI_API_KEY"))
|