@@ -15,6 +15,7 @@ from langchain_core.output_parsers import StrOutputParser
 import pandas as pd
 from langchain_core.documents import Document
 from langchain.load import dumps, loads
+from langchain_community.chat_models import ChatOllama
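+# ChatOllama is a thin client for a locally running Ollama server (assumed available here).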

 # Import from the parent directory
 import sys
@@ -160,24 +161,23 @@ def faiss_multiquery(question: str, retriever: FAISSRetriever, llm):

     return docs

-def faiss_query(question: str, retriever: FAISSRetriever, llm, multi_query: bool = False) -> str:
+def faiss_query(retriever, question: str, llm, multi_query: bool = False) -> tuple[list[Document], str]:
     if multi_query:
         docs = faiss_multiquery(question, retriever, llm)
         # print(docs)
     else:
         docs = retriever.get_relevant_documents(question, k=10)
         # print(docs)
-
-    context = "\n".join(doc.page_content for doc in docs)
+    context = docs
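+    # `context` keeps the raw Documents so callers can build RAGAS `contexts`;
+    # the prompt receives their joined page contents (see the return below).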

+    system_prompt: str = "你是一個來自台灣的AI助理,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"
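+    # ("You are an AI assistant from Taiwan, happy to help users from a Taiwanese
+    #  perspective, and you answer in Traditional Chinese.")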
     template = """
     <|begin_of_text|>

     <|start_header_id|>system<|end_header_id|>
-    你是一個來自台灣的ESG的AI助理,
-    請用繁體中文回答問題 \n
+    你是一個來自台灣的ESG的AI助理,請用繁體中文回答問題 \n
     You should not mention anything about "根據提供的文件內容" or other similar terms.
-    Use five sentences maximum and keep the answer concise.
+    請盡可能的詳細回答問題。
     如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
     勿回答無關資訊
     <|eot_id|>
@@ -188,6 +188,9 @@ def faiss_query(question: str, retriever: FAISSRetriever, llm, multi_query: bool
     {context}

     Question: {question}
+    用繁體中文回答問題,請用一段話詳細的回答。
+    如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
+
     <|eot_id|>

     <|start_header_id|>assistant<|end_header_id|>
@@ -196,20 +199,9 @@ def faiss_query(question: str, retriever: FAISSRetriever, llm, multi_query: bool
         system_prompt + "\n\n" +
         template
     )
-
-    # prompt = ChatPromptTemplate.from_template(
-    #     system_prompt + "\n\n" +
-    #     "Answer the following question based on this context:\n\n"
-    #     "{context}\n\n"
-    #     "Question: {question}\n"
-    #     "Answer in the same language as the question. If you don't know the answer, "
-    #     "say 'I'm sorry, I don't have enough information to answer that question.'"
-    # )

-
-    # chain = prompt | taide_llm | StrOutputParser()
-    chain = prompt | llm | StrOutputParser()
-    return chain.invoke({"context": context, "question": question})
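+    # LCEL pipeline: prompt | llm | StrOutputParser yields a plain string answer.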
+    rag_chain = prompt | llm | StrOutputParser()
+    context_text = "\n".join(doc.page_content for doc in context)
+    return context, rag_chain.invoke({"context": context_text, "question": question})


 def create_faiss_retriever():
@@ -239,206 +231,86 @@ def create_faiss_retriever():

 async def run_evaluation():
-    faiss_index_path = "faiss_index.bin"
-    metadata_path = "faiss_metadata.pkl"
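+    # Candidates under comparison: a local Llama 3 variant served by Ollama
+    # versus OpenAI's gpt-4o-mini, both at temperature 0 for reproducibility.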
+    local_llm = "llama3-groq-tool-use:latest"
+    llama3 = ChatOllama(model=local_llm, temperature=0)
+    openai = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

-    index = load_faiss_index(faiss_index_path)
-    ids, metadatas, contents = load_metadata(metadata_path)
-
-    if index is None or ids is None:
-        print("FAISS index or metadata not found. Creating new index...")
-        print("Downloading embeddings from Supabase...")
-        embeddings_array, ids, metadatas, contents = download_embeddings()
-
-        print("Creating FAISS index...")
-        index = create_faiss_index(embeddings_array)
-
-        save_faiss_index(index, faiss_index_path)
-        save_metadata(ids, metadatas, contents, metadata_path)
-    else:
-        print("Using existing FAISS index and metadata.")
-
-    print("Creating FAISS retriever...")
-    faiss_retriever = FAISSRetriever(index, ids, metadatas, contents, embeddings)
-
-    print("Creating original vector store...")
-    original_vector_store = GetVectorStore(embeddings, supabase, document_table)
-    original_retriever = original_vector_store.as_retriever(search_kwargs={"k": 4})
+    retriever = create_faiss_retriever()
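+    # (create_faiss_retriever() is assumed to wrap the load-or-rebuild logic removed above)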

     questions, ground_truths = load_qa_pairs()

-    for question, ground_truth in zip(questions, ground_truths):
+    for question, ground_truth in zip(questions[:5], ground_truths[:5]):
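+        # Only the first five QA pairs are scored, keeping evaluation runs short.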
         print(f"\nQuestion: {question}")

         start_time = time()
-        faiss_answer = faiss_query(question, faiss_retriever)
-        faiss_docs = faiss_retriever.get_relevant_documents(question)
-        faiss_time = time() - start_time
-        print(f"FAISS Answer: {faiss_answer}")
-        print(f"FAISS Time: {faiss_time:.4f} seconds")
-
-        start_time = time()
-        original_answer, original_docs = multi_query(question, original_retriever, chat_history=[])
-        original_time = time() - start_time
-        print(f"Original Answer: {original_answer}")
-        print(f"Original Time: {original_time:.4f} seconds")
-
-        # faiss_datasets = {
-        #     "question": [question],
-        #     "answer": [faiss_answer],
-        #     "contexts": [[doc.page_content for doc in faiss_docs]],
-        #     "ground_truth": [ground_truth]
-        # }
-        # faiss_evalsets = Dataset.from_dict(faiss_datasets)
-
-        # faiss_result = evaluate(
-        #     faiss_evalsets,
-        #     metrics=[
-        #         context_precision,
-        #         faithfulness,
-        #         answer_relevancy,
-        #         context_recall,
-        #     ],
-        # )
-
-        # print("FAISS RAGAS Evaluation:")
-        # print(faiss_result.to_pandas())
-
-        # original_datasets = {
-        #     "question": [question],
-        #     "answer": [original_answer],
-        #     "contexts": [[doc.page_content for doc in original_docs]],
-        #     "ground_truth": [ground_truth]
-        # }
-        # original_evalsets = Dataset.from_dict(original_datasets)
-
-        # original_result = evaluate(
-        #     original_evalsets,
-        #     metrics=[
-        #         context_precision,
-        #         faithfulness,
-        #         answer_relevancy,
-        #         context_recall,
-        #     ],
-        # )
-
-        # print("Original RAGAS Evaluation:")
-        # print(original_result.to_pandas())
-
-    print("\nPerformance comparison complete.")
-
-
-async def ask_question():
-    faiss_index_path = "faiss_index.bin"
-    metadata_path = "faiss_metadata.pkl"
+        llama3_docs, llama3_answer = faiss_query(retriever, question, llama3, multi_query=True)
+        llama3_time = time() - start_time
+        print(f"llama3 Answer: {llama3_answer}")
+        print(f"llama3 Time: {llama3_time:.4f} seconds")
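+        # llama3_time covers multi-query retrieval plus answer generation.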

-    index = load_faiss_index(faiss_index_path)
-    ids, metadatas, contents = load_metadata(metadata_path)
-
-    if index is None or ids is None:
-        print("FAISS index or metadata not found. Creating new index...")
-        print("Downloading embeddings from Supabase...")
-        embeddings_array, ids, metadatas, contents = download_embeddings()
-
-        print("Creating FAISS index...")
-        index = create_faiss_index(embeddings_array)
-
-        save_faiss_index(index, faiss_index_path)
-        save_metadata(ids, metadatas, contents, metadata_path)
-    else:
-        print("Using existing FAISS index and metadata.")
-
-    print("Creating FAISS retriever...")
-    faiss_retriever = FAISSRetriever(index, ids, metadatas, contents, embeddings)
-
-    # print("Creating original vector store...")
-    # original_vector_store = GetVectorStore(embeddings, supabase, document_table)
-    # original_retriever = original_vector_store.as_retriever(search_kwargs={"k": 4})
-
-    # questions, ground_truths = load_qa_pairs()
-
-    # for question, ground_truth in zip(questions, ground_truths):
-    question = ""
-    while question != "exit":
-        question = input("Question: ")
-        print(f"\nQuestion: {question}")
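
+        # Score this QA pair with RAGAS: build a one-row dataset and evaluate it.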
+        llama3_datasets = {
+            "question": [question],
+            "answer": [llama3_answer],
+            "contexts": [[doc.page_content for doc in llama3_docs]],
+            "ground_truth": [ground_truth]
+        }
+        llama3_evalsets = Dataset.from_dict(llama3_datasets)
+
+        llama3_result = evaluate(
+            llama3_evalsets,
+            metrics=[
+                context_precision,
+                faithfulness,
+                answer_relevancy,
+                context_recall,
+            ],
+        )
+
+        print("llama3 RAGAS Evaluation:")
+        llama3_result['time'] = llama3_time
+        df = llama3_result.to_pandas()
+        print(df)
+
+        df.to_csv("llama.csv", mode='a')
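+        # (mode='a' appends, but pandas re-writes the header row on every pass)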
+        #############################################################
         start_time = time()
-        faiss_answer = faiss_query(question, faiss_retriever)
-        faiss_docs = faiss_retriever.get_relevant_documents(question)
-        faiss_time = time() - start_time
-        print(f"FAISS Answer: {faiss_answer}")
-        print(f"FAISS Time: {faiss_time:.4f} seconds")
-
-        # start_time = time()
-        # original_answer, original_docs = multi_query(question, original_retriever, chat_history=[])
-        # original_time = time() - start_time
-        # print(f"Original Answer: {original_answer}")
-        # print(f"Original Time: {original_time:.4f} seconds")
+        openai_docs, openai_answer = faiss_query(retriever, question, openai, multi_query=True)
+        openai_time = time() - start_time
+        print(f"openai Answer: {openai_answer}")
+        print(f"openai Time: {openai_time:.4f} seconds")

-if __name__ == "__main__":
-
-    global_retriever = create_faiss_retriever()

-    questions, ground_truths = load_qa_pairs()
-    results = []
-
-    for question, ground_truth in zip(questions, ground_truths):
-        # For multi_query=True
-        start = time()
-        final_answer_multi = faiss_query(question, global_retriever, multi_query=True)
-        processing_time_multi = time() - start
-        # print(final_answer_multi)
-        # print(processing_time_multi)
-
-        # For multi_query=False
-        start = time()
-        final_answer_single = faiss_query(question, global_retriever, multi_query=False)
-        processing_time_single = time() - start
-        # print(final_answer_single)
-        # print(processing_time_single)
-
-        # Store results in a dictionary
-        result = {
-            "question": question,
-            "ground_truth": ground_truth,
-            "final_answer_multi_query": final_answer_multi,
-            "processing_time_multi_query": processing_time_multi,
-            "final_answer_single_query": final_answer_single,
-            "processing_time_single_query": processing_time_single
+        openai_datasets = {
+            "question": [question],
+            "answer": [openai_answer],
+            "contexts": [[doc.page_content for doc in openai_docs]],
+            "ground_truth": [ground_truth]
         }
-        print(result)
+        openai_evalsets = Dataset.from_dict(openai_datasets)
+
+        openai_result = evaluate(
+            openai_evalsets,
+            metrics=[
+                context_precision,
+                faithfulness,
+                answer_relevancy,
+                context_recall,
+            ],
+        )
+
+        print("openai RAGAS Evaluation:")
+        openai_result['time'] = openai_time
+        df = openai_result.to_pandas()
+        print(df)

-        results.append(result)
+        df.to_csv("openai.csv", mode='a')

-        with open('qa_results.json', 'a', encoding='utf8') as outfile:
-            json.dump(result, outfile, indent=4, ensure_ascii=False)
-            outfile.write("\n") # Ensure each result is on a new line
-

-    # Save results to a JSON file
-    with open('qa_results_all.json', 'w', encoding='utf8') as outfile:
-        json.dump(results, outfile, indent=4, ensure_ascii=False)
+    print("\nPerformance comparison complete.")

-    print('All questions done!')
-    # question = ""
-    # while question != "exit":
-    #     # question = "國家溫室氣體長期減量目標"
-    #     question = input("Question: ")
-    #     if question.strip().lower == "exit": break

-    #     start = time()
-    #     final_answer = faiss_query(question, global_retriever, multi_query=True)
-    #     print(final_answer)
-    #     processing_time = time() - start
-    #     print(processing_time)
+if __name__ == "__main__":
+    asyncio.run(run_evaluation())

-
-    #     start = time()
-    #     final_answer = faiss_query(question, global_retriever, multi_query=False)
-    #     print(final_answer)
-    #     processing_time = time() - start
-    #     print(processing_time)
-    #     print("Chatbot closed!")
-
-    # asyncio.run(ask_question())
+