Browse Source

update ragas score

ling 4 months ago
parent
commit
789e61e0e5
1 changed files with 75 additions and 203 deletions
  1. 75 203
      faiss_index.py

+ 75 - 203
faiss_index.py

@@ -15,6 +15,7 @@ from langchain_core.output_parsers import StrOutputParser
 import pandas as pd
 from langchain_core.documents import Document
 from langchain.load import dumps, loads
+from langchain_community.chat_models import ChatOllama
 
 # Import from the parent directory
 import sys
@@ -160,24 +161,23 @@ def faiss_multiquery(question: str, retriever: FAISSRetriever, llm):
 
     return docs
 
-def faiss_query(question: str, retriever: FAISSRetriever, llm, multi_query: bool = False) -> str:
+def faiss_query(retriever, question: str, llm, multi_query: bool = False) -> str:
     if multi_query:
         docs = faiss_multiquery(question, retriever, llm)
         # print(docs)
     else:
         docs = retriever.get_relevant_documents(question, k=10)
         # print(docs)
-
-    context = "\n".join(doc.page_content for doc in docs)
+    context = docs
     
+    system_prompt: str = "你是一個來自台灣的AI助理,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"
     template = """
     <|begin_of_text|>
     
     <|start_header_id|>system<|end_header_id|>
-    你是一個來自台灣的ESG的AI助理,
-    請用繁體中文回答問題 \n
+    你是一個來自台灣的ESG的AI助理,請用繁體中文回答問題 \n
     You should not mention anything about "根據提供的文件內容" or other similar terms.
-    Use five sentences maximum and keep the answer concise.
+    請盡可能的詳細回答問題。
     如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
     勿回答無關資訊
     <|eot_id|>
@@ -188,6 +188,9 @@ def faiss_query(question: str, retriever: FAISSRetriever, llm, multi_query: bool
     {context}
 
     Question: {question}
+    用繁體中文回答問題,請用一段話詳細的回答。
+    如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
+    
     <|eot_id|>
     
     <|start_header_id|>assistant<|end_header_id|>
@@ -196,20 +199,9 @@ def faiss_query(question: str, retriever: FAISSRetriever, llm, multi_query: bool
         system_prompt + "\n\n" +
         template
     )
-    
-    # prompt = ChatPromptTemplate.from_template(
-    #     system_prompt + "\n\n" +
-    #     "Answer the following question based on this context:\n\n"
-    #     "{context}\n\n"
-    #     "Question: {question}\n"
-    #     "Answer in the same language as the question. If you don't know the answer, "
-    #     "say 'I'm sorry, I don't have enough information to answer that question.'"
-    # )
 
-    
-    # chain = prompt | taide_llm | StrOutputParser()
-    chain = prompt | llm | StrOutputParser()
-    return chain.invoke({"context": context, "question": question})
+    rag_chain = prompt | llm | StrOutputParser()
+    return context, rag_chain.invoke({"context": context, "question": question})
 
 
 def create_faiss_retriever():
@@ -239,206 +231,86 @@ def create_faiss_retriever():
 
 
 async def run_evaluation():
-    faiss_index_path = "faiss_index.bin"
-    metadata_path = "faiss_metadata.pkl"
+    local_llm = "llama3-groq-tool-use:latest"
+    llama3 = ChatOllama(model=local_llm, temperature=0)
+    openai = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
 
-    index = load_faiss_index(faiss_index_path)
-    ids, metadatas, contents = load_metadata(metadata_path)
-
-    if index is None or ids is None:
-        print("FAISS index or metadata not found. Creating new index...")
-        print("Downloading embeddings from Supabase...")
-        embeddings_array, ids, metadatas, contents = download_embeddings()
-
-        print("Creating FAISS index...")
-        index = create_faiss_index(embeddings_array)
-
-        save_faiss_index(index, faiss_index_path)
-        save_metadata(ids, metadatas, contents, metadata_path)
-    else:
-        print("Using existing FAISS index and metadata.")
-
-    print("Creating FAISS retriever...")
-    faiss_retriever = FAISSRetriever(index, ids, metadatas, contents, embeddings)
-
-    print("Creating original vector store...")
-    original_vector_store = GetVectorStore(embeddings, supabase, document_table)
-    original_retriever = original_vector_store.as_retriever(search_kwargs={"k": 4})
+    retriever = create_faiss_retriever()
 
     questions, ground_truths = load_qa_pairs()
 
-    for question, ground_truth in zip(questions, ground_truths):
+    for question, ground_truth in zip(questions[:5], ground_truths[:5]):
         print(f"\nQuestion: {question}")
 
         start_time = time()
-        faiss_answer = faiss_query(question, faiss_retriever)
-        faiss_docs = faiss_retriever.get_relevant_documents(question)
-        faiss_time = time() - start_time
-        print(f"FAISS Answer: {faiss_answer}")
-        print(f"FAISS Time: {faiss_time:.4f} seconds")
-
-        start_time = time()
-        original_answer, original_docs = multi_query(question, original_retriever, chat_history=[])
-        original_time = time() - start_time
-        print(f"Original Answer: {original_answer}")
-        print(f"Original Time: {original_time:.4f} seconds")
-
-        # faiss_datasets = {
-        #     "question": [question],
-        #     "answer": [faiss_answer],
-        #     "contexts": [[doc.page_content for doc in faiss_docs]],
-        #     "ground_truth": [ground_truth]
-        # }
-        # faiss_evalsets = Dataset.from_dict(faiss_datasets)
-
-        # faiss_result = evaluate(
-        #     faiss_evalsets,
-        #     metrics=[
-        #         context_precision,
-        #         faithfulness,
-        #         answer_relevancy,
-        #         context_recall,
-        #     ],
-        # )
-
-        # print("FAISS RAGAS Evaluation:")
-        # print(faiss_result.to_pandas())
-
-        # original_datasets = {
-        #     "question": [question],
-        #     "answer": [original_answer],
-        #     "contexts": [[doc.page_content for doc in original_docs]],
-        #     "ground_truth": [ground_truth]
-        # }
-        # original_evalsets = Dataset.from_dict(original_datasets)
-
-        # original_result = evaluate(
-        #     original_evalsets,
-        #     metrics=[
-        #         context_precision,
-        #         faithfulness,
-        #         answer_relevancy,
-        #         context_recall,
-        #     ],
-        # )
-
-        # print("Original RAGAS Evaluation:")
-        # print(original_result.to_pandas())
-
-    print("\nPerformance comparison complete.")
-
-
-async def ask_question():
-    faiss_index_path = "faiss_index.bin"
-    metadata_path = "faiss_metadata.pkl"
+        llama3_docs, llama3_answer = faiss_query(retriever, question, llama3, multi_query=True)
+        llama3_time = time() - start_time
+        print(f"llama3 Answer: {llama3_answer}")
+        print(f"llama3 Time: {llama3_time:.4f} seconds")
 
-    index = load_faiss_index(faiss_index_path)
-    ids, metadatas, contents = load_metadata(metadata_path)
-
-    if index is None or ids is None:
-        print("FAISS index or metadata not found. Creating new index...")
-        print("Downloading embeddings from Supabase...")
-        embeddings_array, ids, metadatas, contents = download_embeddings()
-
-        print("Creating FAISS index...")
-        index = create_faiss_index(embeddings_array)
-
-        save_faiss_index(index, faiss_index_path)
-        save_metadata(ids, metadatas, contents, metadata_path)
-    else:
-        print("Using existing FAISS index and metadata.")
-
-    print("Creating FAISS retriever...")
-    faiss_retriever = FAISSRetriever(index, ids, metadatas, contents, embeddings)
-
-    # print("Creating original vector store...")
-    # original_vector_store = GetVectorStore(embeddings, supabase, document_table)
-    # original_retriever = original_vector_store.as_retriever(search_kwargs={"k": 4})
-
-    # questions, ground_truths = load_qa_pairs()
-
-    # for question, ground_truth in zip(questions, ground_truths):
-    question = ""
-    while question != "exit":
-        question = input("Question: ")
-        print(f"\nQuestion: {question}")
 
+        llama3_datasets = {
+            "question": [question],
+            "answer": [llama3_answer],
+            "contexts": [[doc.page_content for doc in llama3_docs]],
+            "ground_truth": [ground_truth]
+        }
+        llama3_evalsets = Dataset.from_dict(llama3_datasets)
+
+        llama3_result = evaluate(
+            llama3_evalsets,
+            metrics=[
+                context_precision,
+                faithfulness,
+                answer_relevancy,
+                context_recall,
+            ],
+        )
+
+        print("llama3 RAGAS Evaluation:")
+        llama3_result['time'] = llama3_time
+        df = llama3_result.to_pandas()
+        print(df)
+        
+        df.to_csv("llama.csv", mode='a')
+        #############################################################
         start_time = time()
-        faiss_answer = faiss_query(question, faiss_retriever)
-        faiss_docs = faiss_retriever.get_relevant_documents(question)
-        faiss_time = time() - start_time
-        print(f"FAISS Answer: {faiss_answer}")
-        print(f"FAISS Time: {faiss_time:.4f} seconds")
-
-        # start_time = time()
-        # original_answer, original_docs = multi_query(question, original_retriever, chat_history=[])
-        # original_time = time() - start_time
-        # print(f"Original Answer: {original_answer}")
-        # print(f"Original Time: {original_time:.4f} seconds")
+        openai_docs, openai_answer = faiss_query(retriever, question, openai, multi_query=True)
+        openai_time = time() - start_time
+        print(f"openai Answer: {openai_answer}")
+        print(f"openai Time: {openai_time:.4f} seconds")
 
-if __name__ == "__main__":
-
-    global_retriever = create_faiss_retriever()
 
-    questions, ground_truths = load_qa_pairs()
-    results = []
-
-    for question, ground_truth in zip(questions, ground_truths):
-        # For multi_query=True
-        start = time()
-        final_answer_multi = faiss_query(question, global_retriever, multi_query=True)
-        processing_time_multi = time() - start
-        # print(final_answer_multi)
-        # print(processing_time_multi)
-
-        # For multi_query=False
-        start = time()
-        final_answer_single = faiss_query(question, global_retriever, multi_query=False)
-        processing_time_single = time() - start
-        # print(final_answer_single)
-        # print(processing_time_single)
-
-        # Store results in a dictionary
-        result = {
-            "question": question,
-            "ground_truth": ground_truth,
-            "final_answer_multi_query": final_answer_multi,
-            "processing_time_multi_query": processing_time_multi,
-            "final_answer_single_query": final_answer_single,
-            "processing_time_single_query": processing_time_single
+        openai_datasets = {
+            "question": [question],
+            "answer": [openai_answer],
+            "contexts": [[doc.page_content for doc in openai_docs]],
+            "ground_truth": [ground_truth]
         }
-        print(result)
+        openai_evalsets = Dataset.from_dict(openai_datasets)
+
+        openai_result = evaluate(
+            openai_evalsets,
+            metrics=[
+                context_precision,
+                faithfulness,
+                answer_relevancy,
+                context_recall,
+            ],
+        )
+
+        print("openai RAGAS Evaluation:")
+        openai_result['time'] = openai_time  # elapsed time of the OpenAI run measured above
+        df = openai_result.to_pandas()
+        print(df)
         
-        results.append(result)
+        df.to_csv("openai.csv", mode='a')
 
-        with open('qa_results.json', 'a', encoding='utf8') as outfile:
-            json.dump(result, outfile, indent=4, ensure_ascii=False)
-            outfile.write("\n")  # Ensure each result is on a new line
-        
 
-    # Save results to a JSON file
-    with open('qa_results_all.json', 'w', encoding='utf8') as outfile:
-        json.dump(results, outfile, indent=4, ensure_ascii=False)
+    print("\nPerformance comparison complete.")
 
-    print('All questions done!')
-    # question = ""
-    # while question != "exit":
-    #     # question = "國家溫室氣體長期減量目標" 
-    #     question = input("Question: ")
-    #     if question.strip().lower == "exit": break
 
-    #     start = time()
-    #     final_answer = faiss_query(question, global_retriever, multi_query=True)
-    #     print(final_answer)
-    #     processing_time = time() - start
-    #     print(processing_time)
+if __name__ == "__main__":
+    asyncio.run(run_evaluation())
 
-        
-    #     start = time() 
-    #     final_answer = faiss_query(question, global_retriever, multi_query=False)
-    #     print(final_answer)
-    #     processing_time = time() - start
-    #     print(processing_time)
-    # print("Chatbot closed!")
-
-    # asyncio.run(ask_question())
+