
2 Commits eac4569456 ... 889552f0c0

Author SHA1 Message Date
  ling 889552f0c0 update route and sql prompt 1 year ago
  ling 789e61e0e5 update ragas score 1 year ago
4 changed files with 82 additions and 206 deletions
  1. ai_agent.py  + 2 - 0
  2. faiss_index.py  + 75 - 203
  3. systex_app.py  + 3 - 1
  4. text_to_sql_private.py  + 2 - 2

+ 2 - 0
ai_agent.py

@@ -240,6 +240,8 @@ def Router():
         You are an expert at routing a user question to a 專業知識 or 自有數據. 
         Use company private data for questions about a company's greenhouse gas emissions data.
         Otherwise, use the 專業知識 for questions on ESG field knowledge or news about ESG. 
+        你需要分辨使用者問題是否在詢問公司的自有數據,例如想了解公司的碳排放源數據等等,如果判斷為是,則使用"自有數據",
+        若使用者的問題是想了解碳盤查或碳管理等等的 ESG 知識和相關新聞,請使用"專業知識"。
         You do not need to be stringent with the keywords in the question related to these topics. 
         Give a binary choice '自有數據' or '專業知識' based on the question. 
         Return a JSON with a single key 'datasource' and no preamble or explanation. 
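A minimal sketch of how this router's output might be consumed downstream; the router_prompt and llm objects here are assumptions for illustration, not part of this commit:

from langchain_core.output_parsers import JsonOutputParser

def route_question(question: str, router_prompt, llm) -> str:
    # The chain yields a dict such as {"datasource": "自有數據"},
    # which the caller can branch on.
    chain = router_prompt | llm | JsonOutputParser()
    return chain.invoke({"question": question})["datasource"]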

+ 75 - 203
faiss_index.py

@@ -15,6 +15,7 @@ from langchain_core.output_parsers import StrOutputParser
 import pandas as pd
 from langchain_core.documents import Document
 from langchain.load import dumps, loads
+from langchain_community.chat_models import ChatOllama
 
 # Import from the parent directory
 import sys
@@ -160,24 +161,23 @@ def faiss_multiquery(question: str, retriever: FAISSRetriever, llm):
 
     return docs
 
-def faiss_query(question: str, retriever: FAISSRetriever, llm, multi_query: bool = False) -> str:
+def faiss_query(retriever, question: str, llm, multi_query: bool = False) -> str:
     if multi_query:
         docs = faiss_multiquery(question, retriever, llm)
         # print(docs)
     else:
         docs = retriever.get_relevant_documents(question, k=10)
         # print(docs)
-
-    context = "\n".join(doc.page_content for doc in docs)
+    context = docs
     
+    system_prompt: str = "你是一個來自台灣的AI助理,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"
     template = """
     <|begin_of_text|>
     
     <|start_header_id|>system<|end_header_id|>
-    你是一個來自台灣的ESG的AI助理,
-    請用繁體中文回答問題 \n
+    你是一個來自台灣的ESG的AI助理,請用繁體中文回答問題 \n
     You should not mention anything about "根據提供的文件內容" or other similar terms.
-    Use five sentences maximum and keep the answer concise.
+    請盡可能的詳細回答問題。
     如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
     勿回答無關資訊
     <|eot_id|>
@@ -188,6 +188,9 @@ def faiss_query(question: str, retriever: FAISSRetriever, llm, multi_query: bool
     {context}
 
     Question: {question}
+    用繁體中文回答問題,請用一段話詳細的回答。
+    如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
+    
     <|eot_id|>
     
     <|start_header_id|>assistant<|end_header_id|>
@@ -196,20 +199,9 @@ def faiss_query(question: str, retriever: FAISSRetriever, llm, multi_query: bool
         system_prompt + "\n\n" +
         template
     )
-    
-    # prompt = ChatPromptTemplate.from_template(
-    #     system_prompt + "\n\n" +
-    #     "Answer the following question based on this context:\n\n"
-    #     "{context}\n\n"
-    #     "Question: {question}\n"
-    #     "Answer in the same language as the question. If you don't know the answer, "
-    #     "say 'I'm sorry, I don't have enough information to answer that question.'"
-    # )
 
-    
-    # chain = prompt | taide_llm | StrOutputParser()
-    chain = prompt | llm | StrOutputParser()
-    return chain.invoke({"context": context, "question": question})
+    rag_chain = prompt | llm | StrOutputParser()
+    return context, rag_chain.invoke({"context": context, "question": question})
 
 
 def create_faiss_retriever():
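As a usage note: this commit flips faiss_query's signature (retriever first) and makes it return the retrieved context together with the answer. A minimal sketch of the new call shape, under those assumptions (the question text is illustrative):

from langchain_community.chat_models import ChatOllama

retriever = create_faiss_retriever()
llm = ChatOllama(model="llama3-groq-tool-use:latest", temperature=0)

# faiss_query now returns (docs, answer), so the same retrieved
# contexts can later be fed into RAGAS evaluation.
docs, answer = faiss_query(retriever, "建準去年的類別1總排放量是多少?", llm, multi_query=True)
print(answer)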
@@ -239,206 +231,86 @@ def create_faiss_retriever():
 
 
 async def run_evaluation():
-    faiss_index_path = "faiss_index.bin"
-    metadata_path = "faiss_metadata.pkl"
+    local_llm = "llama3-groq-tool-use:latest"
+    llama3 = ChatOllama(model=local_llm, temperature=0)
+    openai = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
 
-    index = load_faiss_index(faiss_index_path)
-    ids, metadatas, contents = load_metadata(metadata_path)
-
-    if index is None or ids is None:
-        print("FAISS index or metadata not found. Creating new index...")
-        print("Downloading embeddings from Supabase...")
-        embeddings_array, ids, metadatas, contents = download_embeddings()
-
-        print("Creating FAISS index...")
-        index = create_faiss_index(embeddings_array)
-
-        save_faiss_index(index, faiss_index_path)
-        save_metadata(ids, metadatas, contents, metadata_path)
-    else:
-        print("Using existing FAISS index and metadata.")
-
-    print("Creating FAISS retriever...")
-    faiss_retriever = FAISSRetriever(index, ids, metadatas, contents, embeddings)
-
-    print("Creating original vector store...")
-    original_vector_store = GetVectorStore(embeddings, supabase, document_table)
-    original_retriever = original_vector_store.as_retriever(search_kwargs={"k": 4})
+    retriever = create_faiss_retriever()
 
     questions, ground_truths = load_qa_pairs()
 
-    for question, ground_truth in zip(questions, ground_truths):
+    for question, ground_truth in zip(questions[:5], ground_truths[:5]):
         print(f"\nQuestion: {question}")
         print(f"\nQuestion: {question}")
 
 
         start_time = time()
-        faiss_answer = faiss_query(question, faiss_retriever)
-        faiss_docs = faiss_retriever.get_relevant_documents(question)
-        faiss_time = time() - start_time
-        print(f"FAISS Answer: {faiss_answer}")
-        print(f"FAISS Time: {faiss_time:.4f} seconds")
-
-        start_time = time()
-        original_answer, original_docs = multi_query(question, original_retriever, chat_history=[])
-        original_time = time() - start_time
-        print(f"Original Answer: {original_answer}")
-        print(f"Original Time: {original_time:.4f} seconds")
-
-        # faiss_datasets = {
-        #     "question": [question],
-        #     "answer": [faiss_answer],
-        #     "contexts": [[doc.page_content for doc in faiss_docs]],
-        #     "ground_truth": [ground_truth]
-        # }
-        # faiss_evalsets = Dataset.from_dict(faiss_datasets)
-
-        # faiss_result = evaluate(
-        #     faiss_evalsets,
-        #     metrics=[
-        #         context_precision,
-        #         faithfulness,
-        #         answer_relevancy,
-        #         context_recall,
-        #     ],
-        # )
-
-        # print("FAISS RAGAS Evaluation:")
-        # print(faiss_result.to_pandas())
-
-        # original_datasets = {
-        #     "question": [question],
-        #     "answer": [original_answer],
-        #     "contexts": [[doc.page_content for doc in original_docs]],
-        #     "ground_truth": [ground_truth]
-        # }
-        # original_evalsets = Dataset.from_dict(original_datasets)
-
-        # original_result = evaluate(
-        #     original_evalsets,
-        #     metrics=[
-        #         context_precision,
-        #         faithfulness,
-        #         answer_relevancy,
-        #         context_recall,
-        #     ],
-        # )
-
-        # print("Original RAGAS Evaluation:")
-        # print(original_result.to_pandas())
-
-    print("\nPerformance comparison complete.")
-
-
-async def ask_question():
-    faiss_index_path = "faiss_index.bin"
-    metadata_path = "faiss_metadata.pkl"
+        llama3_docs, llama3_answer = faiss_query(retriever, question, llama3, multi_query=True)
+        llama3_time = time() - start_time
+        print(f"llama3 Answer: {llama3_answer}")
+        print(f"llama3 Time: {llama3_time:.4f} seconds")
 
 
-    index = load_faiss_index(faiss_index_path)
-    ids, metadatas, contents = load_metadata(metadata_path)
-
-    if index is None or ids is None:
-        print("FAISS index or metadata not found. Creating new index...")
-        print("Downloading embeddings from Supabase...")
-        embeddings_array, ids, metadatas, contents = download_embeddings()
-
-        print("Creating FAISS index...")
-        index = create_faiss_index(embeddings_array)
-
-        save_faiss_index(index, faiss_index_path)
-        save_metadata(ids, metadatas, contents, metadata_path)
-    else:
-        print("Using existing FAISS index and metadata.")
-
-    print("Creating FAISS retriever...")
-    faiss_retriever = FAISSRetriever(index, ids, metadatas, contents, embeddings)
-
-    # print("Creating original vector store...")
-    # original_vector_store = GetVectorStore(embeddings, supabase, document_table)
-    # original_retriever = original_vector_store.as_retriever(search_kwargs={"k": 4})
-
-    # questions, ground_truths = load_qa_pairs()
-
-    # for question, ground_truth in zip(questions, ground_truths):
-    question = ""
-    while question != "exit":
-        question = input("Question: ")
-        print(f"\nQuestion: {question}")
 
 
+        llama3_datasets = {
+            "question": [question],
+            "answer": [llama3_answer],
+            "contexts": [[doc.page_content for doc in llama3_docs]],
+            "ground_truth": [ground_truth]
+        }
+        llama3_evalsets = Dataset.from_dict(llama3_datasets)
+
+        llama3_result = evaluate(
+            llama3_evalsets,
+            metrics=[
+                context_precision,
+                faithfulness,
+                answer_relevancy,
+                context_recall,
+            ],
+        )
+
+        print("llama3 RAGAS Evaluation:")
+        llama3_result['time'] = llama3_time
+        df = llama3_result.to_pandas()
+        print(df)
+        
+        df.to_csv("llama.csv", mode='a')
+        #############################################################
         start_time = time()
-        faiss_answer = faiss_query(question, faiss_retriever)
-        faiss_docs = faiss_retriever.get_relevant_documents(question)
-        faiss_time = time() - start_time
-        print(f"FAISS Answer: {faiss_answer}")
-        print(f"FAISS Time: {faiss_time:.4f} seconds")
-
-        # start_time = time()
-        # original_answer, original_docs = multi_query(question, original_retriever, chat_history=[])
-        # original_time = time() - start_time
-        # print(f"Original Answer: {original_answer}")
-        # print(f"Original Time: {original_time:.4f} seconds")
+        openai_docs, openai_answer = faiss_query(retriever, question, openai, multi_query=True)
+        openai_time = time() - start_time
+        print(f"openai Answer: {openai_answer}")
+        print(f"openai Time: {openai_time:.4f} seconds")
 
 
-if __name__ == "__main__":
-
-    global_retriever = create_faiss_retriever()
 
-    questions, ground_truths = load_qa_pairs()
-    results = []
-
-    for question, ground_truth in zip(questions, ground_truths):
-        # For multi_query=True
-        start = time()
-        final_answer_multi = faiss_query(question, global_retriever, multi_query=True)
-        processing_time_multi = time() - start
-        # print(final_answer_multi)
-        # print(processing_time_multi)
-
-        # For multi_query=False
-        start = time()
-        final_answer_single = faiss_query(question, global_retriever, multi_query=False)
-        processing_time_single = time() - start
-        # print(final_answer_single)
-        # print(processing_time_single)
-
-        # Store results in a dictionary
-        result = {
-            "question": question,
-            "ground_truth": ground_truth,
-            "final_answer_multi_query": final_answer_multi,
-            "processing_time_multi_query": processing_time_multi,
-            "final_answer_single_query": final_answer_single,
-            "processing_time_single_query": processing_time_single
+        openai_datasets = {
+            "question": [question],
+            "answer": [openai_answer],
+            "contexts": [[doc.page_content for doc in openai_docs]],
+            "ground_truth": [ground_truth]
         }
-        print(result)
+        openai_evalsets = Dataset.from_dict(openai_datasets)
+
+        openai_result = evaluate(
+            openai_evalsets,
+            metrics=[
+                context_precision,
+                faithfulness,
+                answer_relevancy,
+                context_recall,
+            ],
+        )
+
+        print("openai RAGAS Evaluation:")
+        openai_result['time'] = openai_time
+        df = openai_result.to_pandas()
+        print(df)
         
-        results.append(result)
+        df.to_csv("openai.csv", mode='a')
 
 
-        with open('qa_results.json', 'a', encoding='utf8') as outfile:
-            json.dump(result, outfile, indent=4, ensure_ascii=False)
-            outfile.write("\n")  # Ensure each result is on a new line
-        
 
-    # Save results to a JSON file
-    with open('qa_results_all.json', 'w', encoding='utf8') as outfile:
-        json.dump(results, outfile, indent=4, ensure_ascii=False)
+    print("\nPerformance comparison complete.")
 
 
-    print('All questions done!')
-    # question = ""
-    # while question != "exit":
-    #     # question = "國家溫室氣體長期減量目標" 
-    #     question = input("Question: ")
-    #     if question.strip().lower == "exit": break
 
-    #     start = time()
-    #     final_answer = faiss_query(question, global_retriever, multi_query=True)
-    #     print(final_answer)
-    #     processing_time = time() - start
-    #     print(processing_time)
+if __name__ == "__main__":
+    asyncio.run(run_evaluation())
 
-        
-    #     start = time() 
-    #     final_answer = faiss_query(question, global_retriever, multi_query=False)
-    #     print(final_answer)
-    #     processing_time = time() - start
-    #     print(processing_time)
-    # print("Chatbot closed!")
-
-    # asyncio.run(ask_question())
+    
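One caveat worth noting on the new CSV logging: df.to_csv(..., mode='a') re-writes the column header on every append, so "llama.csv" and "openai.csv" accumulate one header row per question. A small hedged variant (the helper name is illustrative) writes the header only on the first append:

import os
import pandas as pd

def append_result(df: pd.DataFrame, path: str) -> None:
    # Emit the header only if the file does not exist yet,
    # so repeated appends stay machine-readable.
    df.to_csv(path, mode="a", index=False, header=not os.path.exists(path))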

+ 3 - 1
systex_app.py

@@ -44,7 +44,9 @@ def agent(question: str, chat_history: List[ChatHistoryItem] = Body(...)):
     start = time.time()
     # TODO rewrite query
     # _search_query = get_search_query()
-    # chat_history = [(item.q, item.a) for item in chat_history[-5:] if item.a != "" and item.a != "string"]
+    # chat_history = [item for item in chat_history if question != item.q]
+    # chat_history = [(item.q, item.a) for item in chat_history[-5:] if item.a != "" and item.a != "string" ]
+    # print(chat_history)
     # modified_question = _search_query.invoke({"question": question, "chat_history": chat_history})
     
     with get_openai_callback() as cb:
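The commented-out lines above sketch a history-cleaning step that this commit refines but leaves disabled. A runnable version of that filter might look like this (field names follow the commented code; treating "string" as the placeholder Swagger UI submits by default is an assumption):

from typing import List, Tuple

def clean_history(question: str, chat_history: List["ChatHistoryItem"]) -> List[Tuple[str, str]]:
    # Drop any echo of the current question, then keep the last five
    # turns that carry a real answer (not "" or the "string" placeholder).
    history = [item for item in chat_history if item.q != question]
    return [(item.q, item.a) for item in history[-5:] if item.a not in ("", "string")]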

+ 2 - 2
text_to_sql_private.py

@@ -316,8 +316,8 @@ def sql_to_nl_chain(llm):
         SQL Result: [(1102.3712,)]
         Answer: 建準廣興廠去年的類別1總排放量是1102.3712
 
-        如果你不知道答案或SQL query 出現錯誤請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
-        
+        如果你不知道答案或SQL query 出現Error請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
+        若 SQL Result 為 0 代表數據為0。
         勿回答無關資訊
         <|eot_id|>
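For context, sql_to_nl_chain wraps a few-shot prompt like the one above around an LLM to turn a raw SQL result into a natural-language answer. A minimal sketch of how such a chain is typically assembled in this codebase's LangChain style; the template is abbreviated and the model choice is an assumption:

from langchain_community.chat_models import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

def sql_to_nl_chain(llm):
    # Abbreviated stand-in for the full few-shot prompt shown in the diff.
    prompt = ChatPromptTemplate.from_template(
        "Question: {question}\nSQL Query: {query}\nSQL Result: {result}\nAnswer: "
    )
    return prompt | llm | StrOutputParser()

llm = ChatOllama(model="llama3-groq-tool-use:latest", temperature=0)
chain = sql_to_nl_chain(llm)
print(chain.invoke({"question": "建準廣興廠去年類別1總排放量?", "query": "SELECT ...", "result": "[(1102.3712,)]"}))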