SHA1
--- a/ai_agent.ipynb
+++ b/ai_agent.ipynb
@@ -601,7 +601,7 @@
 
															   },
														
 
															   {
														
 
															    "cell_type": "code",
														
 
															-   "execution_count": 19,
														
 
															+   "execution_count": 8,
														
 
															    "metadata": {},
														
 
															    "outputs": [],
														
 
															    "source": [
														
@@ -643,7 +643,7 @@
 
															     "    AND \"年度\" = EXTRACT(YEAR FROM CURRENT_DATE)-1;\"\n",
														
 
															     "    For the above example, we can find that user asked for \"建準\", but the PostgreSQL query gives \"事業名稱\" like '%台積電%' in WHERE statement, which means the PostgreSQL query is incorrect for the user question.\n",
														
 
															     "    \n",
														
 
															-    "    and so on. You need to strictly examine whether the sql PostgreSQL query matches the user question.\n",
														
 
															+    "    and so on. You need to examine whether the sql PostgreSQL query matches the user question.\n",
														
 
															     "    \n",
														
 
															     "    If the PostgreSQL query do not exactly matches the user question, grade it as incorrect. \n",
														
 
															     "    You need to strictly examine whether the sql PostgreSQL query matches the user question.\n",
														
@@ -663,30 +663,122 @@
 
															   },
														
 
															   {
														
 
															    "cell_type": "code",
														
 
															-   "execution_count": 20,
														
 
															+   "execution_count": 16,
														
 
															+   "metadata": {},
														
 
															+   "outputs": [],
														
 
															+   "source": [
														
 
															+    "\n",
														
 
															+    "### SQL Grader\n",
														
 
															+    "\n",
														
 
															+    "from langchain_community.chat_models import ChatOllama\n",
														
 
															+    "from langchain_core.output_parsers import JsonOutputParser\n",
														
 
															+    "from langchain_core.prompts import PromptTemplate\n",
														
 
															+    "\n",
														
 
															+    "# LLM\n",
														
 
															+    "# llm_json = ChatOllama(model=local_llm, format=\"json\", temperature=0)\n",
														
 
															+    "\n",
														
 
															+    "prompt = PromptTemplate(\n",
														
 
															+    "    template=\"\"\"<|begin_of_text|><|start_header_id|>system<|end_header_id|> \n",
														
 
															+    "    You are a SQL query grader assessing correctness of PostgreSQL query to a user question. \n",
														
 
															+    "    Based on following database description, you need to grade whether the PostgreSQL query exactly matches the user question.\n",
														
 
															+    "    \n",
														
 
															+    "    Here is database description:\n",
														
 
															+    "    {table_info}\n",
														
 
															+    "    \n",
														
 
															+    "    You need to check that each where statement is correctly filtered out what user question need.\n",
														
 
															+    "    You need to check if PostgreSQL query WHERE clause correctly filter records according to user question\n",
														
 
															+    "    You need to examine whether the sql PostgreSQL query matches the user question.\n",
														
 
															+    "    \n",
														
 
															+    "    If the PostgreSQL query do not exactly matches the user question, grade it as incorrect. \n",
														
 
															+    "    You need to strictly examine whether the sql PostgreSQL query matches the user question.\n",
														
 
															+    "    Give a binary score 'yes' or 'no' score to indicate whether the PostgreSQL query is correct to the question. \\n\n",
														
 
															+    "    Provide the binary score as a JSON with a single key 'score' and no premable or explanation.\n",
														
 
															+    "    <|eot_id|>\n",
														
 
															+    "    \n",
														
 
															+    "    <|start_header_id|>user<|end_header_id|>\n",
														
 
															+    "    Here is the PostgreSQL query: \\n\\n {sql_query} \\n\\n\n",
														
 
															+    "    Here is the user question: {question} \\n <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n",
														
 
															+    "    \"\"\",\n",
														
 
															+    "    input_variables=[\"table_info\", \"question\", \"sql_query\"],\n",
														
 
															+    ")\n",
														
 
															+    "\n",
														
 
															+    "sql_query_grader = prompt | llm_json | JsonOutputParser()"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 26,
														
 
															+   "metadata": {},
														
 
															+   "outputs": [],
														
 
															+   "source": [
														
 
															+    "question = \"建準廣興廠去年的綠電使用量是多少?\"\n",
														
 
															+    "sql_query = \"\"\"\n",
														
 
															+    "\n",
														
 
															+    "SELECT SUM(\"用電度數(kwh)\") AS \"自產電力綠電使用量\"\n",
														
 
															+    "FROM \"用電度數\"\n",
														
 
															+    "WHERE \"項目\" = '自產電力(綠電)'\n",
														
 
															+    "AND \"盤查標準\" = 'GHG'\n",
														
 
															+    "AND \"年度\" = EXTRACT(YEAR FROM CURRENT_DATE)-1\n",
														
 
															+    "\"\"\""
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 27,
														
 
															+   "metadata": {},
														
 
															+   "outputs": [],
														
 
															+   "source": [
														
 
															+    "from text_to_sql_private import get_query\n",
														
 
															+    "selected_table = ['用水度數', '用水度數', '建準碳排放清冊數據']"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 28,
														
 
															+   "metadata": {},
														
 
															+   "outputs": [
														
 
															+    {
														
 
															+     "ename": "NameError",
														
 
															+     "evalue": "name 'db' is not defined",
														
 
															+     "output_type": "error",
														
 
															+     "traceback": [
														
 
															+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
														
 
															+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
														
 
															+      "Cell \u001b[0;32mIn[28], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m get_query(\u001b[43mdb\u001b[49m, question, selected_table, llm)\n",
														
 
															+      "\u001b[0;31mNameError\u001b[0m: name 'db' is not defined"
														
 
															+     ]
														
 
															+    }
														
 
															+   ],
														
 
															+   "source": [
														
 
															+    "get_query(db, question, selected_table, llm)"
														
 
															+   ]
														
 
															+  },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": 24,
														
 
															    "metadata": {},
														
 
															    "outputs": [
														
 
															     {
														
 
															      "name": "stdout",
														
 
															      "output_type": "stream",
														
 
															      "text": [
														
 
															-      "{'score': 'no'}\n"
														
 
															+      "{'score': 'yes'}\n"
														
 
															      ]
														
 
															     }
														
 
															    ],
														
 
															    "source": [
														
 
															-    "from text_to_sql2 import table_description\n",
														
 
															-    "question = \"建準去年的類別一排放量\"\n",
														
 
															+    "from text_to_sql_private import table_description\n",
														
 
															+    "# question = \"建準去年的類別一排放量\"\n",
														
 
															     "# sql_query = \"\"\"\n",
														
 
															     "# SELECT SUM(\"高雄總部及運通廠\" + \"台北辦事處\" + \"昆山廣興廠\" + \"北海建準廠\" + \"北海立準廠\" + \"菲律賓建準廠\" + \"Inc\" + \"SAS\" + \"India\") AS \"類別一排放量\"\n",
														
 
															     "# FROM \"2023 清冊數據(GHG)\"\n",
														
 
															     "# WHERE \"類別\" = '類別一-直接排放'\n",
														
 
															     "# \"\"\"\n",
														
 
															-    "question = \"台積電去年的固定燃燒總排放量是多少?\"\n",
														
 
															+    "question = \"建準去年的固定燃燒總排放量是多少?\"\n",
														
 
															     "sql_query = \"\"\"\n",
														
 
															     "SELECT SUM(\"排放量(公噸CO2e)\") AS \"固定燃燒總排放量\"\n",
														
 
															-    "FROM \"104_112碳排放公開及建準資料\"\n",
														
 
															-    "WHERE \"事業名稱\" like '%建準%'\n",
														
 
															+    "FROM \"建準碳排放清冊數據\"\n",
														
 
															+    "WHERE \"事業名稱\" like '%台積電%'\n",
														
 
															     "AND \"排放源\" = '固定燃燒'\n",
														
 
															     "AND \"盤查標準\" = 'GHG'\n",
														
 
															     "AND \"年度\" = EXTRACT(YEAR FROM CURRENT_DATE)-1;\n",
														
@@ -874,7 +966,7 @@
 
															   },
														
 
															   {
														
 
															    "cell_type": "code",
														
 
															-   "execution_count": 29,
														
 
															+   "execution_count": 34,
														
 
															    "metadata": {},
														
 
															    "outputs": [],
														
 
															    "source": [
														
@@ -911,7 +1003,7 @@
 
															   },
														
 
															   {
														
 
															    "cell_type": "code",
														
 
															-   "execution_count": 42,
														
 
															+   "execution_count": 35,
														
 
															    "metadata": {},
														
 
															    "outputs": [],
														
 
															    "source": [
														
@@ -1061,7 +1153,7 @@
 
															   },
														
 
															   {
														
 
															    "cell_type": "code",
														
 
															-   "execution_count": 43,
														
 
															+   "execution_count": 36,
														
 
															    "metadata": {},
														
 
															    "outputs": [],
														
 
															    "source": [
														
@@ -1191,7 +1283,7 @@
 
															   },
														
 
															   {
														
 
															    "cell_type": "code",
														
 
															-   "execution_count": 44,
														
 
															+   "execution_count": 37,
														
 
															    "metadata": {},
														
 
															    "outputs": [],
														
 
															    "source": [
														
@@ -1337,6 +1429,20 @@
 
															     "print(app.get_graph().draw_mermaid())"
														
 
															    ]
														
 
															   },
														
 
															+  {
														
 
															+   "cell_type": "code",
														
 
															+   "execution_count": null,
														
 
															+   "metadata": {},
														
 
															+   "outputs": [],
														
 
															+   "source": [
														
 
															+    "Image(\n",
														
 
															+    "    app.get_graph().draw_mermaid_png(\n",
														
 
															+    "        draw_method=MermaidDrawMethod.API,\n",
														
 
															+    "        output_file_path=\"agent_workflow.png\",\n",
														
 
															+    "    )\n",
														
 
															+    ")"
														
 
															+   ]
														
 
															+  },
														
 
															   {
														
 
															    "cell_type": "code",
														
 
															    "execution_count": null,
														
--- a/ai_agent.py
+++ b/ai_agent.py
@@ -36,7 +36,6 @@ retriever = create_faiss_retriever()
 
															 from text_to_sql_private import run, get_query, query_to_nl, table_description
														
 
															 from post_processing_sqlparse import get_query_columns, parse_sql_where, get_table_name
														
 
															 progress_bar = []
														
 
															-
														
 
															 def faiss_query(question: str, docs, llm, multi_query: bool = False) -> str:
														
 
															     context = docs
														
@@ -60,6 +59,7 @@ def faiss_query(question: str, docs, llm, multi_query: bool = False) -> str:
 
															     Question: {question}
														
 
															     用繁體中文回答問題
														
 
															+    如果你不知道答案請回答："很抱歉，目前我無法回答您的問題，請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助，謝謝。"
														
 
															     <|eot_id|>
														
 
															     <|start_header_id|>assistant<|end_header_id|>
														
@@ -121,19 +121,19 @@ def Answer_Grader():
 
															 # Text-to-SQL
														
 
															 def run_text_to_sql(question: str):
														
 
															-    selected_table = ['104_112碳排放公開及建準資料', '水電使用量(GHG)', '水電使用量(ISO)']
														
 
															+    selected_table = ['用水度數', '用水度數', '建準碳排放清冊數據new']
														
 
															     # question = "建準去年的固定燃燒總排放量是多少?"
														
 
															     query, result, answer = run(db, question, selected_table, llm)
														
 
															     return  answer, query
														
 
															 def _get_query(question: str):
														
 
															-    selected_table = ['104_112碳排放公開及建準資料', '水電使用量(GHG)', '水電使用量(ISO)']
														
 
															-    query = get_query(db, question, selected_table, llm)
														
 
															-    return  query
														
 
															+    selected_table = ['用水度數', '用水度數', '建準碳排放清冊數據new']
														
 
															+    query, result = get_query(db, question, selected_table, llm)
														
 
															+    return  query, result
														
 
															-def _query_to_nl(question: str, query: str):
														
 
															-    answer = query_to_nl(db, question, query, llm)
														
 
															+def _query_to_nl(question: str, query: str, result):
														
 
															+    answer = query_to_nl(question, query, result, llm)
														
 
															     return  answer
														
 
															 def generate_additional_question(sql_query):
														
@@ -150,15 +150,17 @@ def generate_additional_question(sql_query):
 
															 def generate_additional_detail(sql_query):
														
 
															     terms = parse_sql_where(sql_query)
														
 
															     answer = ""
														
 
															-    for term in terms:
														
 
															+    for term in list(set(terms)):
														
 
															         if term is None: continue
														
 
															-        question_format = [f"什麼是{term}？", f"{term}的用途是什麼", f"如何計算{term}？"]
														
 
															+        question_format = [f"請解釋什麼是{term}？"]
														
 
															         for question in question_format:
														
 
															             # question = f"什麼是{term}？"
														
 
															-            documents = retriever.get_relevant_documents(question, k=30)
														
 
															-            generation = faiss_query(question, documents, llm)
														
 
															+            documents = retriever.get_relevant_documents(question, k=5)
														
 
															+            generation = faiss_query(question, documents, llm) + "\n"
														
 
															+            if "test@systex.com" in generation:
														
 
															+                generation = ""
														
 
															+            
														
 
															             answer += generation
														
 
															-            answer += "\n"
														
 
															             # print(question)
														
 
															             # print(generation)
														
 
															     return answer
														
@@ -177,7 +179,7 @@ def SQL_Grader():
 
															         For example, if user question is "建準去年的固定燃燒總排放量是多少?", and the PostgreSQL query is 
														
 
															         "SELECT SUM("排放量(公噸CO2e)") AS "下游租賃總排放量"
														
 
															-        FROM "104_112碳排放公開及建準資料"
														
 
															+        FROM "建準碳排放清冊數據new"
														
 
															         WHERE "事業名稱" like '%建準%'
														
 
															         AND "排放源" = '下游租賃'
														
 
															         AND "盤查標準" = 'GHG'
														
@@ -186,7 +188,7 @@ def SQL_Grader():
 
															         Another example like "建準去年的固定燃燒總排放量是多少?", and the PostgreSQL query is 
														
 
															         "SELECT SUM("排放量(公噸CO2e)") AS "固定燃燒總排放量"
														
 
															-        FROM "104_112碳排放公開及建準資料"
														
 
															+        FROM "建準碳排放清冊數據new"
														
 
															         WHERE "事業名稱" like '%台積電%'
														
 
															         AND "排放源" = '固定燃燒'
														
 
															         AND "盤查標準" = 'GHG'
														
@@ -251,6 +253,7 @@ class GraphState(TypedDict):
 
															     documents: List[str]
														
 
															     retry: int
														
 
															     sql_query: str
														
 
															+    sql_result: str
														
 
															 # Node
														
 
															 def show_progress(state, progress: str):
														
@@ -289,7 +292,10 @@ def retrieve_and_generation(state):
 
															     if not question_list:
														
 
															         # documents = retriever.invoke(question)
														
 
															         # TODO: correct Retrieval function
														
 
															-        documents = retriever.get_relevant_documents(question, k=30)
														
 
															+        documents = retriever.get_relevant_documents(question, k=5)
														
 
															+        for doc in documents:
														
 
															+            print(doc)
														
 
															+            
														
 
															         # docs_documents = "\n\n".join(doc.page_content for doc in documents)
														
 
															         # print(documents)
														
 
															         generation = faiss_query(question, documents, llm)
														
@@ -297,10 +303,13 @@ def retrieve_and_generation(state):
 
															         generation = state["generation"]
														
 
															         for sub_question in list(set(question_list)):
														
 
															+            print(sub_question)
														
 
															             documents = retriever.get_relevant_documents(sub_question, k=10)
														
 
															             generation += faiss_query(sub_question, documents, llm)
														
 
															             generation += "\n"
														
 
															+    print(generation)
														
 
															+            
														
 
															     return {"progress_bar": progress_bar, "route": route, "documents": documents, "question": question, "generation": generation}
														
 
															 def company_private_data_get_sql_query(state):
														
@@ -328,9 +337,10 @@ def company_private_data_get_sql_query(state):
 
															         retry = 0
														
 
															     # print("RETRY: ", retry)
														
 
															-    sql_query = _get_query(question)
														
 
															+    sql_query, sql_result = _get_query(question)
														
 
															+    print(type(sql_result))
														
 
															-    return {"progress_bar": progress_bar, "route": route,"sql_query": sql_query, "question": question, "retry": retry}
														
 
															+    return {"progress_bar": progress_bar, "route": route, "sql_query": sql_query, "sql_result": sql_result, "question": question, "retry": retry}
														
 
															 def company_private_data_search(state):
														
 
															     """
														
@@ -348,7 +358,8 @@ def company_private_data_search(state):
 
															     # print(state)
														
 
															     question = state["question"]
														
 
															     sql_query = state["sql_query"]
														
 
															-    generation = _query_to_nl(question, sql_query)
														
 
															+    sql_result = state["sql_result"]
														
 
															+    generation = _query_to_nl(question, sql_query, sql_result)
														
 
															     # generation = [company_private_data_result]
														
@@ -371,11 +382,12 @@ def additional_explanation_question(state):
 
															     sql_query = state["sql_query"]
														
 
															     # print(sql_query)
														
 
															     generation = state["generation"]
														
 
															-    question_list = generate_additional_question(sql_query)
														
 
															-    # print(question_list)
														
 
															-    # generation += "\n"
														
 
															-    # generation += generate_additional_detail(sql_query)
														
 
															+    generation += "\n"
														
 
															+    generation += generate_additional_detail(sql_query)
														
 
															+    question_list = []    
														
 
															+    # question_list = generate_additional_question(sql_query)
														
 
															+    # print(question_list)
														
 
															     # generation = [company_private_data_result]
														
@@ -408,6 +420,9 @@ def route_question(state):
 
															     # print(question)
														
 
															     question_router = Router()
														
 
															     source = question_router.invoke({"question": question})
														
 
															+    if "建準" in question:
														
 
															+        source["datasource"] = "自有數據"
														
 
															+        
														
 
															     # print(source)
														
 
															     print(source["datasource"])
														
 
															     if source["datasource"] == "自有數據":
														
@@ -431,43 +446,56 @@ def grade_generation_v_documents_and_question(state):
 
															     """
														
 
															     # print("---CHECK HALLUCINATIONS---")
														
 
															-    progress_bar = show_progress(state, "---CHECK HALLUCINATIONS---")
														
 
															     question = state["question"]
														
 
															     documents = state["documents"]
														
 
															     generation = state["generation"]
														
 
															-    
														
 
															-    # print(docs_documents)
														
 
															-    # print(generation)
														
 
															-    hallucination_grader = Hallucination_Grader()
														
 
															-    score = hallucination_grader.invoke(
														
 
															-        {"documents": documents, "generation": generation}
														
 
															-    )
														
 
															-    # print(score)
														
 
															+    progress_bar = show_progress(state, "---GRADE GENERATION vs QUESTION---")
														
 
															+    answer_grader = Answer_Grader()
														
 
															+    score = answer_grader.invoke({"question": question, "generation": generation})
														
 
															     grade = score["score"]
														
 
															-
														
 
															-    # Check hallucination
														
 
															     if grade in ["yes", "true", 1, "1"]:
														
 
															-        # print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
														
 
															-        progress_bar = show_progress(state, "---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
														
 
															-        # Check question-answering
														
 
															-        # print("---GRADE GENERATION vs QUESTION---")
														
 
															-        progress_bar = show_progress(state, "---GRADE GENERATION vs QUESTION---")
														
 
															-        answer_grader = Answer_Grader()
														
 
															-        score = answer_grader.invoke({"question": question, "generation": generation})
														
 
															-        grade = score["score"]
														
 
															-        if grade in ["yes", "true", 1, "1"]:
														
 
															-            # print("---DECISION: GENERATION ADDRESSES QUESTION---")
														
 
															-            progress_bar = show_progress(state, "---DECISION: GENERATION ADDRESSES QUESTION---")
														
 
															-            return "useful"
														
 
															-        else:
														
 
															-            # print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
														
 
															-            progress_bar = show_progress(state, "---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
														
 
															-            return "not useful"
														
 
															+        # print("---DECISION: GENERATION ADDRESSES QUESTION---")
														
 
															+        progress_bar = show_progress(state, "---DECISION: GENERATION ADDRESSES QUESTION---")
														
 
															+        return "useful"
														
 
															     else:
														
 
															-        # pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
														
 
															-        progress_bar = show_progress(state, "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
														
 
															-        return "not supported"
														
 
															+        # print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
														
 
															+        progress_bar = show_progress(state, "---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
														
 
															+        return "not useful"
														
 
															+    
														
 
															+    
														
 
															+    # progress_bar = show_progress(state, "---CHECK HALLUCINATIONS---")
														
 
															+    # # print(docs_documents)
														
 
															+    # # print(generation)
														
 
															+    # hallucination_grader = Hallucination_Grader()
														
 
															+    # score = hallucination_grader.invoke(
														
 
															+    #     {"documents": documents, "generation": generation}
														
 
															+    # )
														
 
															+    # # print(score)
														
 
															+    # grade = score["score"]
														
 
															+
														
 
															+    # # Check hallucination
														
 
															+    # if grade in ["yes", "true", 1, "1"]:
														
 
															+    #     # print("---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
														
 
															+    #     progress_bar = show_progress(state, "---DECISION: GENERATION IS GROUNDED IN DOCUMENTS---")
														
 
															+    #     # Check question-answering
														
 
															+    #     # print("---GRADE GENERATION vs QUESTION---")
														
 
															+    #     progress_bar = show_progress(state, "---GRADE GENERATION vs QUESTION---")
														
 
															+    #     answer_grader = Answer_Grader()
														
 
															+    #     score = answer_grader.invoke({"question": question, "generation": generation})
														
 
															+    #     grade = score["score"]
														
 
															+    #     if grade in ["yes", "true", 1, "1"]:
														
 
															+    #         # print("---DECISION: GENERATION ADDRESSES QUESTION---")
														
 
															+    #         progress_bar = show_progress(state, "---DECISION: GENERATION ADDRESSES QUESTION---")
														
 
															+    #         return "useful"
														
 
															+    #     else:
														
 
															+    #         # print("---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
														
 
															+    #         progress_bar = show_progress(state, "---DECISION: GENERATION DOES NOT ADDRESS QUESTION---")
														
 
															+    #         return "not useful"
														
 
															+    # else:
														
 
															+    #     # pprint("---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
														
 
															+    #     progress_bar = show_progress(state, "---DECISION: GENERATION IS NOT GROUNDED IN DOCUMENTS, RE-TRY---")
														
 
															+    #     return "not supported"
														
 
															 def grade_sql_query(state):
														
 
															     """
														
@@ -484,25 +512,34 @@ def grade_sql_query(state):
 
															     progress_bar = show_progress(state, "---CHECK SQL CORRECTNESS TO QUESTION---")
														
 
															     question = state["question"]
														
 
															     sql_query = state["sql_query"]
														
 
															-    retry = state["retry"]
														
 
															-
														
 
															-    # Score each doc
														
 
															-    sql_query_grader = SQL_Grader()
														
 
															-    score = sql_query_grader.invoke({"table_info": table_description(), "question": question, "sql_query": sql_query})
														
 
															-    grade = score["score"]
														
 
															-    # Document relevant
														
 
															-    if grade in ["yes", "true", 1, "1"]:
														
 
															-        # print("---GRADE: CORRECT SQL QUERY---")
														
 
															-        progress_bar = show_progress(state, "---GRADE: CORRECT SQL QUERY---")
														
 
															-        return "correct"
														
 
															-    elif retry >= 5:
														
 
															-        # print("---GRADE: INCORRECT SQL QUERY AND REACH RETRY LIMIT---")
														
 
															-        progress_bar = show_progress(state, "---GRADE: INCORRECT SQL QUERY AND REACH RETRY LIMIT---")
														
 
															-        return "failed"
														
 
															-    else:
														
 
															-        # print("---GRADE: INCORRECT SQL QUERY---")
														
 
															-        progress_bar = show_progress(state, "---GRADE: INCORRECT SQL QUERY---")
														
 
															+    sql_result = state["sql_result"]
														
 
															+    if "None" in sql_result:
														
 
															+        progress_bar = show_progress(state, "---INCORRECT SQL QUERY---")
														
 
															         return "incorrect"
														
 
															+    else:
														
 
															+        progress_bar = show_progress(state, "---CORRECT SQL QUERY---")
														
 
															+        return "correct"
														
 
															+    # retry = state["retry"]
														
 
															+
														
 
															+    # # Score each doc
														
 
															+    # sql_query_grader = SQL_Grader()
														
 
															+    # score = sql_query_grader.invoke({"table_info": table_description(), "question": question, "sql_query": sql_query})
														
 
															+    # grade = score["score"]
														
 
															+    
														
 
															+    
														
 
															+    # # Document relevant
														
 
															+    # if grade in ["yes", "true", 1, "1"]:
														
 
															+    #     # print("---GRADE: CORRECT SQL QUERY---")
														
 
															+    #     progress_bar = show_progress(state, "---GRADE: CORRECT SQL QUERY---")
														
 
															+    #     return "correct"
														
 
															+    # elif retry >= 5:
														
 
															+    #     # print("---GRADE: INCORRECT SQL QUERY AND REACH RETRY LIMIT---")
														
 
															+    #     progress_bar = show_progress(state, "---GRADE: INCORRECT SQL QUERY AND REACH RETRY LIMIT---")
														
 
															+    #     return "failed"
														
 
															+    # else:
														
 
															+    #     # print("---GRADE: INCORRECT SQL QUERY---")
														
 
															+    #     progress_bar = show_progress(state, "---GRADE: INCORRECT SQL QUERY---")
														
 
															+    #     return "incorrect"
														
 
															 def build_graph():
														
 
															     workflow = StateGraph(GraphState)
														
@@ -527,7 +564,6 @@ def build_graph():
 
															         "RAG",
														
 
															         grade_generation_v_documents_and_question,
														
 
															         {
														
 
															-            "not supported": "ERROR",
														
 
															             "useful": END,
														
 
															             "not useful": "ERROR",
														
 
															         },
														
@@ -537,21 +573,26 @@ def build_graph():
 
															         grade_sql_query,
														
 
															         {
														
 
															             "correct": "SQL Answer",
														
 
															-            "incorrect": "ERROR",
														
 
															-            "failed": "RAG"
														
 
															+            "incorrect": "RAG",
														
 
															         },
														
 
															     )
														
 
															     workflow.add_edge("SQL Answer", "Additoinal Explanation")
														
 
															-    workflow.add_edge("Additoinal Explanation", "RAG")
														
 
															+    workflow.add_edge("Additoinal Explanation", END)
														
 
															     app = workflow.compile()    
														
 
															     return app
														
 
															+app = build_graph()
														
 
															+draw_mermaid = app.get_graph().draw_mermaid()
														
 
															+print(draw_mermaid)
														
 
															+
														
 
															 def main(question: str):
														
 
															-    app = build_graph()
														
 
															+    # app = build_graph()
														
 
															+    # draw_mermaid = app.get_graph().draw_mermaid()
														
 
															+    # print(draw_mermaid)
														
 
															     #建準去年的類別一排放量?
														
 
															     # inputs = {"question": "溫室氣體是什麼"}
														
 
															     inputs = {"question": question, "progress_bar": None}
														
@@ -561,12 +602,14 @@ def main(question: str):
 
															     # pprint(value["generation"])
														
 
															     # pprint(value)
														
 
															     value["progress_bar"] = progress_bar
														
 
															-    pprint(value["progress_bar"])
														
 
															+    # pprint(value["progress_bar"])
														
 
															     return value["generation"]
														
 
															 if __name__ == "__main__":
														
 
															     # result = main("建準去年的逸散排放總排放量是多少?")
														
 
															-    result = main("建準去年的綠電使用量是多少?")
														
 
															+    result = main("建準夏威夷去年的綠電使用量是多少?")
														
 
															+    # result = main("溫室氣體是什麼?")
														
 
															+    # result = main("什麼是外購電力(綠電)？")
														
 
															     print("------------------------------------------------------")
														
 
															     print(result)
														
--- a/faiss_index.py
+++ b/faiss_index.py
@@ -44,7 +44,7 @@ load_dotenv('../../.env')
 
															 supabase_url = os.getenv("SUPABASE_URL")
														
 
															 supabase_key = os.getenv("SUPABASE_KEY")
														
 
															 openai_api_key = os.getenv("OPENAI_API_KEY")
														
 
															-document_table = "documents"
														
 
															+document_table = "documents2"
														
 
															 # Initialize Supabase client
														
 
															 supabase: Client = create_client(supabase_url, supabase_key)
														
--- a/file_loader/news_vectordb.py
+++ b/file_loader/news_vectordb.py
@@ -13,7 +13,7 @@ from add_vectordb import GetVectorStore
 
															 load_dotenv("../.env")
														
 
															 supabase_url = os.environ.get("SUPABASE_URL")
														
 
															 supabase_key = os.environ.get("SUPABASE_KEY")
														
 
															-document_table = "documents"
														
 
															+document_table = "documents2"
														
 
															 supabase: Client = create_client(supabase_url, supabase_key)
														
 
															 embeddings = OpenAIEmbeddings()
														
--- a/post_processing_sqlparse.py
+++ b/post_processing_sqlparse.py
@@ -60,7 +60,9 @@ def parse_sql_where(sql):
 
															     column_dict = {
														
 
															         "排放源": None,
														
 
															         "類別": None,
														
 
															-        "項目": None
														
 
															+        "類別項目": None,
														
 
															+        "項目": None,
														
 
															+        
														
 
															     }
														
 
															     def get_column_details(token, column_args):
														
--- a/rewrite_question.py
+++ b/rewrite_question.py
@@ -0,0 +1,110 @@
 
															+from langchain_core.output_parsers import StrOutputParser
														
 
															+from langchain_openai import ChatOpenAI
														
 
															+from langchain_core.runnables import RunnablePassthrough
														
 
															+from langchain import PromptTemplate
														
 
															+from langchain_community.chat_models import ChatOllama
														
 
															+
														
 
															+
														
 
															+from langchain_core.runnables import (
														
 
															+    RunnableBranch,
														
 
															+    RunnableLambda,
														
 
															+    RunnableParallel,
														
 
															+    RunnablePassthrough,
														
 
															+)
														
 
															+from typing import Tuple, List, Optional
														
 
															+from langchain_core.messages import AIMessage, HumanMessage
														
 
															+
														
 
															+local_llm = "llama3-groq-tool-use:latest"
														
 
															+# llm_json = ChatOllama(model=local_llm, format="json", temperature=0)
														
 
															+llm = ChatOllama(model=local_llm, temperature=0)
														
 
															+
														
 
															+def get_search_query():
														
 
															+    # Condense a chat history and follow-up question into a standalone question
														
 
															+    # 
														
 
															+    # _template = """Given the following conversation and a follow up question, 
														
 
															+    # rephrase the follow up question to be a standalone question to help others understand the question without having to go back to the conversation transcript.
														
 
															+    # Generate standalone question in its original language.
														
 
															+    # Chat History:
														
 
															+    # {chat_history}
														
 
															+    # Follow Up Input: {question}
														
 
															+
														
 
															+    # Hint:
														
 
															+    # * Refer to chat history and add the subject to the question
														
 
															+    # * Replace the pronouns in the question with the correct person or thing, please refer to chat history
														
 
															+    
														
 
															+    # Standalone question:"""  # noqa: E501
														
 
															+    _template = """
														
 
															+    <|begin_of_text|>
														
 
															+    
														
 
															+    <|start_header_id|>system<|end_header_id|>
														
 
															+    Rewrite the following query by incorporating relevant context from the conversation history.
														
 
															+    The rewritten query should:
														
 
															+    
														
 
															+    - Preserve the core intent and meaning of the original query
														
 
															+    - Expand and clarify the query to make it more specific and informative for retrieving relevant context
														
 
															+    - Avoid introducing new topics or queries that deviate from the original query
														
 
															+    - DONT EVER ANSWER the Original query, but instead focus on rephrasing and expanding it into a new query
														
 
															+    - The rewritten query should be in its original language.
														
 
															+    
														
 
															+    Return ONLY the rewritten query text, without any additional formatting or explanations.
														
 
															+    
														
 
															+    <|eot_id|>
														
 
															+        
														
 
															+    <|begin_of_text|><|start_header_id|>user<|end_header_id|>
														
 
															+    Conversation History:
														
 
															+    {chat_history}
														
 
															+    
														
 
															+    Original query: [{question}]
														
 
															+    
														
 
															+    Hint:
														
 
															+    * Refer to chat history and add the subject to the question
														
 
															+    * Replace the pronouns in the question with the correct person or thing, please refer to chat history
														
 
															+    
														
 
															+    Rewritten query: 
														
 
															+    <|eot_id|>
														
 
															+    
														
 
															+    <|start_header_id|>assistant<|end_header_id|>
														
 
															+    """
														
 
															+    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
														
 
															+
														
 
															+    def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
														
 
															+        buffer = []
														
 
															+        for human, ai in chat_history:
														
 
															+            buffer.append(HumanMessage(content=human))
														
 
															+            buffer.append(AIMessage(content=ai))
														
 
															+        return buffer
														
 
															+
														
 
															+    _search_query = RunnableBranch(
														
 
															+        # If input includes chat_history, we condense it with the follow-up question
														
 
															+        (
														
 
															+            RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
														
 
															+                run_name="HasChatHistoryCheck"
														
 
															+            ),  # Condense follow-up question and chat into a standalone_question
														
 
															+            RunnablePassthrough.assign(
														
 
															+                chat_history=lambda x: _format_chat_history(x["chat_history"])
														
 
															+            )
														
 
															+            | CONDENSE_QUESTION_PROMPT
														
 
															+            | llm
														
 
															+            | StrOutputParser(),
														
 
															+        ),
														
 
															+        # Else, we have no chat history, so just pass through the question
														
 
															+        RunnableLambda(lambda x : x["question"]),
														
 
															+    )
														
 
															+
														
 
															+    return _search_query
														
 
															+
														
 
															+if __name__ == "__main__":
														
 
															+    _search_query = get_search_query()
														
 
															+    chat_history = [
														
 
															+        {
														
 
															+            "q": "北海建準廠2023年的類別3排放量是多少？",
														
 
															+            "a": """根據北海建準廠2023年的數據，類別3的排放量是2,162.62公噸CO2e。
														
 
															+                類別3指的是溫室氣體排放量盤查作業中的一個範疇，該範疇涵蓋了事業之溫室氣體排放量的盤查和登錄。"""
														
 
															+        }
														
 
															+        ]
														
 
															+    chat_history = [(history["q"] , history["a"] ) for history in chat_history if history["a"] != "" and history["a"]  != "string"]
														
 
															+    print(chat_history)
														
 
															+    
														
 
															+    question = "類別2呢"
														
 
															+    modified_question = _search_query.invoke({"question": question, "chat_history": chat_history})
														
 
															+    print(modified_question)
														
--- a/systex_app.py
+++ b/systex_app.py
@@ -39,20 +39,21 @@ class ChatHistoryItem(BaseModel):
 
															 @app.post("/agents")
														
 
															 def agent(question: str, chat_history: List[ChatHistoryItem] = Body(...)):
														
 
															+    print(question)
														
 
															     start = time.time()
														
 
															     with get_openai_callback() as cb:
														
 
															-        cache_question, cache_answer = semantic_cache(supabase, question)
														
 
															+        # cache_question, cache_answer = semantic_cache(supabase, question)
														
 
															+        cache_answer = None
														
 
															         if cache_answer:
														
 
															-            processing_time = time.time() - start
														
 
															-            save_history(question, cache_answer, cb, processing_time)
														
 
															-
														
 
															-            return {"Answer": cache_answer}
														
 
															-    
														
 
															-        answer = main(question)
														
 
															-        
														
 
															+            answer = cache_answer
														
 
															+        else:
														
 
															+            answer = main(question)
														
 
															     processing_time = time.time() - start
														
 
															     save_history(question, answer, cb, processing_time)
														
 
															+    if "test@systex.com" in answer:
														
 
															+        answer = "很抱歉，目前我無法回答您的問題，請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助，謝謝。"
														
 
															+    print(answer)
														
 
															     return {"Answer": answer}  
														
 
															 def save_history(question, answer, cb, processing_time):
														
--- a/text_to_sql_private.py
+++ b/text_to_sql_private.py
@@ -76,39 +76,51 @@ llm = ChatOllama(model=local_llm, temperature=0)
 
															 def get_examples():
														
 
															     examples = [
														
 
															         {
														
 
															-            "input": "建準廣興廠2023年的自產電力的綠電使用量是多少?",
														
 
															-            "query": """SELECT SUM("用電度數(kwh)") AS "自產電力綠電使用量"
														
 
															+            "input": "建準廣興廠去年的自產電力的綠電使用量是多少?",
														
 
															+            "query": """SELECT SUM("用電度數(kwh)") AS "綠電使用量"
														
 
															                         FROM "用電度數"
														
 
															-                        WHERE "項目" = '自產電力(綠電)'
														
 
															+                        WHERE "項目" like '%綠電%'
														
 
															+                        AND "事業名稱" like '%建準%'
														
 
															+                        AND "事業名稱" like '%廣興廠%'
														
 
															                         AND "盤查標準" = 'GHG'
														
 
															-                        AND "年度" = 2023;""",
														
 
															+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
														
 
															         },
														
 
															         {
														
 
															-            "input": "建準廣興廠去年的類別1總排放量是多少?",
														
 
															+            "input": "建準北海廠去年的類別1總排放量是多少?",
														
 
															             "query": """SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
														
 
															-                        FROM "建準碳排放清冊數據"
														
 
															+                        FROM "建準碳排放清冊數據new"
														
 
															                         WHERE "事業名稱" like '%建準%'
														
 
															-                        AND "事業名稱" like '%廣興廠%'
														
 
															-                        AND ("類別" like '%類別1-直接排放%' OR "排放源" like '%類別1-直接排放%')
														
 
															+                        AND "事業名稱" like '%北海%'
														
 
															+                        AND "類別" = '類別1'
														
 
															+                        AND "盤查標準" = 'GHG'
														
 
															+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
														
 
															+        },
														
 
															+        {
														
 
															+            "input": "建準廣興廠去年的直接排放總排放量是多少?",
														
 
															+            "query": """SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
														
 
															+                        FROM "建準碳排放清冊數據new"
														
 
															+                        WHERE "事業名稱" like '%建準%'
														
 
															+                        AND "事業名稱" like '%廣興%'
														
 
															+                        AND ("類別項目" like '%直接排放%' OR "排放源" like '%直接排放%')
														
 
															                         AND "盤查標準" = 'GHG'
														
 
															                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
														
 
															         },
														
 
															         {
														
 
															-            "input": "建準台北辦事處2022年的能源間接排放總排放量是多少?",
														
 
															+            "input": "建準台北辦事處2022年的類別2總排放量是多少?",
														
 
															             "query": """SELECT SUM("排放量(公噸CO2e)") AS "直接排放總排放量"
														
 
															-                        FROM "建準碳排放清冊數據"
														
 
															+                        FROM "建準碳排放清冊數據new"
														
 
															                         WHERE "事業名稱" like '%建準%'
														
 
															-                        AND "事業名稱" like '%台北辦事處%'
														
 
															-                        AND ("類別" like '%類別2-能源間接排放%' OR "排放源" like '%類別2-能源間接排放%')
														
 
															+                        AND "事業名稱" like '%台北%'
														
 
															+                        AND "類別" = '類別2'
														
 
															                         AND "盤查標準" = 'GHG'
														
 
															                         AND "年度" = 2022;""",
														
 
															         },
														
 
															         {
														
 
															             "input": "建準去年的固定燃燒總排放量是多少?",
														
 
															             "query": """SELECT SUM("排放量(公噸CO2e)") AS "固定燃燒總排放量"
														
 
															-                        FROM "建準碳排放清冊數據"
														
 
															+                        FROM "建準碳排放清冊數據new"
														
 
															                         WHERE "事業名稱" like '%建準%'
														
 
															-                        AND ("類別" like '%固定燃燒%' OR "排放源" like '%固定燃燒%')
														
 
															+                        AND ("類別項目" like '%固定燃燒%' OR "排放源" like '%固定燃燒%')
														
 
															                         AND "盤查標準" = 'GHG'
														
 
															                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
														
 
															         },
														
@@ -120,9 +132,9 @@ def get_examples():
 
															 def table_description():
														
 
															     database_description = (
														
 
															-        "The database consists of following table: `用水度數`, `用水度數`, `建準碳排放清冊數據`. "
														
 
															+        "The database consists of following table: `用水度數`, `用水度數`, `建準碳排放清冊數據new`."
														
 
															         "This is a PostgreSQL database, so you need to use postgres-related queries.\n\n"
														
 
															-        "The `建準碳排放清冊數據` table 描述了不同事業單位或廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的溫室氣體排放量，並依類別1至類別6劃分。"
														
 
															+        "The `建準碳排放清冊數據new` table 描述了建準電機工業股份有限公司不同據點分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的溫室氣體排放量，並依類別1至類別6劃分。"
														
 
															         "It includes the following columns:\n"
														
 
															         "- `年度`: 盤查年度\n"
														
 
															         "- `事業名稱`: 建準據點"
														
@@ -142,7 +154,7 @@ def table_description():
 
															         "- `盤查標準`: ISO or GHG\n"
														
 
															-        "The `用電度數` 描述了不同廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
														
 
															+        "The `用電度數` 描述了建準電機工業股份有限公司不同據點分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
														
 
															         "It includes the following columns:\n"
														
 
															         "- `年度`: 盤查年度\n"
														
 
															         "- `事業名稱`: 建準據點"
														
@@ -154,7 +166,7 @@ def table_description():
 
															         "- `用電度數(kwh)`: 用電度數，單位為kwh\n"
														
 
															         "- `盤查標準`: ISO or GHG\n"
														
 
															-        "The `用水度數` 描述了不同廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
														
 
															+        "The `用水度數` 描述了建準電機工業股份有限公司不同據點分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
														
 
															         "It includes the following columns:\n"
														
 
															         "- `年度`: 盤查年度\n"
														
 
															         "- `事業名稱`: 建準據點"
														
@@ -230,8 +242,22 @@ def sql_to_nl_chain(llm):
 
															         <|begin_of_text|><|start_header_id|>system<|end_header_id|>
														
 
															         Given the following user question, corresponding SQL query, and SQL result, answer the user question.
														
 
															         根據使用者的問題、對應的 SQL 查詢和 SQL 結果，以繁體中文回答使用者問題。
														
 
															-
														
 
															+        ** 請務必在回答中表達是建準的資料，即便問句中並未提及建準。
														
 
															+        The following shows some example:
														
 
															+        Question: 廣興廠去年的類別1總排放量是多少?
														
 
															+        SQL Query: SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
														
 
															+                        FROM "建準碳排放清冊數據new"
														
 
															+                        WHERE "事業名稱" like '%建準%'
														
 
															+                        AND "事業名稱" like '%廣興%'
														
 
															+                        AND "類別" = '類別1'
														
 
															+                        AND "盤查標準" = 'GHG'
														
 
															+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;,
														
 
															+        SQL Result: [(1102.3712,)]
														
 
															+        Answer: 建準廣興廠去年的類別1總排放量是1102.3712
														
 
															+
														
 
															+        如果你不知道答案或SQL query 出現錯誤請回答："很抱歉，目前我無法回答您的問題，請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助，謝謝。"
														
 
															+        勿回答無關資訊
														
 
															         <|eot_id|>
														
 
															         <|begin_of_text|><|start_header_id|>user<|end_header_id|>
														
@@ -259,14 +285,19 @@ def get_query(db, question, selected_table, llm):
 
															     # query = query.replace("104_112碰排放公開及建準資料","104_112碳排放公開及建準資料")
														
 
															     print(query)
														
 
															-    return query
														
 
															-
														
 
															-def query_to_nl(db, question, query, llm):
														
 
															     execute_query = QuerySQLDataBaseTool(db=db)
														
 
															     result = execute_query.invoke(query)
														
 
															     print(result)
														
 
															+    return query, result
														
 
															+
														
 
															+def query_to_nl(question, query, result, llm):
														
 
															+    # execute_query = QuerySQLDataBaseTool(db=db)
														
 
															+    # result = execute_query.invoke(query)
														
 
															+    # print(result)
														
 
															+
														
 
															     chain = sql_to_nl_chain(llm)
														
 
															+    print(result)
														
 
															     answer = chain.invoke({"question": question, "query": query, "result": result})
														
 
															     return answer
														
@@ -295,7 +326,7 @@ if __name__ == "__main__":
 
															     start = time.time()
														
 
															-    selected_table = ['用水度數', '用水度數', '建準碳排放清冊數據']
														
 
															+    selected_table = ['用水度數', '用水度數', '建準碳排放清冊數據new']
														
 
															     question = "建準去年的上游運輸總排放量是多少?"
														
 
															     # question = "台積電2022年的直接排放總排放量是多少?"
														
 
															     # question = "建準廣興廠去年的灰電使用量"
Pembuat	SHA1 Pesan	Tanggal
ling	e46d4b62c3 update agent flow	1 tahun lalu
ling	b3e8ddabe9 update text-to-sql prompt	1 tahun lalu