Browse Source

update text-to-sql prompt

ling 4 months ago
parent
commit
b3e8ddabe9
2 changed files with 57 additions and 24 deletions
  1. 3 1
      post_processing_sqlparse.py
  2. 54 23
      text_to_sql_private.py

+ 3 - 1
post_processing_sqlparse.py

@@ -60,7 +60,9 @@ def parse_sql_where(sql):
     column_dict = {
     column_dict = {
         "排放源": None,
         "排放源": None,
         "類別": None,
         "類別": None,
-        "項目": None
+        "類別項目": None,
+        "項目": None,
+        
     }
     }
 
 
     def get_column_details(token, column_args):
     def get_column_details(token, column_args):

+ 54 - 23
text_to_sql_private.py

@@ -76,39 +76,51 @@ llm = ChatOllama(model=local_llm, temperature=0)
 def get_examples():
 def get_examples():
     examples = [
     examples = [
         {
         {
-            "input": "建準廣興廠2023年的自產電力的綠電使用量是多少?",
-            "query": """SELECT SUM("用電度數(kwh)") AS "自產電力綠電使用量"
+            "input": "建準廣興廠去年的自產電力的綠電使用量是多少?",
+            "query": """SELECT SUM("用電度數(kwh)") AS "綠電使用量"
                         FROM "用電度數"
                         FROM "用電度數"
-                        WHERE "項目" = '自產電力(綠電)'
+                        WHERE "項目" like '%綠電%'
+                        AND "事業名稱" like '%建準%'
+                        AND "事業名稱" like '%廣興廠%'
                         AND "盤查標準" = 'GHG'
                         AND "盤查標準" = 'GHG'
-                        AND "年度" = 2023;""",
+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
         },
         },
         {
         {
-            "input": "建準廣興廠去年的類別1總排放量是多少?",
+            "input": "建準北海廠去年的類別1總排放量是多少?",
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
-                        FROM "建準碳排放清冊數據"
+                        FROM "建準碳排放清冊數據new"
                         WHERE "事業名稱" like '%建準%'
                         WHERE "事業名稱" like '%建準%'
-                        AND "事業名稱" like '%廣興廠%'
-                        AND ("類別" like '%類別1-直接排放%' OR "排放源" like '%類別1-直接排放%')
+                        AND "事業名稱" like '%北海%'
+                        AND "類別" = '類別1'
+                        AND "盤查標準" = 'GHG'
+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
+        },
+        {
+            "input": "建準廣興廠去年的直接排放總排放量是多少?",
+            "query": """SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
+                        FROM "建準碳排放清冊數據new"
+                        WHERE "事業名稱" like '%建準%'
+                        AND "事業名稱" like '%廣興%'
+                        AND ("類別項目" like '%直接排放%' OR "排放源" like '%直接排放%')
                         AND "盤查標準" = 'GHG'
                         AND "盤查標準" = 'GHG'
                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
         },
         },
         {
         {
-            "input": "建準台北辦事處2022年的能源間接排放總排放量是多少?",
+            "input": "建準台北辦事處2022年的類別2總排放量是多少?",
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "直接排放總排放量"
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "直接排放總排放量"
-                        FROM "建準碳排放清冊數據"
+                        FROM "建準碳排放清冊數據new"
                         WHERE "事業名稱" like '%建準%'
                         WHERE "事業名稱" like '%建準%'
-                        AND "事業名稱" like '%台北辦事處%'
-                        AND ("類別" like '%類別2-能源間接排放%' OR "排放源" like '%類別2-能源間接排放%')
+                        AND "事業名稱" like '%台北%'
+                        AND "類別" = '類別2'
                         AND "盤查標準" = 'GHG'
                         AND "盤查標準" = 'GHG'
                         AND "年度" = 2022;""",
                         AND "年度" = 2022;""",
         },
         },
         {
         {
             "input": "建準去年的固定燃燒總排放量是多少?",
             "input": "建準去年的固定燃燒總排放量是多少?",
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "固定燃燒總排放量"
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "固定燃燒總排放量"
-                        FROM "建準碳排放清冊數據"
+                        FROM "建準碳排放清冊數據new"
                         WHERE "事業名稱" like '%建準%'
                         WHERE "事業名稱" like '%建準%'
-                        AND ("類別" like '%固定燃燒%' OR "排放源" like '%固定燃燒%')
+                        AND ("類別項目" like '%固定燃燒%' OR "排放源" like '%固定燃燒%')
                         AND "盤查標準" = 'GHG'
                         AND "盤查標準" = 'GHG'
                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
         },
         },
@@ -120,9 +132,9 @@ def get_examples():
 
 
 def table_description():
 def table_description():
     database_description = (
     database_description = (
-        "The database consists of following table: `用水度數`, `用水度數`, `建準碳排放清冊數據`. "
+        "The database consists of following tables: `用電度數`, `用水度數`, `建準碳排放清冊數據new`. "
         "This is a PostgreSQL database, so you need to use postgres-related queries.\n\n"
         "This is a PostgreSQL database, so you need to use postgres-related queries.\n\n"
-        "The `建準碳排放清冊數據` table 描述了不同事業單位或廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的溫室氣體排放量,並依類別1至類別6劃分。"
+        "The `建準碳排放清冊數據new` table 描述了建準電機工業股份有限公司不同據點分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的溫室氣體排放量,並依類別1至類別6劃分。"
         "It includes the following columns:\n"
         "It includes the following columns:\n"
         "- `年度`: 盤查年度\n"
         "- `年度`: 盤查年度\n"
         "- `事業名稱`: 建準據點"
         "- `事業名稱`: 建準據點"
@@ -142,7 +154,7 @@ def table_description():
         "- `盤查標準`: ISO or GHG\n"
         "- `盤查標準`: ISO or GHG\n"
         
         
 
 
-        "The `用電度數` 描述了不同廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
+        "The `用電度數` 描述了建準電機工業股份有限公司不同據點分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
         "It includes the following columns:\n"
         "It includes the following columns:\n"
         "- `年度`: 盤查年度\n"
         "- `年度`: 盤查年度\n"
         "- `事業名稱`: 建準據點"
         "- `事業名稱`: 建準據點"
@@ -154,7 +166,7 @@ def table_description():
         "- `用電度數(kwh)`: 用電度數,單位為kwh\n"
         "- `用電度數(kwh)`: 用電度數,單位為kwh\n"
         "- `盤查標準`: ISO or GHG\n"
         "- `盤查標準`: ISO or GHG\n"
         
         
-        "The `用水度數` 描述了不同廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
+        "The `用水度數` 描述了建準電機工業股份有限公司不同據點分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
         "It includes the following columns:\n"
         "It includes the following columns:\n"
         "- `年度`: 盤查年度\n"
         "- `年度`: 盤查年度\n"
         "- `事業名稱`: 建準據點"
         "- `事業名稱`: 建準據點"
@@ -230,8 +242,22 @@ def sql_to_nl_chain(llm):
         <|begin_of_text|><|start_header_id|>system<|end_header_id|>
         <|begin_of_text|><|start_header_id|>system<|end_header_id|>
         Given the following user question, corresponding SQL query, and SQL result, answer the user question.
         Given the following user question, corresponding SQL query, and SQL result, answer the user question.
         根據使用者的問題、對應的 SQL 查詢和 SQL 結果,以繁體中文回答使用者問題。
         根據使用者的問題、對應的 SQL 查詢和 SQL 結果,以繁體中文回答使用者問題。
-
+        ** 請務必在回答中表達是建準的資料,即便問句中並未提及建準。
         
         
+        The following shows an example:
+        Question: 廣興廠去年的類別1總排放量是多少?
+        SQL Query: SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
+                        FROM "建準碳排放清冊數據new"
+                        WHERE "事業名稱" like '%建準%'
+                        AND "事業名稱" like '%廣興%'
+                        AND "類別" = '類別1'
+                        AND "盤查標準" = 'GHG'
+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;
+        SQL Result: [(1102.3712,)]
+        Answer: 建準廣興廠去年的類別1總排放量是1102.3712
+
+        如果你不知道答案或SQL query 出現錯誤請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
+        勿回答無關資訊
         <|eot_id|>
         <|eot_id|>
 
 
         <|begin_of_text|><|start_header_id|>user<|end_header_id|>
         <|begin_of_text|><|start_header_id|>user<|end_header_id|>
@@ -259,14 +285,19 @@ def get_query(db, question, selected_table, llm):
     # query = query.replace("104_112碰排放公開及建準資料","104_112碳排放公開及建準資料")
     # query = query.replace("104_112碰排放公開及建準資料","104_112碳排放公開及建準資料")
     print(query)
     print(query)
     
     
-    return query
-
-def query_to_nl(db, question, query, llm):
     execute_query = QuerySQLDataBaseTool(db=db)
     execute_query = QuerySQLDataBaseTool(db=db)
     result = execute_query.invoke(query)
     result = execute_query.invoke(query)
     print(result)
     print(result)
 
 
+    return query, result
+
+def query_to_nl(question, query, result, llm):
+    # execute_query = QuerySQLDataBaseTool(db=db)
+    # result = execute_query.invoke(query)
+    # print(result)
+
     chain = sql_to_nl_chain(llm)
     chain = sql_to_nl_chain(llm)
+    print(result)
     answer = chain.invoke({"question": question, "query": query, "result": result})
     answer = chain.invoke({"question": question, "query": query, "result": result})
 
 
     return answer
     return answer
@@ -295,7 +326,7 @@ if __name__ == "__main__":
     
     
     start = time.time()
     start = time.time()
     
     
-    selected_table = ['用水度數', '用水度數', '建準碳排放清冊數據']
+    selected_table = ['用電度數', '用水度數', '建準碳排放清冊數據new']
     question = "建準去年的上游運輸總排放量是多少?"
     question = "建準去年的上游運輸總排放量是多少?"
     # question = "台積電2022年的直接排放總排放量是多少?"
     # question = "台積電2022年的直接排放總排放量是多少?"
     # question = "建準廣興廠去年的灰電使用量"
     # question = "建準廣興廠去年的灰電使用量"