Browse Source

update text-to-sql prompt

ling 4 months ago
parent
commit
b3e8ddabe9
2 changed files with 57 additions and 24 deletions
  1. +3 −1
      post_processing_sqlparse.py
  2. +54 −23
      text_to_sql_private.py

+ 3 - 1
post_processing_sqlparse.py

@@ -60,7 +60,9 @@ def parse_sql_where(sql):
     column_dict = {
         "排放源": None,
         "類別": None,
-        "項目": None
+        "類別項目": None,
+        "項目": None,
+        
     }
 
     def get_column_details(token, column_args):

+ 54 - 23
text_to_sql_private.py

@@ -76,39 +76,51 @@ llm = ChatOllama(model=local_llm, temperature=0)
 def get_examples():
     examples = [
         {
-            "input": "建準廣興廠2023年的自產電力的綠電使用量是多少?",
-            "query": """SELECT SUM("用電度數(kwh)") AS "自產電力綠電使用量"
+            "input": "建準廣興廠年的自產電力的綠電使用量是多少?",
+            "query": """SELECT SUM("用電度數(kwh)") AS "綠電使用量"
                         FROM "用電度數"
-                        WHERE "項目" = '自產電力(綠電)'
+                        WHERE "項目" like '%綠電%'
+                        AND "事業名稱" like '%建準%'
+                        AND "事業名稱" like '%廣興廠%'
                         AND "盤查標準" = 'GHG'
-                        AND "年度" = 2023;""",
+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
         },
         {
-            "input": "建準廣興廠去年的類別1總排放量是多少?",
+            "input": "建準北海廠去年的類別1總排放量是多少?",
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
-                        FROM "建準碳排放清冊數據"
+                        FROM "建準碳排放清冊數據new"
                         WHERE "事業名稱" like '%建準%'
-                        AND "事業名稱" like '%廣興廠%'
-                        AND ("類別" like '%類別1-直接排放%' OR "排放源" like '%類別1-直接排放%')
+                        AND "事業名稱" like '%北海%'
+                        AND "類別" = '類別1'
+                        AND "盤查標準" = 'GHG'
+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
+        },
+        {
+            "input": "建準廣興廠去年的直接排放總排放量是多少?",
+            "query": """SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
+                        FROM "建準碳排放清冊數據new"
+                        WHERE "事業名稱" like '%建準%'
+                        AND "事業名稱" like '%廣興%'
+                        AND ("類別項目" like '%直接排放%' OR "排放源" like '%直接排放%')
                         AND "盤查標準" = 'GHG'
                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
         },
         {
-            "input": "建準台北辦事處2022年的能源間接排放總排放量是多少?",
+            "input": "建準台北辦事處2022年的類別2總排放量是多少?",
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "直接排放總排放量"
-                        FROM "建準碳排放清冊數據"
+                        FROM "建準碳排放清冊數據new"
                         WHERE "事業名稱" like '%建準%'
-                        AND "事業名稱" like '%台北辦事處%'
-                        AND ("類別" like '%類別2-能源間接排放%' OR "排放源" like '%類別2-能源間接排放%')
+                        AND "事業名稱" like '%台北%'
+                        AND "類別" = '類別2'
                         AND "盤查標準" = 'GHG'
                         AND "年度" = 2022;""",
         },
         {
             "input": "建準去年的固定燃燒總排放量是多少?",
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "固定燃燒總排放量"
-                        FROM "建準碳排放清冊數據"
+                        FROM "建準碳排放清冊數據new"
                         WHERE "事業名稱" like '%建準%'
-                        AND ("類別" like '%固定燃燒%' OR "排放源" like '%固定燃燒%')
+                        AND ("類別項目" like '%固定燃燒%' OR "排放源" like '%固定燃燒%')
                         AND "盤查標準" = 'GHG'
                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
         },
@@ -120,9 +132,9 @@ def get_examples():
 
 def table_description():
     database_description = (
-        "The database consists of following table: `用水度數`, `用水度數`, `建準碳排放清冊數據`. "
+        "The database consists of following table: `用水度數`, `用水度數`, `建準碳排放清冊數據new`."
         "This is a PostgreSQL database, so you need to use postgres-related queries.\n\n"
-        "The `建準碳排放清冊數據` table 描述了不同事業單位或廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的溫室氣體排放量,並依類別1至類別6劃分。"
+        "The `建準碳排放清冊數據new` table 描述了建準電機工業股份有限公司不同據點分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的溫室氣體排放量,並依類別1至類別6劃分。"
         "It includes the following columns:\n"
         "- `年度`: 盤查年度\n"
         "- `事業名稱`: 建準據點"
@@ -142,7 +154,7 @@ def table_description():
         "- `盤查標準`: ISO or GHG\n"
         
 
-        "The `用電度數` 描述了不同廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
+        "The `用電度數` 描述了建準電機工業股份有限公司不同據點分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
         "It includes the following columns:\n"
         "- `年度`: 盤查年度\n"
         "- `事業名稱`: 建準據點"
@@ -154,7 +166,7 @@ def table_description():
         "- `用電度數(kwh)`: 用電度數,單位為kwh\n"
         "- `盤查標準`: ISO or GHG\n"
         
-        "The `用水度數` 描述了不同廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
+        "The `用水度數` 描述了建準電機工業股份有限公司不同據點分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量。"
         "It includes the following columns:\n"
         "- `年度`: 盤查年度\n"
         "- `事業名稱`: 建準據點"
@@ -230,8 +242,22 @@ def sql_to_nl_chain(llm):
         <|begin_of_text|><|start_header_id|>system<|end_header_id|>
         Given the following user question, corresponding SQL query, and SQL result, answer the user question.
         根據使用者的問題、對應的 SQL 查詢和 SQL 結果,以繁體中文回答使用者問題。
-
+        ** 請務必在回答中表達是建準的資料,即便問句中並未提及建準。
         
+        The following shows some example:
+        Question: 廣興廠去年的類別1總排放量是多少?
+        SQL Query: SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
+                        FROM "建準碳排放清冊數據new"
+                        WHERE "事業名稱" like '%建準%'
+                        AND "事業名稱" like '%廣興%'
+                        AND "類別" = '類別1'
+                        AND "盤查標準" = 'GHG'
+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;,
+        SQL Result: [(1102.3712,)]
+        Answer: 建準廣興廠去年的類別1總排放量是1102.3712
+
+        如果你不知道答案或SQL query 出現錯誤請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
+        勿回答無關資訊
         <|eot_id|>
 
         <|begin_of_text|><|start_header_id|>user<|end_header_id|>
@@ -259,14 +285,19 @@ def get_query(db, question, selected_table, llm):
     # query = query.replace("104_112碰排放公開及建準資料","104_112碳排放公開及建準資料")
     print(query)
     
-    return query
-
-def query_to_nl(db, question, query, llm):
     execute_query = QuerySQLDataBaseTool(db=db)
     result = execute_query.invoke(query)
     print(result)
 
+    return query, result
+
+def query_to_nl(question, query, result, llm):
+    # execute_query = QuerySQLDataBaseTool(db=db)
+    # result = execute_query.invoke(query)
+    # print(result)
+
     chain = sql_to_nl_chain(llm)
+    print(result)
     answer = chain.invoke({"question": question, "query": query, "result": result})
 
     return answer
@@ -295,7 +326,7 @@ if __name__ == "__main__":
     
     start = time.time()
     
-    selected_table = ['用水度數', '用水度數', '建準碳排放清冊數據']
+    selected_table = ['用水度數', '用水度數', '建準碳排放清冊數據new']
     question = "建準去年的上游運輸總排放量是多少?"
     # question = "台積電2022年的直接排放總排放量是多少?"
     # question = "建準廣興廠去年的灰電使用量"