|
@@ -74,21 +74,62 @@ def get_examples():
|
|
|
},
|
|
|
{
|
|
|
"input": "建準廣興廠去年的類別1總排放量是多少?",
|
|
|
- "query": 'SELECT SUM("昆山廣興廠") AS "建準廣興廠類別1總排放量"\nFROM "2023 清冊數據(GHG)"\nWHERE "類別" like \'%類別1%\'',
|
|
|
+ "query": 'SELECT SUM("昆山廣興廠") AS "建準廣興廠類別1總排放量"\nFROM "2023 清冊數據(GHG)"\nWHERE "類別" = \'類別1-直接排放\'',
|
|
|
},
|
|
|
{
|
|
|
"input": "建準廣興廠去年的直接排放總排放量是多少?",
|
|
|
- "query": 'SELECT SUM("昆山廣興廠") AS "建準廣興廠直接排放總排放量"\nFROM "2023 清冊數據(GHG)"\nWHERE "類別" like \'%直接排放%\'',
|
|
|
+ "query": 'SELECT SUM("昆山廣興廠") AS "建準廣興廠直接排放總排放量"\nFROM "2023 清冊數據(GHG)"\nWHERE "類別" = \'類別1-直接排放\'',
|
|
|
},
|
|
|
+ {
|
|
|
+ "input": "建準廣興廠去年的能源間接排放總排放量是多少?",
|
|
|
+ "query": 'SELECT SUM("昆山廣興廠") AS "建準廣興廠直接排放總排放量"\nFROM "2023 清冊數據(GHG)"\nWHERE "類別" = \'類別2-能源間接排放\'',
|
|
|
+ },
|
|
|
+
|
|
|
|
|
|
]
|
|
|
|
|
|
return examples
|
|
|
|
|
|
-def write_query_chain(db):
|
|
|
+def table_description():
|
|
|
+ database_description = (
|
|
|
+ "The database consists of following tables: `2022 清冊數據(ISO)`, `2023 清冊數據(ISO)`, `2022 清冊數據(GHG)`, `2023 清冊數據(GHG)`, `水電使用量(ISO)` and `水電使用量(GHG)`. "
|
|
|
+ "This is a PostgreSQL database, so you need to use postgres-related queries.\n\n"
|
|
|
+ "The `2022 清冊數據(ISO)`, `2023 清冊數據(ISO)`, `2022 清冊數據(GHG)` and `2023 清冊數據(GHG)` table 描述了不同廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的溫室氣體排放量,並依類別1至類別6劃分。"
|
|
|
+ "It includes the following columns:\n"
|
|
|
+ "- `類別`: 溫室氣體的排放類別,包含以下:\n"
|
|
|
+ " \t*類別1-直接排放\n"
|
|
|
+ " \t*類別2-能源間接排放\n"
|
|
|
+ " \t*類別3-運輸間接排放\n"
|
|
|
+ " \t*類別4-組織使用產品間接排放\n"
|
|
|
+ " \t*類別5-使用來自組織產品間接排放\n"
|
|
|
+ " \t*類別6\n"
|
|
|
+ "- `排放源`: `類別`欄位進一步劃分的細項\n"
|
|
|
+ "- `高雄總部&運通廠`: 位於台灣的廠房據點\n"
|
|
|
+ "- `台北辦公室`: 位於台灣的廠房據點\n"
|
|
|
+ "- `北海建準廠`: 位於中國的廠房據點\n"
|
|
|
+ "- `北海立準廠`: 位於中國的廠房據點\n"
|
|
|
+ "- `昆山廣興廠`: 位於中國的廠房據點\n"
|
|
|
+ "- `菲律賓建準廠`: 位於菲律賓的廠房據點\n"
|
|
|
+ "- `India`: 位於印度的廠房據點\n"
|
|
|
+ "- `INC`: 位於美國的廠房據點\n"
|
|
|
+ "- `SAS`: 位於法國的廠房據點\n\n"
|
|
|
+
|
|
|
+ "The `水電使用量(ISO)` and `水電使用量(GHG)` table 描述了不同廠房分別在 ISO 14064-1:2018 與 GHG Protocol 標準下的水電使用量,包含'外購電力 度數 (kwh)'與'自來水 度數 (立方公尺 m³)'。"
|
|
|
+ "The `public.departments_table` table contains information about the various departments in the company. It includes:\n"
|
|
|
+ "- `外購電力(灰電)`: 灰電(火力發電、核能發電等)的外購電力度數(kwh)\n"
|
|
|
+ "- `外購電力(綠電)`: 綠電(太陽光電)的外購電力度數(kwh)\n"
|
|
|
+ "- `自產電力(綠電)`: 綠電(太陽光電)的自產電力度數(kwh)\n"
|
|
|
+ "- `用水量`: 自來水的使用度數(m³)\n\n"
|
|
|
+ )
|
|
|
+
|
|
|
+ return database_description
|
|
|
|
|
|
- template = """<|begin_of_text|><|start_header_id|>user<|end_header_id|>
|
|
|
+def write_query_chain(db):
|
|
|
|
|
|
+ template = """
|
|
|
+ <|begin_of_text|>
|
|
|
+
|
|
|
+ <|start_header_id|>system<|end_header_id|>
|
|
|
Generate a SQL query to answer this question: `{input}`
|
|
|
|
|
|
You are a PostgreSQL expert in ESG field. Given an input question, first create a syntactically correct PostgreSQL query to run,
|
|
@@ -100,12 +141,20 @@ def write_query_chain(db):
|
|
|
|
|
|
***Pay attention to only return query for PostgreSQL WITHOUT "```sql", And DO NOT content any other words.\n\
|
|
|
***Pay attention to only return PostgreSQL query.\n\
|
|
|
-
|
|
|
+ <|eot_id|>
|
|
|
+
|
|
|
+ <|begin_of_text|><|start_header_id|>user<|end_header_id|>
|
|
|
DDL statements:
|
|
|
- {table_info}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
|
|
|
+ {table_info}
|
|
|
+
|
|
|
+ database description:
|
|
|
+ {database_description}
|
|
|
|
|
|
The following SQL query best answers the question `{input}`:
|
|
|
```sql
|
|
|
+ <|eot_id|>
|
|
|
+
|
|
|
+ <|start_header_id|>assistant<|end_header_id|>
|
|
|
"""
|
|
|
# prompt_template = PromptTemplate.from_template(template)
|
|
|
|
|
@@ -133,6 +182,7 @@ def sql_to_nl_chain():
|
|
|
# llm = Ollama(model = "llama3-groq-tool-use:latest", num_gpu=1)
|
|
|
answer_prompt = PromptTemplate.from_template(
|
|
|
"""
|
|
|
+ <|begin_of_text|>
|
|
|
<|begin_of_text|><|start_header_id|>system<|end_header_id|>
|
|
|
Given the following user question, corresponding SQL query, and SQL result, answer the user question.
|
|
|
給定以下使用者問題、對應的 SQL 查詢和 SQL 結果,以繁體中文回答使用者問題。
|
|
@@ -142,11 +192,18 @@ def sql_to_nl_chain():
|
|
|
SQL Query: SELECT SUM("昆山廣興廠") AS "建準廣興廠類別1總排放量"\nFROM "2023 清冊數據(GHG)"\nWHERE "類別" like \'%類別1%\'
|
|
|
SQL Result: [(1102.3712,)]
|
|
|
Answer: 建準廣興廠去年的類別1總排放量是1102.3712
|
|
|
+ <|eot_id|>
|
|
|
|
|
|
+ <|begin_of_text|><|start_header_id|>user<|end_header_id|>
|
|
|
Question: {question}
|
|
|
SQL Query: {query}
|
|
|
SQL Result: {result}
|
|
|
- Answer: """
|
|
|
+ Answer:
|
|
|
+ <|eot_id|>
|
|
|
+
|
|
|
+ <|start_header_id|>assistant<|end_header_id|>
|
|
|
+
|
|
|
+ """
|
|
|
)
|
|
|
|
|
|
chain = answer_prompt | llm | StrOutputParser()
|
|
@@ -156,7 +213,7 @@ def sql_to_nl_chain():
|
|
|
def run(db, question, selected_table):
|
|
|
|
|
|
write_query = write_query_chain(db)
|
|
|
- query = write_query.invoke({"question": question, 'table_names_to_use': selected_table, "top_k": 1000, "table_info":context["table_info"]})
|
|
|
+ query = write_query.invoke({"question": question, 'table_names_to_use': selected_table, "top_k": 1000, "table_info":context["table_info"], "database_description": table_description()})
|
|
|
|
|
|
query = re.split('SQL query: ', query)[-1]
|
|
|
print(query)
|
|
@@ -176,9 +233,10 @@ if __name__ == "__main__":
|
|
|
|
|
|
start = time.time()
|
|
|
|
|
|
- selected_table = ['2022 清冊數據(GHG)', '2022 清冊數據(ISO)', '2023 清冊數據(GHG)', '2023 清冊數據(ISO)']
|
|
|
- question = "去年的固定燃燒總排放量是多少?"
|
|
|
+ selected_table = ['2022 清冊數據(GHG)', '2022 清冊數據(ISO)', '2023 清冊數據(GHG)', '2023 清冊數據(ISO)', '水電使用量(GHG)', '水電使用量(ISO)']
|
|
|
+ question = "建準廣興廠去年的綠電使用量是多少?"
|
|
|
query, result, answer = run(db, question, selected_table)
|
|
|
+ print("question: ", question)
|
|
|
print("query: ", query)
|
|
|
print("result: ", result)
|
|
|
print("answer: ", answer)
|