2 Коміти 889552f0c0 ... f32ae83c55

Автор SHA1 Опис Дата
  ling f32ae83c55 add new column '範疇' 4 місяців тому
  ling 4e2e6c2245 update route prompt 4 місяців тому
3 змінених файлів з 46 додано та 13 видалено
  1. 23 11
      ai_agent.py
  2. 1 0
      post_processing_sqlparse.py
  3. 22 2
      text_to_sql_private.py

+ 23 - 11
ai_agent.py

@@ -80,7 +80,7 @@ def faiss_query(question: str, docs, llm, multi_query: bool = False) -> str:
     )
 
     rag_chain = prompt | llm | StrOutputParser()
-    return rag_chain.invoke({"context": context, "question": question})
+    return docs, rag_chain.invoke({"context": context, "question": question})
 
 ### Hallucination Grader
 
@@ -166,16 +166,21 @@ def generate_additional_detail(sql_query):
     answer = ""
     all_documents = []
     for term in list(set(terms)):
+        print(term)
         if term is None: continue
-        question_format = [f"溫室氣體排放源中的{term}是什麼意思?", f"{term}是什麼意思?"]
+        question_format = [ f"溫室氣體排放源中的{term}是什麼意思?",  f"{term}是什麼意思?"]
+        # f"溫室氣體排放源中的{term}是什麼意思?",
         for question in question_format:
             # question = f"什麼是{term}?"
             documents = retriever.get_relevant_documents(question, k=5)
             all_documents.extend(documents)
             # for doc in documents:
             #     print(doc)
-        all_question = "\n".join(question_format)
-        generation = faiss_query(all_question, all_documents, llm, multi_query=True) + "\n"
+        all_question = "".join(question_format)
+        documents, generation = faiss_query(all_question, all_documents, llm, multi_query=True) 
+        # print(generation)
+        # print("-----------------------")
+        # generation = answer + "\n"
         if "test@systex.com" in generation:
             generation = ""
         
@@ -238,10 +243,9 @@ def Router():
     prompt = PromptTemplate(
         template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> 
         You are an expert at routing a user question to a 專業知識 or 自有數據. 
-        Use company private data for questions about the informations about a company's greenhouse gas emissions data.
-        Otherwise, use the 專業知識 for questions on ESG field knowledge or news about ESG. 
-        你需要分辨使用者問題是否在詢問公司的自有數據,例如想了解公司的碳排放源數據等等,如果判斷為是,則使用"自有數據",
-        若使用者的問題是想了解碳盤查或碳管理等等的 ESG 知識和相關新聞,請使用"專業知識"。
+        你需要分辨使用者問題是否在詢問某個公司與其據點廠房的自有數據或是尋求專業的碳盤查或碳管理等等的 ESG 知識和相關新聞,
+        如果問題是想了解某個公司與其據點廠房的碳排放源的排放量或用電、用水量等等,請使用"自有數據",
+        若使用者的問題是想了解碳盤查、碳交易或碳管理等等的 ESG 知識和相關新聞,請使用"專業知識"。
         You do not need to be stringent with the keywords in the question related to these topics. 
         Give a binary choice '自有數據' or '專業知識' based on the question. 
         Return the a JSON with a single key 'datasource' and no premable or explanation. 
@@ -319,7 +323,9 @@ def retrieve_and_generation(state):
             
         # docs_documents = "\n\n".join(doc.page_content for doc in documents)
         # print(documents)
-        generation = faiss_query(question, documents, llm, multi_query=True)
+        documents, generation = faiss_query(question, documents, llm, multi_query=True)
+        # for doc in documents:
+        #     print(doc)
     else:
         generation = state["generation"]
         
@@ -328,7 +334,8 @@ def retrieve_and_generation(state):
             documents = retriever.get_relevant_documents(sub_question, k=5)
             # for doc in documents:
             #     print(doc)
-            generation += faiss_query(sub_question, documents, llm, multi_query=True)
+            documents, answer = faiss_query(sub_question, documents, llm, multi_query=True)
+            generation += answer
             generation += "\n"
             
     print(generation)
@@ -443,8 +450,13 @@ def route_question(state):
     # print(question)
     question_router = Router()
     source = question_router.invoke({"question": question})
-    if "建準" in question:
+    print("Original:", source["datasource"])
+    # if "建準" in question:
+    kw = ["建準", "北海", "廣興", "崑山廣興", "Inc", "SAS", "立準"]
+    if any(char in question for char in kw):
         source["datasource"] = "自有數據"
+    elif "範例" in question:
+        source["datasource"] = "專業知識"
         
     # print(source)
     print(source["datasource"])

+ 1 - 0
post_processing_sqlparse.py

@@ -60,6 +60,7 @@ def parse_sql_where(sql):
     column_dict = {
         "排放源": None,
         "類別": None,
+        "範疇": None,
         "類別項目": None,
         "項目": None,
         

+ 22 - 2
text_to_sql_private.py

@@ -109,7 +109,17 @@ def get_examples():
                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
         },
         {
-            "input": "建準北海廠去年的類別1總排放量",
+            "input": "廣興廠2023綠電使用量",
+            "query": """SELECT SUM("用電度數(kwh)") AS "綠電使用量"
+                        FROM "用電度數"
+                        WHERE "項目" like '%綠電%'
+                        AND "事業名稱" like '%建準%'
+                        AND "據點" = '昆山廣興廠'
+                        AND "盤查標準" = 'GHG'
+                        AND "年度" = 2023;""",
+        },
+        {
+            "input": "北海廠去年的類別1總排放量",
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "類別1總排放量"
                         FROM "建準碳排放清冊數據new"
                         WHERE "事業名稱" like '%建準%'
@@ -119,7 +129,7 @@ def get_examples():
                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
         },
         {
-            "input": "建準廣興廠去年的直接排放總排放量是多少?",
+            "input": "廣興廠去年的直接排放總排放量是多少?",
             "query": """SELECT SUM("排放量(公噸CO2e)") AS "直接排放總排放量"
                         FROM "建準碳排放清冊數據new"
                         WHERE "事業名稱" like '%建準%'
@@ -178,6 +188,15 @@ def get_examples():
                         AND "盤查標準" = 'GHG'
                         AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-2;""",
         },
+        {
+            "input": "建準去年範疇三排放量",
+            "query": """SELECT SUM("排放量(公噸CO2e)") AS "範疇三排放量"
+                        FROM "建準碳排放清冊數據new"
+                        WHERE "事業名稱" like '%建準%'
+                        AND "範疇" = '範疇三'
+                        AND "盤查標準" = 'GHG'
+                        AND "年度" = EXTRACT(YEAR FROM CURRENT_DATE)-1;""",
+        },
 
 
 
@@ -195,6 +214,7 @@ def table_description():
         "- `事業名稱`: 公司名稱"
         "- `據點`: 建準廠房據點 include '高雄總部及運通廠', '台北辦事處', '昆山廣興廠', '北海建準廠', '北海立準廠', '菲律賓建準廠', 'Inc', 'SAS', 'India'"
         "- `國家`: 據點所在國家"
+        "- `範疇`: 碳盤查中把溫室氣體排放源分成三大範疇"
         "- `類別`: 溫室氣體的排放類別,包含以下選項:\n"
         "   \t*類別1-直接排放:\n"
         "   \t*類別2-能源間接排放\n"