Преглед изворни кода

commit before switch branch

SherryLiu пре 7 месеци
родитељ
комит
fb0200e2e4
13 измењених фајлова са 1941 додато и 0 уклоњено
  1. 17 0
      .gitignore
  2. 1 0
      =40.0.0
  3. 2 0
      Modelfile
  4. 198 0
      RAG_app.py
  5. 20 0
      README_docker.md
  6. 11 0
      docker-compose.yml
  7. 15 0
      dockerfile
  8. 222 0
      faiss_index.py
  9. 74 0
      ollama_chat.py
  10. 6 0
      requirements_semantic_search.txt
  11. 1339 0
      semantic_cache.ipynb
  12. 30 0
      taide_rag.py
  13. 6 0
      test_data.txt

+ 17 - 0
.gitignore

@@ -0,0 +1,17 @@
+chroma_db
+chroma_db_ans_embedding/
+chroma_db_carbon_questions/
+dump.rdb
+venv
+.env
+.langchain.db
+sqlite.db
+__pycache__
+documents_rows.csv
+environment.env
+faiss_index.bin
+faiss_metadata.pkl
+main.log
+out.gv
+out.png
+QA_database_rows.csv 

+ 1 - 0
=40.0.0

@@ -0,0 +1 @@
+Requirement already satisfied: setuptools in /opt/anaconda3/envs/choozemo-carbon/lib/python3.9/site-packages (70.1.1)

+ 2 - 0
Modelfile

@@ -0,0 +1,2 @@
+FROM "path to the .gguf file"
+# TAIDE .gguf file can be downloaded here https://huggingface.co/taide/TAIDE-LX-7B-Chat-4bit/tree/main?show_file_info=taide-7b-a.2-q4_k_m.gguf

+ 198 - 0
RAG_app.py

@@ -0,0 +1,198 @@
+from dotenv import load_dotenv
+load_dotenv('environment.env')
+
+from fastapi import FastAPI, Request, HTTPException, status, Body
+# from fastapi.templating import Jinja2Templates
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import FileResponse
+from fastapi import Depends
+from contextlib import asynccontextmanager
+from pydantic import BaseModel
+from typing import List, Optional
+import uvicorn
+
+from typing import List, Optional
+import sqlparse
+from sqlalchemy import create_engine
+import pandas as pd
+#from retrying import retry
+import datetime
+import json
+from json import loads
+import pandas as pd
+import time
+from langchain.callbacks import get_openai_callback
+
+from langchain_community.vectorstores import Chroma
+from langchain_openai import OpenAIEmbeddings
+from RAG_strategy import multi_query, naive_rag, naive_rag_for_qapairs
+from Indexing_Split import create_retriever as split_retriever
+from Indexing_Split import gen_doc_from_database, gen_doc_from_history
+
+import os
+from langchain_community.vectorstores import SupabaseVectorStore
+from langchain_openai import OpenAIEmbeddings
+from supabase.client import Client, create_client
+from add_vectordb import GetVectorStore
+from langchain_community.cache import RedisSemanticCache  # 更新导入路径
+from langchain_core.prompts import PromptTemplate
+import openai
+
+# Credentials and connection strings come from the process environment,
+# which load_dotenv('environment.env') populated at the top of this module.
+openai_api_key = os.getenv("OPENAI_API_KEY")
+openai.api_key = openai_api_key
+# Database URI handed to SQLAlchemy's create_engine() in save_history() and get_history().
+URI = os.getenv("SUPABASE_URI")
+
+# Retriever shared by the request handlers; lifespan() assigns it at startup.
+global_retriever = None
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    global global_retriever
+    global vector_store
+    
+    start = time.time()
+    # global_retriever = split_retriever(path='./Documents', extension="docx")
+    # global_retriever = raptor_retriever(path='../Documents', extension="txt")
+    # global_retriever = unstructured_retriever(path='../Documents')
+
+    supabase_url = os.getenv("SUPABASE_URL")
+    supabase_key = os.getenv("SUPABASE_KEY")
+    document_table = "documents"
+    supabase: Client = create_client(supabase_url, supabase_key)
+
+    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+    vector_store = GetVectorStore(embeddings, supabase, document_table)
+    global_retriever = vector_store.as_retriever(search_kwargs={"k": 4})
+
+    print(time.time() - start)
+    yield
+
+def get_retriever():
+    """FastAPI dependency returning the module-level retriever.
+
+    Returns ``None`` until ``lifespan`` has assigned ``global_retriever``.
+    """
+    return global_retriever
+
+
+def get_vector_store():
+    """Return the module-level vector store created in ``lifespan``.
+
+    NOTE(review): no module-level default for ``vector_store`` is visible in
+    this file, so calling this before startup presumably raises NameError.
+    """
+    return vector_store
+
+# Application instance; lifespan() prepares the vector store before requests are served.
+app = FastAPI(lifespan=lifespan)
+
+# templates = Jinja2Templates(directory="temp")
+# CORS is fully open: any origin, method and header is accepted.
+# NOTE(review): wildcard origins combined with allow_credentials=True is
+# discouraged by the CORS middleware documentation — confirm whether an
+# explicit origin list should be used instead.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+@app.get("/answer2")
+def multi_query_answer(question, retriever=Depends(get_retriever)):
+    start = time.time()
+
+    with get_openai_callback() as cb:
+        # qa_doc = gen_doc_from_database()
+        # qa_history_doc = gen_doc_from_history()
+        # qa_doc.extend(qa_history_doc)
+        # vectorstore = Chroma.from_documents(documents=qa_doc, embedding=OpenAIEmbeddings(), collection_name="qa_pairs")
+        # retriever_qa = vectorstore.as_retriever(search_kwargs={"k": 3})
+        # final_answer, reference_docs = naive_rag_for_qapairs(question, retriever_qa)
+        final_answer = 'False'
+        if final_answer == 'False':
+            final_answer, reference_docs = multi_query(question, retriever, chat_history=[])
+
+    # print(CHAT_HISTORY)
+    
+    # with get_openai_callback() as cb:
+    #     final_answer, reference_docs = multi_query(question, retriever)
+    processing_time = time.time() - start
+    print(processing_time)
+    save_history(question, final_answer, reference_docs, cb, processing_time)
+
+    return {"Answer": final_answer}
+
+# Request-body item describing one earlier chat turn.
+class ChatHistoryItem(BaseModel):
+    # Question text of the turn.
+    q: str
+    # Answer text of the turn; handlers drop turns where this is empty.
+    a: str
+
+@app.post("/answer_with_history")
+def multi_query_answer(question: Optional[str] = '', chat_history: List[ChatHistoryItem] = Body(...), retriever=Depends(get_retriever)):
+    start = time.time()
+    
+    chat_history = [(item.q, item.a) for item in chat_history if item.a != ""]
+    print(chat_history)
+
+    # TODO: similarity search
+    
+    with get_openai_callback() as cb:
+        final_answer, reference_docs = multi_query(question, retriever, chat_history)
+    processing_time = time.time() - start
+    print(processing_time)
+    save_history(question, final_answer, reference_docs, cb, processing_time)
+
+    return {"Answer": final_answer}
+
+
+@app.post("/answer_with_history2")
+def multi_query_answer(question: Optional[str] = '', extension: Optional[str] = 'pdf', chat_history: List[ChatHistoryItem] = Body(...), retriever=Depends(get_retriever)):
+    start = time.time()
+
+    retriever = vector_store.as_retriever(search_kwargs={"k": 4,
+                                                         'filter': {'extension':extension}})
+    
+    chat_history = [(item.q, item.a) for item in chat_history if item.a != ""]
+    print(chat_history)
+
+    # TODO: similarity search
+    
+    with get_openai_callback() as cb:
+        final_answer, reference_docs = multi_query(question, retriever, chat_history)
+    processing_time = time.time() - start
+    print(processing_time)
+    save_history(question, final_answer, reference_docs, cb, processing_time)
+
+    return {"Answer": final_answer}
+
+def save_history(question, answer, reference, cb, processing_time):
+    # reference = [doc.dict() for doc in reference]
+    record = {
+        'Question': [question],
+        'Answer': [answer],
+        'Total_Tokens': [cb.total_tokens],
+        'Total_Cost': [cb.total_cost],
+        'Processing_time': [processing_time],
+        'Contexts': [str(reference)]
+    }
+    df = pd.DataFrame(record)
+    engine = create_engine(URI)
+    df.to_sql(name='systex_records', con=engine, index=False, if_exists='append')
+
+# Response schema for one row returned by GET /history; the field names
+# mirror the record keys written by save_history(), plus a Time column.
+class history_output(BaseModel):
+    Question: str
+    Answer: str
+    # String form of the retrieved reference documents.
+    Contexts: str
+    Total_Tokens: int
+    Total_Cost: float
+    # Seconds spent handling the request.
+    Processing_time: float
+    # NOTE(review): not written by save_history(); presumably filled in by the database — confirm.
+    Time: datetime.datetime
+    
+@app.get('/history', response_model=List[history_output])
+async def get_history():
+    engine = create_engine(URI, echo=True)
+
+    df = pd.read_sql_table("systex_records", engine.connect())  
+    df.fillna('', inplace=True)
+    result = df.to_json(orient='index', force_ascii=False)
+    result = loads(result)
+    return result.values()
+
+# GET /: static welcome payload, usable as a liveness check.
+@app.get("/")
+def read_root():
+    return {"message": "Welcome to the SYSTEX API"}
+
+
+# Development entry point: serve the app with auto-reload on port 8081.
+# NOTE(review): binding to 127.0.0.1 only accepts local connections; a
+# containerised deployment presumably needs host='0.0.0.0' — confirm.
+if __name__ == "__main__":
+    uvicorn.run("RAG_app:app", host='127.0.0.1', port=8081, reload=True)
+    
+# if __name__ == "__main__":
+#     uvicorn.run("RAG_app:app", host='cmm.ai', port=8081, reload=True, ssl_keyfile="/etc/letsencrypt/live/cmm.ai/privkey.pem", 
+#                 ssl_certfile="/etc/letsencrypt/live/cmm.ai/fullchain.pem")
+

+ 20 - 0
README_docker.md

@@ -0,0 +1,20 @@
+# Run TAIDE RAG in VM
+
+## Prerequisites
+- Docker and Docker Compose
+- Ollama (for creating the taide-local model)
+- Download TAIDE .gguf file. https://huggingface.co/taide/TAIDE-LX-7B-Chat-4bit/tree/main?show_file_info=taide-7b-a.2-q4_k_m.gguf Update the file path in `Modelfile`
+
+## Setting up taide-local
+
+1. Install Ollama on your VM 
+`curl -fsSL https://ollama.com/install.sh | sh`
+2. Create the taide-local model
+`ollama create taide-local -f Modelfile`
+
+## Running the Application
+1. Clone this repository.
+`git clone -b public https://github.com/yourusername/your-repo.git`
+2. Create a `.env` file in the project root with your API keys.
+3. Run `docker-compose up --build` 
+4. The application will be available at `http://localhost:8081` (the port published in `docker-compose.yml`).

+ 11 - 0
docker-compose.yml

@@ -0,0 +1,11 @@
+services:
+  app:
+    build: .
+    ports:
+      - "8081:8081"
+    environment:
+      - OPENAI_API_KEY=${OPENAI_API_KEY}
+      - SUPABASE_URL=${SUPABASE_URL}
+      - SUPABASE_KEY=${SUPABASE_KEY}
+      # RAG_app.py reads SUPABASE_URI for its SQLAlchemy engine and the image
+      # does not copy environment.env, so the value must be passed in here.
+      - SUPABASE_URI=${SUPABASE_URI}
+    volumes:
+      - ./Documents:/app/Documents

+ 15 - 0
dockerfile

@@ -0,0 +1,15 @@
+FROM python:3.9
+
+WORKDIR /app
+
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy application files
+# Every COPY needs an explicit destination; "." resolves to WORKDIR (/app).
+COPY RAG_strategy.py .
+COPY RAG_app_copy.py .
+COPY add_vectordb.py .
+COPY Indexing_Split.py .
+COPY Documents/ ./Documents/
+
+CMD ["python", "RAG_app_copy.py"]

+ 222 - 0
faiss_index.py

@@ -0,0 +1,222 @@
+import faiss
+import numpy as np
+import json
+from tqdm import tqdm
+from time import time
+from RAG_strategy import multi_query
+from ragas import evaluate
+from ragas.metrics import answer_relevancy, faithfulness, context_recall, context_precision
+from datasets import Dataset
+from typing import List, Callable
+
+from dotenv import load_dotenv
+load_dotenv('environment.env')
+import os
+import pickle
+from supabase.client import Client, create_client
+from langchain_openai import OpenAIEmbeddings
+from add_vectordb import GetVectorStore
+from sqlalchemy import create_engine
+import pandas as pd
+from langchain_core.documents import Document
+
+# Load environment variables (populated by load_dotenv('environment.env') above)
+supabase_url = os.getenv("SUPABASE_URL")
+supabase_key = os.getenv("SUPABASE_KEY")
+openai_api_key = os.getenv("OPENAI_API_KEY")
+# Supabase table that holds the document rows and their embeddings
+document_table = "documents"
+URI = os.getenv("SUPABASE_URI")
+
+# Initialize Supabase client
+# NOTE: this and the objects below are created at import time, so importing
+# this module already requires the environment variables above to be set.
+supabase: Client = create_client(supabase_url, supabase_key)
+
+# Initialize embeddings
+embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+
+# Initialize original vector store (the baseline the FAISS retriever is compared against)
+original_vector_store = GetVectorStore(embeddings, supabase, document_table)
+original_retriever = original_vector_store.as_retriever(search_kwargs={"k": 4})
+
+
+
+def download_embeddings():
+    response = supabase.table(document_table).select("id, embedding, metadata, content").execute()
+    embeddings = []
+    ids = []
+    metadatas = []
+    contents = []
+    for item in response.data:
+        # Parse the embedding string into a list of floats
+        embedding = json.loads(item['embedding'])
+        embeddings.append(embedding)
+        ids.append(item['id'])
+        metadatas.append(item['metadata'])
+        contents.append(item['content'])
+    return np.array(embeddings, dtype=np.float32), ids, metadatas, contents
+
+def create_faiss_index(embeddings):
+    dimension = embeddings.shape[1]
+    index = faiss.IndexFlatL2(dimension)
+    index.add(embeddings)
+    return index
+
+def save_faiss_index(index, file_path):
+    """Write ``index`` to ``file_path`` with ``faiss.write_index`` and report it on stdout."""
+    faiss.write_index(index, file_path)
+    print(f"FAISS index saved to {file_path}")
+
+def load_faiss_index(file_path):
+    if os.path.exists(file_path):
+        index = faiss.read_index(file_path)
+        print(f"FAISS index loaded from {file_path}")
+        return index
+    return None
+
+def save_metadata(ids, metadatas, contents, file_path):
+    with open(file_path, 'wb') as f:
+        pickle.dump((ids, metadatas, contents), f)
+    print(f"Metadata saved to {file_path}")
+
+def load_metadata(file_path):
+    if os.path.exists(file_path):
+        with open(file_path, 'rb') as f:
+            ids, metadatas, contents = pickle.load(f)
+        print(f"Metadata loaded from {file_path}")
+        return ids, metadatas, contents
+    return None, None, None
+
+def search_faiss(index, query_vector, k=4):
+    # Convert query_vector to a numpy array if it's not already
+    if not isinstance(query_vector, np.ndarray):
+        query_vector = np.array(query_vector)
+    
+    # Ensure the query_vector is 2D
+    if query_vector.ndim == 1:
+        query_vector = query_vector.reshape(1, -1)
+    
+    distances, indices = index.search(query_vector, k)
+    return distances[0], indices[0]
+
+class FAISSRetriever:
+    def __init__(self, index, ids, metadatas, contents):
+        self.index = index
+        self.ids = ids
+        self.metadatas = metadatas
+        self.contents = contents
+        self.embeddings_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
+
+    def get_relevant_documents(self, query: str) -> List[Document]:
+        query_vector = self.embeddings_model.embed_query(query)
+        _, indices = search_faiss(self.index, query_vector)
+        return [
+            Document(page_content=self.contents[i], metadata=self.metadatas[i])
+            for i in indices
+        ]
+
+    def as_retriever(self, search_kwargs=None):
+        return self
+
+    def map(self) -> Callable[[List[str]], List[List[Document]]]:
+        def _map(queries: List[str]) -> List[List[Document]]:
+            return [self.get_relevant_documents(query) for query in queries]
+        return _map
+
+def load_qa_pairs():
+    """Read ``QA_database_rows.csv`` and return its ``Question`` and ``Answer`` columns as two lists."""
+    df = pd.read_csv("QA_database_rows.csv")
+    return df['Question'].tolist(), df['Answer'].tolist()
+
+# Benchmark script: answers every QA-pair question once with the FAISS
+# retriever and once with the original vector-store retriever, printing the
+# answer, the wall-clock time and a RAGAS evaluation for each.
+if __name__ == "__main__":
+    faiss_index_path = "faiss_index.bin"
+    metadata_path = "faiss_metadata.pkl"
+
+    # Try to load existing FAISS index and metadata
+    index = load_faiss_index(faiss_index_path)
+    ids, metadatas, contents = load_metadata(metadata_path)
+
+    if index is None or ids is None:
+        print("FAISS index or metadata not found. Creating new index...")
+        print("Downloading embeddings from Supabase...")
+        # NOTE: this rebinds the module-level name `embeddings` (previously the
+        # OpenAIEmbeddings instance) to the downloaded NumPy array.
+        embeddings, ids, metadatas, contents = download_embeddings()
+
+        print("Creating FAISS index...")
+        index = create_faiss_index(embeddings)
+
+        # Save the index and metadata
+        save_faiss_index(index, faiss_index_path)
+        save_metadata(ids, metadatas, contents, metadata_path)
+    else:
+        print("Using existing FAISS index and metadata.")
+
+    print("Creating FAISS retriever...")
+    faiss_retriever = FAISSRetriever(index, ids, metadatas, contents)
+
+    # Load QA pairs from the CSV export read by load_qa_pairs()
+    questions, ground_truths = load_qa_pairs()
+
+    # Compare performance
+    for question, ground_truth in zip(questions, ground_truths):
+        print(f"\nQuestion: {question}")
+
+        # Measure time for FAISS retrieval
+        start_time = time()
+        faiss_answer, faiss_docs = multi_query(question, faiss_retriever, chat_history=[])
+        faiss_time = time() - start_time
+        print(f"FAISS Answer: {faiss_answer}")
+        print(f"FAISS Time: {faiss_time:.4f} seconds")
+
+        # Measure time for original retrieval
+        start_time = time()
+        original_answer, original_docs = multi_query(question, original_retriever, chat_history=[])
+        original_time = time() - start_time
+        print(f"Original Answer: {original_answer}")
+        print(f"Original Time: {original_time:.4f} seconds")
+
+        # RAGAS evaluation for FAISS (a one-row dataset per question)
+        # NOTE(review): the expected reference column name ("ground_truths" vs
+        # "ground_truth") presumably depends on the installed ragas version — confirm.
+        faiss_datasets = {
+            "question": [question],
+            "answer": [faiss_answer],
+            "contexts": [[doc.page_content for doc in faiss_docs]],
+            "ground_truths": [[ground_truth]]
+        }
+        faiss_evalsets = Dataset.from_dict(faiss_datasets)
+
+        faiss_result = evaluate(
+            faiss_evalsets,
+            metrics=[
+                context_precision,
+                faithfulness,
+                answer_relevancy,
+                context_recall,
+            ],
+        )
+
+        print("FAISS RAGAS Evaluation:")
+        print(faiss_result.to_pandas())
+
+        # RAGAS evaluation for Original retriever (same metrics, same dataset shape)
+        original_datasets = {
+            "question": [question],
+            "answer": [original_answer],
+            "contexts": [[doc.page_content for doc in original_docs]],
+            "ground_truths": [[ground_truth]]
+        }
+        original_evalsets = Dataset.from_dict(original_datasets)
+
+        original_result = evaluate(
+            original_evalsets,
+            metrics=[
+                context_precision,
+                faithfulness,
+                answer_relevancy,
+                context_recall,
+            ],
+        )
+
+        print("Original RAGAS Evaluation:")
+        print(original_result.to_pandas())
+
+    print("\nPerformance comparison complete.")
+
+# Key points:
+
+# If both the index and metadata (ids, metadatas, contents) are found, we don't need to download embeddings or recreate the index. The FAISS index already contains the embeddings.
+# The FAISSRetriever class doesn't need the raw embeddings. It uses the FAISS index for similarity search and the metadata (ids, metadatas, contents) for returning document information.

+ 74 - 0
ollama_chat.py

@@ -0,0 +1,74 @@
+import subprocess
+import json
+from typing import Any, List, Optional, Dict
+from langchain_core.callbacks import CallbackManagerForLLMRun
+from langchain_core.language_models import BaseChatModel
+from langchain_core.messages import BaseMessage, AIMessage, HumanMessage
+from langchain_core.outputs import ChatResult, ChatGeneration
+from pydantic import Field
+
+class OllamaChatModel(BaseChatModel):
+    model_name: str = Field(default="taide-local")
+
+    def _generate(
+        self,
+        messages: List[BaseMessage],
+        stop: Optional[List[str]] = None,
+        run_manager: Optional[CallbackManagerForLLMRun] = None,
+        **kwargs: Any,
+    ) -> ChatResult:
+        prompt = "\n".join([f"{msg.__class__.__name__}: {msg.content}" for msg in messages])
+        
+        command = ["ollama", "run", self.model_name, prompt]
+        result = subprocess.run(command, capture_output=True, text=True)
+        
+        if result.returncode != 0:
+            raise Exception(f"Ollama command failed: {result.stderr}")
+        
+        content = result.stdout.strip()
+        
+        message = AIMessage(content=content)
+        generation = ChatGeneration(message=message)
+        return ChatResult(generations=[generation])
+
+    @property
+    def _llm_type(self) -> str:
+        return "ollama-chat-model"
+
+def check_model_availability(model_name: str):
+    result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
+    if result.returncode != 0:
+        print(f"Error checking model availability: {result.stderr}")
+        return False
+    
+    models = result.stdout.splitlines()
+    return any(model_name in model for model in models)
+
+# Usage example
+if __name__ == "__main__":
+    model_name = "taide-local"
+
+    print(f"Checking availability of model {model_name}...")
+    if not check_model_availability(model_name):
+        print(f"Model {model_name} is not available. Please check if it's correctly installed in Ollama.")
+        exit(1)
+
+    model = OllamaChatModel(model_name=model_name)
+    
+    print(f"Starting chat with {model_name} model. Type 'exit' to quit.")
+    
+    messages = []
+    while True:
+        user_input = input("You: ")
+        if user_input.lower() == 'exit':
+            break
+        
+        messages.append(HumanMessage(content=user_input))
+        try:
+            response = model.invoke(messages)
+            print("AI:", response.content)
+            messages.append(AIMessage(content=response.content))
+        except Exception as e:
+            print(f"Error communicating with Ollama: {e}")
+
+print("Chat session ended. Goodbye!")

+ 6 - 0
requirements_semantic_search.txt

@@ -0,0 +1,6 @@
+python-dotenv
+openai
+langchain-openai
+langchain-community
+langchain-chroma
+chromadb

+ 1339 - 0
semantic_cache.ipynb

@@ -0,0 +1,1339 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "### Python = 3.9\n",
+    "import os\n",
+    "from dotenv import load_dotenv\n",
+    "load_dotenv('environment.env')\n",
+    "\n",
+    "import openai \n",
+    "openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n",
+    "openai.api_key = openai_api_key\n",
+    "\n",
+    "from langchain_openai import OpenAIEmbeddings\n",
+    "embeddings_model = OpenAIEmbeddings()\n",
+    "\n",
+    "from langchain_community.document_loaders.csv_loader import CSVLoader\n",
+    "from langchain_chroma import Chroma\n",
+    "\n",
+    "import pandas as pd\n",
+    "import re\n",
+    "# from supabase import create_client, Client \n",
+    "# supabase_url = os.getenv(\"SUPABASE_URL\")\n",
+    "# supabase_key = os.getenv(\"SUPABASE_KEY\")\n",
+    "# supabase: Client = create_client(supabase_url, supabase_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "############## Load data from csv ###############\n",
+    "def extract_question(text):\n",
+    "    match = re.search(r'Question: (.+?)(?:\\n|$)', text)\n",
+    "    if match:\n",
+    "        return match.group(1)\n",
+    "    return None\n",
+    "\n",
+    "\n",
+    "loader = CSVLoader(file_path=\"/QA_database.csv\")\n",
+    "data = loader.load()\n",
+    "\n",
+    "questions = [extract_question(doc.page_content) for doc in data]\n",
+    "\n",
+    "# ########### load data from supabase ##########\n",
+    "# embeddings_model = OpenAIEmbeddings()\n",
+    "# response = supabase.table(\"tablename\").select(\"question\").execute()\n",
+    "# data = response.data \n",
+    "# created_at = []\n",
+    "# question = []\n",
+    "# ids = []\n",
+    "# answer = []\n",
+    "# video_url = []\n",
+    "\n",
+    "# for item in data:\n",
+    "#     ids.append(item['id'])\n",
+    "#     created_at.append(item['created_at'])\n",
+    "#     question.append(item['question'])\n",
+    "#     answer.append(item['answer'])\n",
+    "#     video_url.append(item['video_url'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "######### generate embedding ###########\n",
+    "embedding = embeddings_model.embed_documents(questions)\n",
+    "\n",
+    "########## Write embedding to the supabase table  #######\n",
+    "# for id, new_embedding in zip(ids, embedding):\n",
+    "#     supabase.table(\"tablename\").insert({\"embedding\": embedding.tolist()}).eq(\"id\", id).execute()\n",
+    "\n",
+    "########### Vector Store #############\n",
+    "# Put pre-compute embeddings to vector store. ## save to disk\n",
+    "vectorstore = Chroma.from_texts(\n",
+    "    texts=questions,\n",
+    "    embedding=embeddings_model,\n",
+    "    persist_directory=\"./chroma_db_carbon_questions\"\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "###### load from disk  #######\n",
+    "vectorstore = Chroma(persist_directory=\"./chroma_db_carbon_questions\", embedding_function=embeddings_model)\n",
+    "\n",
+    "####### Query it #########\n",
+    "def evaluation(target, query, SIMILARITY_THRESHOLD=0.83):\n",
+    "    #### 參考QA_database的標準問題,給予幾個換句話說問題,目標是讓兩者能夠被exact match。\n",
+    "    #### 若無法被exact match,則檢查兩者的similarity score是否在指定的標準以上,並回傳最近似問題\n",
+    "    correct = 0\n",
+    "    count = 0   \n",
+    "    print(f\"問題:{target[0]}\")\n",
+    "    for q in query:\n",
+    "        docs_and_scores = vectorstore.similarity_search_with_relevance_scores(q, k=1)\n",
+    "        doc, score = docs_and_scores[0]\n",
+    "        print(f\"Variation{count+1}:{q}\")\n",
+    "        if doc.page_content in target: \n",
+    "            print(f\"找到Target | score:{round(score, 2)}\")\n",
+    "            correct += 1 \n",
+    "        elif score >= SIMILARITY_THRESHOLD: \n",
+    "            print(f\"找到近似問題:{doc.page_content} | score:{round(score, 2)}\")\n",
+    "        else:\n",
+    "            print(f\"沒有找到 | score:{round(score, 2)}\")\n",
+    "        count += 1\n",
+    "    print(f\"Accuracy rate {correct} / {count}\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:台灣為什麼要制定氣候變遷因應法?\n",
+      "Variation1:氣候變遷因應法是什麼時候制定的\n",
+      "找到Target:score:0.91\n",
+      "Variation2:氣候變遷因應法有什麼內容\n",
+      "找到Target:score:0.87\n",
+      "Variation3:氣候變遷有哪些法律\n",
+      "找到Target:score:0.85\n",
+      "Variation4:台灣有哪些氣候法規\n",
+      "找到近似問題:台灣氣候變遷法是否已設立各階段管制目標? | score:0.87\n",
+      "Variation5:氣候變遷的影響\n",
+      "沒有找到 | score:0.83\n",
+      "Accuracy rate 3 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"台灣為什麼要制定氣候變遷因應法?\"]\n",
+    "query = [\"氣候變遷因應法是什麼時候制定的\", \"氣候變遷因應法有什麼內容\", \"氣候變遷有哪些法律\", \"台灣有哪些氣候法規\", \"氣候變遷的影響\"]\n",
+    "evaluation(target, query)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:台灣為什麼要制定氣候變遷因應法?\n",
+      "Variation1:氣候變遷因應法是什麼時候制定的\n",
+      "找到Target:score:0.91\n",
+      "Variation2:氣候變遷因應法有什麼內容\n",
+      "找到Target:score:0.87\n",
+      "Variation3:氣候變遷有哪些法律\n",
+      "找到Target:score:0.85\n",
+      "Variation4:台灣有哪些氣候法規\n",
+      "找到近似問題:台灣氣候變遷法是否已設立各階段管制目標? | score:0.87\n",
+      "Variation5:氣候變遷的影響\n",
+      "沒有找到 | score:0.83\n",
+      "Accuracy rate 3 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"台灣為什麼要制定氣候變遷因應法?\"]\n",
+    "query = [\"氣候變遷因應法是什麼時候制定的\", \"氣候變遷因應法有什麼內容\", \"氣候變遷有哪些法律\", \"台灣有哪些氣候法規\", \"氣候變遷的影響\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:國家溫室氣體長期減量目標\n",
+      "Variation1:溫室氣體減量\n",
+      "找到近似問題:溫室氣體 | score:0.92\n",
+      "Variation2:台灣溫室氣體減量目標\n",
+      "找到Target:score:0.94\n",
+      "Variation3:台灣有溫室氣體減量目標嗎\n",
+      "找到Target:score:0.91\n",
+      "Variation4:我們有溫室氣體減量目標嗎\n",
+      "找到Target:score:0.88\n",
+      "Variation5:溫室氣體要怎麼減量\n",
+      "找到近似問題:溫室氣體 | score:0.88\n",
+      "Accuracy rate 3 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"國家溫室氣體長期減量目標\"]\n",
+    "query = [\"溫室氣體減量\", \"台灣溫室氣體減量目標\", \"台灣有溫室氣體減量目標嗎\", \"我們有溫室氣體減量目標嗎\", \"溫室氣體要怎麼減量\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:淨零排放意涵為何\n",
+      "Variation1:零排放是什麼\n",
+      "找到Target:score:0.87\n",
+      "Variation2:解釋淨零\n",
+      "找到Target:score:0.82\n",
+      "Variation3:為什麼要淨零\n",
+      "找到Target:score:0.85\n",
+      "Variation4:台灣要把溫室氣體排放歸零嗎\n",
+      "找到近似問題:組織溫室氣體排放量可以用碳權抵減嗎? | score:0.85\n",
+      "Variation5:零排放\n",
+      "找到Target:score:0.82\n",
+      "Accuracy rate 4 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"淨零排放意涵為何\"]\n",
+    "query = [\"零排放是什麼\", \"解釋淨零\", \"為什麼要淨零\", \"台灣要把溫室氣體排放歸零嗎\", \"零排放\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:是否已設立各階段管制目標\n",
+      "Variation1:碳排放的管制目標有什麼\n",
+      "沒有找到 | score:0.81\n",
+      "Variation2:管制碳排放的階段目標\n",
+      "找到近似問題:國家溫室氣體長期減量目標 | score:0.86\n",
+      "Variation3:簡短介紹階段性管制目標\n",
+      "找到Target:score:0.88\n",
+      "Variation4:設立管制目標\n",
+      "找到Target:score:0.9\n",
+      "Variation5:溫室氣體排放的管制\n",
+      "找到近似問題:溫室氣體 | score:0.9\n",
+      "Accuracy rate 2 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"是否已設立各階段管制目標\"]\n",
+    "query = [\"碳排放的管制目標有什麼\", \"管制碳排放的階段目標\", \"簡短介紹階段性管制目標\", \"設立管制目標\", \"溫室氣體排放的管制\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:何類對象需執行溫室氣體盤查\n",
+      "Variation1:誰會被溫室提起盤查\n",
+      "找到Target:score:0.84\n",
+      "Variation2:碳盤查查誰\n",
+      "找到近似問題:什麼是碳盤查? | score:0.88\n",
+      "Variation3:誰會被強制碳盤查\n",
+      "沒有找到 | score:0.83\n",
+      "Variation4:溫室氣體排放盤查對象\n",
+      "找到Target:score:0.89\n",
+      "Variation5:哪些公司會被盤查溫室氣體\n",
+      "找到Target:score:0.83\n",
+      "Accuracy rate 3 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"何類對象需執行溫室氣體盤查\"]\n",
+    "query = [\"誰會被溫室提起盤查\", \"碳盤查查誰\", \"誰會被強制碳盤查\", \"溫室氣體排放盤查對象\", \"哪些公司會被盤查溫室氣體\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:何謂總量管制,我國是否已設定總量管制\n",
+      "Variation1:排放氣體總量管制是什麼\n",
+      "找到Target:score:0.84\n",
+      "Variation2:總量管制的目標\n",
+      "找到Target:score:0.88\n",
+      "Variation3:介紹總量管制\n",
+      "找到Target:score:0.89\n",
+      "Variation4:總量管制是規範什麼\n",
+      "找到Target:score:0.9\n",
+      "Variation5:總量管制的法規查詢\n",
+      "找到Target:score:0.87\n",
+      "Accuracy rate 5 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"何謂總量管制,我國是否已設定總量管制\"]\n",
+    "query = [\"排放氣體總量管制是什麼\", \"總量管制的目標\",\"介紹總量管制\", \"總量管制是規範什麼\", \"總量管制的法規查詢\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:徵收碳費依據\n",
+      "Variation1:碳排放的收費依據\n",
+      "找到Target:score:0.91\n",
+      "Variation2:碳費收費標準\n",
+      "找到Target:score:0.88\n",
+      "Variation3:碳排放氣體怎麼收費\n",
+      "找到Target:score:0.87\n",
+      "Variation4:攤費怎麼計算的\n",
+      "沒有找到 | score:0.75\n",
+      "Variation5:碳排放依據什麼收費\n",
+      "找到Target:score:0.89\n",
+      "Accuracy rate 4 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"徵收碳費依據\"]\n",
+    "query = [\"碳排放的收費依據\", \"碳費收費標準\",\"碳排放氣體怎麼收費\", \"攤費怎麼計算的\", \"碳排放依據什麼收費\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:誰需要繳交碳費?\n",
+      "Variation1:哪類型公司需要交碳費\n",
+      "找到Target:score:0.86\n",
+      "Variation2:碳費的收費對象\n",
+      "找到近似問題:碳費用途 | score:0.91\n",
+      "Variation3:如何知道我需不需要繳交碳費\n",
+      "找到Target:score:0.86\n",
+      "Variation4:製造業需要繳交碳費嗎\n",
+      "找到Target:score:0.88\n",
+      "Variation5:誰需要繳交溫室氣體排放費用\n",
+      "找到Target:score:0.89\n",
+      "Accuracy rate 4 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"誰需要繳交碳費?\"]\n",
+    "query = [\"哪類型公司需要交碳費\", \"碳費的收費對象\",\"如何知道我需不需要繳交碳費\", \"製造業需要繳交碳費嗎\", \"誰需要繳交溫室氣體排放費用\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳費一噸多少錢\n",
+      "Variation1:一頓碳多少錢\n",
+      "找到Target:score:0.91\n",
+      "Variation2:碳排放費率\n",
+      "找到近似問題:碳費用途 | score:0.84\n",
+      "Variation3:一頓碳的排放收費\n",
+      "找到近似問題:徵收碳費依據 | score:0.88\n",
+      "Variation4:碳 一頓 的收費多少\n",
+      "找到Target:score:0.92\n",
+      "Accuracy rate 2 / 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳費一噸多少錢\"]\n",
+    "query = [\"一頓碳多少錢\",\"碳排放費率\", \"一頓碳的排放收費\", \"碳 一頓 的收費多少\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳費用途\n",
+      "Variation1:碳費的目的\n",
+      "找到Target:score:0.94\n",
+      "Variation2:碳費的使用\n",
+      "找到Target:score:0.94\n",
+      "Variation3:碳費會被用在什麼事情上\n",
+      "找到Target:score:0.92\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳費用途\"]\n",
+    "query = [\"碳費的目的\",\"碳費的使用\", \"碳費會被用在什麼事情上\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳權來源\n",
+      "Variation1:碳權是什麼\n",
+      "找到Target:score:0.9\n",
+      "Variation2:解釋 碳權\n",
+      "找到Target:score:0.86\n",
+      "Variation3:碳權是怎麼得到的\n",
+      "找到Target:score:0.9\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳權來源\"]\n",
+    "query = [\"碳權是什麼\",\"解釋 碳權\", \"碳權是怎麼得到的\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:組織溫室氣體排放量可以用碳權抵減嗎?\n",
+      "Variation1:組織溫室氣體可以抵銷碳權嗎\n",
+      "找到Target:score:0.93\n",
+      "Variation2:公司可以用碳權付費溫室氣體排放嗎\n",
+      "找到Target:score:0.91\n",
+      "Variation3:組織碳權抵銷溫室氣體\n",
+      "找到Target:score:0.89\n",
+      "Variation4:公司碳權抵銷溫室氣體的規定有哪些\n",
+      "找到Target:score:0.84\n",
+      "Accuracy rate 4 / 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"組織溫室氣體排放量可以用碳權抵減嗎?\"]\n",
+    "query = [\"組織溫室氣體可以抵銷碳權嗎\",\"公司可以用碳權付費溫室氣體排放嗎\", \"組織碳權抵銷溫室氣體\", \"公司碳權抵銷溫室氣體的規定有哪些\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳費可以用碳權抵減嗎\n",
+      "Variation1:碳排放可以抵銷碳權嗎\n",
+      "找到Target:score:0.89\n",
+      "Variation2:可以用碳權付費碳費嗎\n",
+      "找到Target:score:0.93\n",
+      "Variation3:碳權抵銷碳費的規定\n",
+      "找到Target:score:0.87\n",
+      "Variation4:碳權拿來付費碳費 可以嗎\n",
+      "找到Target:score:0.91\n",
+      "Accuracy rate 4 / 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳費可以用碳權抵減嗎\"]\n",
+    "query = [\"碳排放可以抵銷碳權嗎\",\"可以用碳權付費碳費嗎\", \"碳權抵銷碳費的規定\", \"碳權拿來付費碳費 可以嗎\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:老舊汽車汰舊換新溫室氣體減量效益的用途為何\n",
+      "Variation1:舊汽車汰換 溫室氣體排放抵換 \n",
+      "找到Target:score:0.88\n",
+      "Variation2:舊汽車換新 溫室氣體減量效益\n",
+      "找到Target:score:0.93\n",
+      "Variation3:舊汽車換新 溫室氣體排放減量 補助\n",
+      "找到Target:score:0.88\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"老舊汽車汰舊換新溫室氣體減量效益的用途為何\"]\n",
+    "query = [\"舊汽車汰換 溫室氣體排放抵換 \",\"舊汽車換新 溫室氣體減量效益\", \"舊汽車換新 溫室氣體排放減量 補助\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:組織型碳盤查遵循標準\n",
+      "Variation1:組織碳排查 規定 跟標準\n",
+      "找到Target:score:0.9\n",
+      "Variation2:公司碳盤查 法律規定\n",
+      "找到Target:score:0.86\n",
+      "Variation3:組織的碳盤查有哪些規定\n",
+      "找到Target:score:0.92\n",
+      "Variation4:如何確保我的公司遵循碳盤查的規範\n",
+      "找到Target:score:0.88\n",
+      "Variation5:碳盤查 要遵循哪些審查標準\n",
+      "找到Target:score:0.91\n",
+      "Accuracy rate 5 / 5\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"組織型碳盤查遵循標準\"]\n",
+    "query = [\"組織碳排查 規定 跟標準\",\"公司碳盤查 法律規定\", \"組織的碳盤查有哪些規定\" , \"如何確保我的公司遵循碳盤查的規範\", \"碳盤查 要遵循哪些審查標準\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:溫室氣體種類\n",
+      "Variation1:溫室氣體有哪些\n",
+      "找到Target:score:0.93\n",
+      "Variation2:溫室氣氣包括哪些\n",
+      "找到Target:score:0.87\n",
+      "Variation3:甲烷是溫室氣體嗎\n",
+      "找到近似問題:溫室氣體 | score:0.87\n",
+      "Variation4:二氧化碳是溫室氣體嗎\n",
+      "找到近似問題:溫室氣體 | score:0.87\n",
+      "Accuracy rate 2 / 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"溫室氣體種類\"]\n",
+    "query = [\"溫室氣體有哪些\", \"溫室氣氣包括哪些\" , \"甲烷是溫室氣體嗎\", \"二氧化碳是溫室氣體嗎\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:盤查範圍應如何設定\n",
+      "Variation1:盤查範圍決定\n",
+      "找到Target:score:0.95\n",
+      "Variation2:盤查範圍\n",
+      "找到Target:score:0.95\n",
+      "Variation3:碳排放的盤查範圍\n",
+      "找到近似問題:碳足跡涵蓋範圍 | score:0.87\n",
+      "Variation4:溫室氣體盤查的範圍\n",
+      "找到近似問題:溫室氣體 | score:0.85\n",
+      "Accuracy rate 2 / 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"盤查範圍應如何設定\"]\n",
+    "query = [\"盤查範圍決定\", \"盤查範圍\" , \"碳排放的盤查範圍\", \"溫室氣體盤查的範圍\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:溫室氣體排放量計算方式\n",
+      "Variation1:溫室氣體計算單位\n",
+      "找到Target:score:0.9\n",
+      "Variation2:溫室氣體的計算方式\n",
+      "找到Target:score:0.95\n",
+      "Variation3:溫室氣體排放的計量方式\n",
+      "找到Target:score:0.97\n",
+      "Variation4:如何計量溫室氣體的排放\n",
+      "找到Target:score:0.94\n",
+      "Accuracy rate 4 / 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"溫室氣體排放量計算方式\"]\n",
+    "query = [\"溫室氣體計算單位\", \"溫室氣體的計算方式\" , \"溫室氣體排放的計量方式\", \"如何計量溫室氣體的排放\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:完成盤查作業後,需產出何種文件\n",
+      "Variation1:盤查後的報告文件產出\n",
+      "找到Target:score:0.89\n",
+      "Variation2:盤查後的文件產出\n",
+      "找到Target:score:0.9\n",
+      "Variation3:盤查產出的報告\n",
+      "找到Target:score:0.83\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"完成盤查作業後,需產出何種文件\"]\n",
+    "query = [\"盤查後的報告文件產出\", \"盤查後的文件產出\" , \"盤查產出的報告\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:組織型盤查是否皆需完成第三者查證作業\n",
+      "Variation1:組織盤查 需要第三者驗證碼\n",
+      "找到Target:score:0.91\n",
+      "Variation2:組織盤查 需要外部認證嗎\n",
+      "找到Target:score:0.88\n",
+      "Variation3:組織盤查 第三者檢查\n",
+      "找到Target:score:0.92\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"組織型盤查是否皆需完成第三者查證作業\"]\n",
+    "query = [\"組織盤查 需要第三者驗證碼\", \"組織盤查 需要外部認證嗎\" , \"組織盤查 第三者檢查\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳足跡是指\n",
+      "Variation1:解釋碳足跡\n",
+      "找到近似問題:碳足跡 | score:0.91\n",
+      "Variation2:碳足跡的產生\n",
+      "找到近似問題:碳足跡 | score:0.92\n",
+      "Variation3:碳足跡是什麼\n",
+      "找到Target:score:0.95\n",
+      "Variation4:哪些活動會產生碳足跡\n",
+      "找到近似問題:碳足跡 | score:0.85\n",
+      "Accuracy rate 1 / 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳足跡是指\"]\n",
+    "query = [\"解釋碳足跡\", \"碳足跡的產生\" , \"碳足跡是什麼\", \"哪些活動會產生碳足跡\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳足跡遵循標準\n",
+      "Variation1:碳足跡 法規\n",
+      "找到近似問題:碳足跡 | score:0.9\n",
+      "Variation2:碳足跡相關規定\n",
+      "找到Target:score:0.91\n",
+      "Variation3:產品碳足跡規定\n",
+      "找到Target:score:0.9\n",
+      "Accuracy rate 2 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳足跡遵循標準\"]\n",
+    "query = [\"碳足跡 法規\", \"碳足跡相關規定\" , \"產品碳足跡規定\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳足跡涵蓋範圍\n",
+      "Variation1:碳足跡 範圍舉例\n",
+      "找到Target:score:0.94\n",
+      "Variation2:碳足跡的範圍有多廣\n",
+      "找到Target:score:0.94\n",
+      "Variation3:組織碳足跡 範圍\n",
+      "找到Target:score:0.93\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳足跡涵蓋範圍\"]\n",
+    "query = [\"碳足跡 範圍舉例\", \"碳足跡的範圍有多廣\" , \"組織碳足跡 範圍\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:CEV系統依循標準為何\n",
+      "Variation1:CEV系統 規定\n",
+      "找到Target:score:0.9\n",
+      "Variation2:CEV要遵守什麼\n",
+      "找到Target:score:0.84\n",
+      "Variation3:組織CEV的標準\n",
+      "找到Target:score:0.87\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"CEV系統依循標準為何\"]\n",
+    "query = [\"CEV系統 規定\", \"CEV要遵守什麼\" , \"組織CEV的標準\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:CEV系統可以支援盤查到什麼程度\n",
+      "Variation1:CEV可以盤查到什麼\n",
+      "找到Target:score:0.92\n",
+      "Variation2:CEV的支援盤查範圍\n",
+      "找到Target:score:0.9\n",
+      "Variation3:CEV可以上傳數據嗎\n",
+      "找到Target:score:0.82\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"CEV系統可以支援盤查到什麼程度\"]\n",
+    "query = [\"CEV可以盤查到什麼\", \"CEV的支援盤查範圍\" , \"CEV可以上傳數據嗎\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 41,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:什麼是精誠資訊\n",
+      "Variation1:精誠 介紹\n",
+      "找到Target:score:0.84\n",
+      "Variation2:精誠的業務範圍\n",
+      "找到Target:score:0.8\n",
+      "Variation3:精誠有哪些主要服務\n",
+      "找到Target:score:0.84\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"什麼是精誠資訊\"]\n",
+    "query = [\"精誠 介紹\", \"精誠的業務範圍\" , \"精誠有哪些主要服務\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 42,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:溫暖化潛勢是什麼?\n",
+      "Variation1:解釋溫暖化潛勢\n",
+      "找到Target:score:0.92\n",
+      "Variation2:解釋global warming potential\n",
+      "找到Target:score:0.78\n",
+      "Variation3:解釋gwp\n",
+      "找到Target:score:0.66\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"溫暖化潛勢是什麼?\"]\n",
+    "query = [\"解釋溫暖化潛勢\", \"解釋global warming potential\" , \"解釋gwp\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 43,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:什麼是碳盤查?\n",
+      "Variation1:碳盤查 介紹\n",
+      "找到Target:score:0.9\n",
+      "Variation2:碳盤查是什麼\n",
+      "找到Target:score:0.95\n",
+      "Variation3:碳盤查的意思\n",
+      "找到近似問題:碳盤查的目的是什麼? | score:0.93\n",
+      "Accuracy rate 2 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"什麼是碳盤查?\"]\n",
+    "query = [\"碳盤查 介紹\", \"碳盤查是什麼\" , \"碳盤查的意思\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 44,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳盤查的目的是什麼?\n",
+      "Variation1:為什麼要做碳盤查\n",
+      "找到近似問題:什麼是碳盤查? | score:0.92\n",
+      "Variation2:碳盤查有什麼好處\n",
+      "找到Target:score:0.91\n",
+      "Variation3:碳盤查的目標\n",
+      "找到Target:score:0.92\n",
+      "Accuracy rate 2 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳盤查的目的是什麼?\"]\n",
+    "query = [\"為什麼要做碳盤查\", \"碳盤查有什麼好處\" , \"碳盤查的目標\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 45,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳盤查的標準有哪些類別?\n",
+      "Variation1:碳盤查的盤查標準\n",
+      "找到Target:score:0.92\n",
+      "Variation2:碳盤查有哪些標準\n",
+      "找到Target:score:0.94\n",
+      "Variation3:組織溫室氣體盤查標準\n",
+      "找到近似問題:溫室氣體 | score:0.86\n",
+      "Accuracy rate 2 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳盤查的標準有哪些類別?\"]\n",
+    "query = [\"碳盤查的盤查標準\", \"碳盤查有哪些標準\" , \"組織溫室氣體盤查標準\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 46,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳足跡\n",
+      "Variation1:碳足跡 介紹\n",
+      "找到Target:score:0.93\n",
+      "Variation2:碳足跡 解釋\n",
+      "找到Target:score:0.94\n",
+      "Variation3:什麼是碳足跡\n",
+      "找到Target:score:0.93\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳足跡\"]\n",
+    "query = [\"碳足跡 介紹\", \"碳足跡 解釋\",\"什麼是碳足跡\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 47,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:14061-1\n",
+      "Variation1:14064-1是什麼\n",
+      "沒有找到 | score:0.83\n",
+      "Variation2:ISO 14064解釋\n",
+      "沒有找到 | score:0.71\n",
+      "Variation3:iso 14064法規\n",
+      "沒有找到 | score:0.76\n",
+      "Variation4:14061\n",
+      "沒有找到 | score:0.81\n",
+      "Accuracy rate 0 / 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"14061-1\"]\n",
+    "query = [\"14064-1是什麼\", \"ISO 14064解釋\",\"iso 14064法規\", \"14061\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 48,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:Describe ESG\n",
+      "Variation1:explain esg\n",
+      "找到Target:score:0.9\n",
+      "Variation2:whats esg\n",
+      "找到Target:score:0.91\n",
+      "Variation3:解釋esg\n",
+      "沒有找到 | score:0.79\n",
+      "Variation4:什麼是esg\n",
+      "找到Target:score:0.85\n",
+      "Accuracy rate 3 / 4\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"Describe ESG\"]\n",
+    "query = [\"explain esg\", \"whats esg\",\"解釋esg\", \"什麼是esg\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 49,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:溫室氣體\n",
+      "Variation1:溫室氣體 介紹\n",
+      "找到Target:score:0.94\n",
+      "Variation2:溫室氣體 影響\n",
+      "找到Target:score:0.91\n",
+      "Variation3:溫室氣體解釋\n",
+      "找到Target:score:0.91\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"溫室氣體\"]\n",
+    "query = [\"溫室氣體 介紹\", \"溫室氣體 影響\",\"溫室氣體解釋\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 50,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳盤查涉及哪些產業或領域?\n",
+      "Variation1:會被碳盤查的產業\n",
+      "找到Target:score:0.9\n",
+      "Variation2:碳排放 盤查產業\n",
+      "找到Target:score:0.87\n",
+      "Variation3:哪類型公司需要碳盤查\n",
+      "找到Target:score:0.84\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳盤查涉及哪些產業或領域?\"]\n",
+    "query = [\"會被碳盤查的產業\", \"碳排放 盤查產業\",\"哪類型公司需要碳盤查\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 51,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:排放係數從哪裡得到?\n",
+      "Variation1:碳排放係數 查詢\n",
+      "找到Target:score:0.84\n",
+      "Variation2:哪裏可以查碳足跡資料\n",
+      "找到近似問題:碳足跡 | score:0.86\n",
+      "Variation3:查詢碳足跡排放係數\n",
+      "找到近似問題:碳足跡遵循標準 | score:0.83\n",
+      "Accuracy rate 1 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"排放係數從哪裡得到?\"]\n",
+    "query = [\"碳排放係數 查詢\", \"哪裏可以查碳足跡資料\",\"查詢碳足跡排放係數\"]\n",
+    "evaluation(target, query)\n",
+    "# 可加入“碳足跡”到“排放係數”"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:台灣氣候變遷法是否已設立各階段管制目標?\n",
+      "Variation1:氣候變遷法 各階段目標\n",
+      "找到Target:score:0.92\n",
+      "Variation2:氣候變遷管制的階段性目標\n",
+      "找到Target:score:0.91\n",
+      "Variation3:台灣氣候變遷管制\n",
+      "找到Target:score:0.92\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"台灣氣候變遷法是否已設立各階段管制目標?\"]\n",
+    "query = [\"氣候變遷法 各階段目標\", \"氣候變遷管制的階段性目標\",\"台灣氣候變遷管制\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:排放係數法是什麼\n",
+      "Variation1:排放係數計算\n",
+      "找到Target:score:0.92\n",
+      "Variation2:碳足跡計算\n",
+      "找到近似問題:碳足跡 | score:0.91\n",
+      "Variation3:排放係數\n",
+      "找到Target:score:0.93\n",
+      "Accuracy rate 2 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"排放係數法是什麼\"]\n",
+    "query = [\"排放係數計算\", \"碳足跡計算\",\"排放係數\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:碳盤查有哪些範疇?\n",
+      "Variation1:碳盤查的範圍\n",
+      "找到Target:score:0.93\n",
+      "Variation2:溫室氣體盤查涵蓋範圍\n",
+      "找到近似問題:溫室氣體 | score:0.85\n",
+      "Variation3:碳排放盤查的範圍有多廣\n",
+      "找到Target:score:0.85\n",
+      "Accuracy rate 2 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"碳盤查有哪些範疇?\"]\n",
+    "query = [\"碳盤查的範圍\", \"溫室氣體盤查涵蓋範圍\",\"碳排放盤查的範圍有多廣\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 55,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:制定溫室氣體排放量盤查登錄及查驗管理辦法的目的是什麼\n",
+      "Variation1:溫室氣體排放管理辦法的目的\n",
+      "找到Target:score:0.92\n",
+      "Variation2:溫室氣體查驗的目的\n",
+      "找到Target:score:0.89\n",
+      "Variation3:溫室氣體管理跟盤查的目的\n",
+      "找到Target:score:0.92\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"制定溫室氣體排放量盤查登錄及查驗管理辦法的目的是什麼\"]\n",
+    "query = [\"溫室氣體排放管理辦法的目的\", \"溫室氣體查驗的目的\",\"溫室氣體管理跟盤查的目的\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 56,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:請問精誠資訊有什麼產品呢?\n",
+      "Variation1:精誠資訊服務介紹\n",
+      "找到近似問題:什麼是精誠資訊 | score:0.88\n",
+      "Variation2:精誠資訊產品有哪些\n",
+      "找到Target:score:0.93\n",
+      "Variation3:精誠資訊提供哪些服務\n",
+      "找到近似問題:什麼是精誠資訊 | score:0.88\n",
+      "Accuracy rate 1 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"請問精誠資訊有什麼產品呢?\"]\n",
+    "query = [\"精誠資訊服務介紹\", \"精誠資訊產品有哪些\",\"精誠資訊提供哪些服務\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 57,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "問題:供應鏈ESG評鑑該怎麼進行?\n",
+      "Variation1:esg評鑑怎麼做\n",
+      "找到Target:score:0.87\n",
+      "Variation2:供應鏈的esg評鑑\n",
+      "找到Target:score:0.93\n",
+      "Variation3:esg評價要準備什麼\n",
+      "找到Target:score:0.84\n",
+      "Accuracy rate 3 / 3\n"
+     ]
+    }
+   ],
+   "source": [
+    "target = [\"供應鏈ESG評鑑該怎麼進行?\"]\n",
+    "query = [\"esg評鑑怎麼做\", \"供應鏈的esg評鑑\",\"esg評價要準備什麼\"]\n",
+    "evaluation(target, query)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    " ### Observations\n",
+    "這裡有一些看起來像同義詞的詞,會影響到 LLM 的判讀,例如:\n",
+    "- 溫室氣體跟碳排放\n",
+    "- 碳足跡跟排放係數\n",
+    "\n",
+    "--------\n",
+    "- 這裡的數字判斷不出來,例如 14061-1。但這不是問題:之前測試過,只要在 RAG 的 Prompt 裡面強調就 OK 了。\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "choozemo-carbon",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.19"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

+ 30 - 0
taide_rag.py

@@ -0,0 +1,30 @@
+from dotenv import load_dotenv
+from langchain.vectorstores import Chroma
+import os
+load_dotenv('environment.env')
+openai_api_key = os.getenv("OPENAI_API_KEY")
+from RAG_strategy import taide_llm, multi_query, naive_rag
+from langchain.vectorstores import FAISS
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.document_loaders import TextLoader
+from langchain.text_splitter import CharacterTextSplitter
+
+
+
+# Load and prepare a sample document
+loader = TextLoader("test_data.txt")
+documents = loader.load()
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+docs = text_splitter.split_documents(documents)
+
+# Create a vector store
+embeddings = OpenAIEmbeddings()
+vectorstore = Chroma.from_documents(docs, embeddings)
+retriever = vectorstore.as_retriever()
+
+# Test multi_query
+print("\nTesting multi_query:")
+question = "什麼是碳排放獎勵辦法?"
+answer, docs = multi_query(question, retriever, [])
+print(f"Question: {question}")
+print(f"Answer: {answer}")

+ 6 - 0
test_data.txt

@@ -0,0 +1,6 @@
+辦法所稱低碳產品,指符合下列條件之一者:
+一、取得中央主管機關核發碳足跡標籤(以下簡稱碳標籤)使用權,且碳足跡數值為同類型碳標籤產品中前百分之十。
+二、取得中央主管機關核發之碳足跡減量標籤(以下簡稱減碳標籤)使用權。
+三、經中央主管機關審查展期通過且具實際減碳成效之碳標籤使用權。
+前項第一款所稱同類型碳標籤產品,指適用相同之碳足跡產品類別規則文件,且中華民國輸出入貨品分類號列前六碼相同或歸屬於相同之中華民國行業標準分類細類產品。但
+碳標籤產品無法以中華民國輸出入貨品分類號列或中華民國行業標準分類進行分類者,得檢具主管機關或