
integrated taide model to RAG_strategy

SherryLiu, 7 months ago
parent
commit
ef69f16e46
6 changed files with 102 additions and 370 deletions
  1. RAG_app_copy.py (+11, -64)
  2. RAG_strategy.py (+61, -144)
  3. ragas_data_generation.py (+0, -29)
  4. run.sh (+0, -31)
  5. taide_rag.py (+30, -0)
  6. test_connection.py (+0, -102)

+ 11 - 64
RAG_app_copy.py

@@ -1,39 +1,27 @@
 from dotenv import load_dotenv
 load_dotenv('environment.env')
 
-from fastapi import FastAPI, Request, HTTPException, status, Body
-# from fastapi.templating import Jinja2Templates
+from fastapi import FastAPI, HTTPException, status, Body, Depends
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.responses import FileResponse
-from fastapi import Depends
 from contextlib import asynccontextmanager
 from pydantic import BaseModel
 from typing import List, Optional
 import uvicorn
 
-import sqlparse
 from sqlalchemy import create_engine
 import pandas as pd
-#from retrying import retry
 import datetime
 import json
 from json import loads
 import time
 from langchain.callbacks import get_openai_callback
 
-from langchain_community.vectorstores import Chroma
 from langchain_openai import OpenAIEmbeddings
 from RAG_strategy import multi_query, naive_rag, naive_rag_for_qapairs
-from Indexing_Split import create_retriever as split_retriever
-from Indexing_Split import gen_doc_from_database, gen_doc_from_history
 
 import os
-from langchain_community.vectorstores import SupabaseVectorStore
-from langchain_openai import OpenAIEmbeddings
 from supabase.client import Client, create_client
 from add_vectordb import GetVectorStore
-from langchain_community.cache import RedisSemanticCache  # 更新导入路径
-from langchain_core.prompts import PromptTemplate
 import openai
 
 # Get API log
@@ -44,23 +32,17 @@ openai_api_key = os.getenv("OPENAI_API_KEY")
 URI = os.getenv("SUPABASE_URI")
 openai.api_key = openai_api_key
 
-
 global_retriever = None
 
-# 定義FastAPI的生命週期管理器,在啟動和關閉時執行特定操作
 @asynccontextmanager
 async def lifespan(app: FastAPI):
     global global_retriever
     global vector_store
     
     start = time.time()
-    # global_retriever = split_retriever(path='./Documents', extension="docx")
-    # global_retriever = raptor_retriever(path='../Documents', extension="txt")
-    # global_retriever = unstructured_retriever(path='../Documents')
 
     supabase_url = os.getenv("SUPABASE_URL")
     supabase_key = os.getenv("SUPABASE_KEY")
-    URI = os.getenv("SUPABASE_URI")
     document_table = "documents"
     supabase: Client = create_client(supabase_url, supabase_key)
 
@@ -68,21 +50,17 @@ async def lifespan(app: FastAPI):
     vector_store = GetVectorStore(embeddings, supabase, document_table)
     global_retriever = vector_store.as_retriever(search_kwargs={"k": 4})
 
-    print(time.time() - start)
+    print(f"Initialization time: {time.time() - start}")
     yield
 
-# 定義依賴注入函數,用於在請求處理過程中獲取全局變量
 def get_retriever():
     return global_retriever
 
-
 def get_vector_store():
     return vector_store
 
-# 創建FastAPI應用實例並配置以及中間件
 app = FastAPI(lifespan=lifespan)
 
-# templates = Jinja2Templates(directory="temp")
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -91,31 +69,16 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-
-# 定義API路由和處理函數
-# 處理傳入的問題並返回答案
 @app.get("/answer2")
 def multi_query_answer(question, retriever=Depends(get_retriever)):
     try:
         start = time.time()
 
         with get_openai_callback() as cb:
-            # qa_doc = gen_doc_from_database()
-            # qa_history_doc = gen_doc_from_history()
-            # qa_doc.extend(qa_history_doc)
-            # vectorstore = Chroma.from_documents(documents=qa_doc, embedding=OpenAIEmbeddings(), collection_name="qa_pairs")
-            # retriever_qa = vectorstore.as_retriever(search_kwargs={"k": 3})
-            # final_answer, reference_docs = naive_rag_for_qapairs(question, retriever_qa)
-            final_answer = 'False'
-            if final_answer == 'False':
-                final_answer, reference_docs = multi_query(question, retriever, chat_history=[])
-
-        # print(CHAT_HISTORY)
-        
-        # with get_openai_callback() as cb:
-        #     final_answer, reference_docs = multi_query(question, retriever)
+            final_answer, reference_docs = multi_query(question, retriever, chat_history=[])
+
         processing_time = time.time() - start
-        print(processing_time)
+        print(f"Processing time: {processing_time}")
         save_history(question, final_answer, reference_docs, cb, processing_time)
 
         return {"Answer": final_answer}
@@ -127,48 +90,39 @@ class ChatHistoryItem(BaseModel):
     q: str
     a: str
 
-# 處理帶有歷史聊天紀錄的問題並返回答案
 @app.post("/answer_with_history")
 def multi_query_answer(question: Optional[str] = '', chat_history: List[ChatHistoryItem] = Body(...), retriever=Depends(get_retriever)):
     start = time.time()
     
     chat_history = [(item.q, item.a) for item in chat_history if item.a != ""]
-    print(chat_history)
-
-    # TODO: similarity search
+    print(f"Chat history: {chat_history}")
     
     with get_openai_callback() as cb:
         final_answer, reference_docs = multi_query(question, retriever, chat_history)
     processing_time = time.time() - start
-    print(processing_time)
+    print(f"Processing time: {processing_time}")
     save_history(question, final_answer, reference_docs, cb, processing_time)
 
     return {"Answer": final_answer}
 
-# 處理帶有聊天歷史紀錄和文件名過濾的問題,並返回答案
 @app.post("/answer_with_history2")
 def multi_query_answer(question: Optional[str] = '', extension: Optional[str] = 'pdf', chat_history: List[ChatHistoryItem] = Body(...), retriever=Depends(get_retriever)):
     start = time.time()
 
-    retriever = vector_store.as_retriever(search_kwargs={"k": 4,
-                                                         'filter': {'extension':extension}})
+    retriever = vector_store.as_retriever(search_kwargs={"k": 4, 'filter': {'extension':extension}})
     
     chat_history = [(item.q, item.a) for item in chat_history if item.a != ""]
-    print(chat_history)
-
-    # TODO: similarity search
+    print(f"Chat history: {chat_history}")
     
     with get_openai_callback() as cb:
         final_answer, reference_docs = multi_query(question, retriever, chat_history)
     processing_time = time.time() - start
-    print(processing_time)
+    print(f"Processing time: {processing_time}")
     save_history(question, final_answer, reference_docs, cb, processing_time)
 
     return {"Answer": final_answer}
 
-# 保存歷史。將處理結果儲存到數據庫
 def save_history(question, answer, reference, cb, processing_time):
-    # reference = [doc.dict() for doc in reference]
     record = {
         'Question': [question],
         'Answer': [answer],
@@ -190,7 +144,6 @@ class history_output(BaseModel):
     Processing_time: float
     Time: datetime.datetime
 
-# 定義獲取歷史紀錄的路由
 @app.get('/history', response_model=List[history_output])
 async def get_history():
     engine = create_engine(URI, echo=True)
@@ -205,11 +158,5 @@ async def get_history():
 def read_root():
     return {"message": "Welcome to the Carbon Chatbot API"}
 
-
 if __name__ == "__main__":
-    uvicorn.run("RAG_app_copy:app", host='127.0.0.1', port=8081, reload=True)
-    
-# if __name__ == "__main__":
-#     uvicorn.run("RAG_app:app", host='cmm.ai', port=8081, reload=True, ssl_keyfile="/etc/letsencrypt/live/cmm.ai/privkey.pem", 
-#                 ssl_certfile="/etc/letsencrypt/live/cmm.ai/fullchain.pem")
-
+    uvicorn.run("RAG_app_copy:app", host='127.0.0.1', port=8081, reload=True)
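
For reference, a minimal client call against the reworked /answer_with_history endpoint might look as follows. This is a hypothetical sketch: it assumes the app is running locally on 127.0.0.1:8081 as configured above, reuses the sample question from taide_rag.py below, and invents the history turn and follow-up question.

import requests

# Previous turns; the endpoint drops items whose answer "a" is empty.
chat_history = [
    {"q": "什麼是碳排放獎勵辦法?", "a": "碳排放獎勵辦法是..."},
]

resp = requests.post(
    "http://127.0.0.1:8081/answer_with_history",
    params={"question": "請再說明申請流程"},  # question is passed as a query parameter (invented example)
    json=chat_history,                        # List[ChatHistoryItem] is sent as the raw JSON body
)
print(resp.json()["Answer"])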

+ 61 - 144
RAG_strategy.py

@@ -9,7 +9,14 @@ from langchain_core.runnables import RunnablePassthrough
 from langchain import hub
 from langchain.globals import set_llm_cache
 from langchain import PromptTemplate
-from langchain.llms.base import LLM
+import subprocess
+import json
+from typing import Any, List, Optional, Dict
+from langchain_core.callbacks import CallbackManagerForLLMRun
+from langchain_core.language_models import BaseChatModel
+from langchain_core.messages import BaseMessage, AIMessage, HumanMessage, SystemMessage
+from langchain_core.outputs import ChatResult, ChatGeneration
+from pydantic import Field
 
 from langchain_core.runnables import (
     RunnableBranch,
@@ -17,9 +24,6 @@ from langchain_core.runnables import (
     RunnableParallel,
     RunnablePassthrough,
 )
-from typing import Tuple, List, Optional
-from langchain_core.messages import AIMessage, HumanMessage
-
 
 from datasets import Dataset 
 from ragas import evaluate
@@ -29,92 +33,71 @@ from ragas.metrics import (
     context_recall,
     context_precision,
 )
-from typing import List
 import os
 from dotenv import load_dotenv
 load_dotenv('environment.env')
 
-########################################################################################################################
-########################################################################################################################
 from langchain.cache import SQLiteCache
-from langchain.cache import RedisSemanticCache
 from langchain_openai import OpenAIEmbeddings
 from langchain.globals import set_llm_cache
 
-########################################################################################################################
 import requests
 import openai
 openai_api_key = os.getenv("OPENAI_API_KEY")
 openai.api_key = openai_api_key
 URI = os.getenv("SUPABASE_URI")
 
-from typing import Optional, List, Any, Dict
-
-# 設置緩存,以減少對API的重複請求。使用Redis
+# Set up caching to reduce repeated API requests; uses SQLite.
 set_llm_cache(SQLiteCache(database_path=".langchain.db"))
-# set_llm_cache(RedisSemanticCache(redis_url="redis://localhost:6379", embedding=OpenAIEmbeddings(openai_api_key=openai_api_key), score_threshold=0.0005))
 
-# # TAIDE model on Ollama https://ollama.com/jcai/llama3-taide-lx-8b-chat-alpha1
-# def interact_with_model(messages, api_url="http://localhost:11434/v1/chat/completions"):
-#     print("Using model: TAIDE")
-#     response = requests.post(api_url, json={"model": "jcai/llama3-taide-lx-8b-chat-alpha1:Q4_K_M", "messages": messages})
-#     return response.json()["choices"][0]["message"]["content"]
-
-import requests
-from typing import Optional, List, Any, Dict
-from langchain.llms.base import LLM
-
-class CustomTAIDELLM(LLM):
-    api_url: str = "http://localhost:11434/api/chat"
-    model_name: str = "taide-local"
-    system_prompt: str = "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"
-
-    def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
-        # Format the prompt according to TAIDE requirements
-        formatted_prompt = f"<s>[INST] <<SYS>>\n{self.system_prompt}\n<</SYS>>\n\n{prompt} [/INST]"
-        print(f"Formatted prompt being sent to TAIDE model: {formatted_prompt}")
-
-        payload = {
-            "model": self.model_name,
-            "messages": [
-                {"role": "system", "content": self.system_prompt},
-                {"role": "user", "content": prompt}
-            ]
-        }
-
-        try:
-            response = requests.post(self.api_url, json=payload)
-            response.raise_for_status()
-            result = response.json()
-            print(f"Full API response: {result}")
-            if "message" in result:
-                return result["message"]["content"].strip()
-            else:
-                print(f"Unexpected response structure: {result}")
-                return "Error: Unexpected response from the model."
-        except requests.RequestException as e:
-            print(f"Error calling Ollama API: {e}")
-            return f"Error: Unable to get response from the model. {str(e)}"
+system_prompt: str = "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"
+
+class OllamaChatModel(BaseChatModel):
+    model_name: str = Field(default="taide-local")
+
+    def _generate(
+            self,
+            messages: List[BaseMessage],
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[CallbackManagerForLLMRun] = None,
+            **kwargs: Any,
+    ) -> ChatResult:
+        formatted_messages = []
+        for msg in messages:
+            if isinstance(msg, HumanMessage):
+                formatted_messages.append({"role": "user", "content": msg.content})
+            elif isinstance(msg, AIMessage):
+                formatted_messages.append({"role": "assistant", "content": msg.content})
+            elif isinstance(msg, SystemMessage):
+                 formatted_messages.append({"role": "system", "content": msg.content})
+
+        prompt = f"<s>[INST] <<SYS>>\n{system_prompt}\n<</SYS>>\n\n"
+        for msg in formatted_messages:
+            if msg['role'] == 'user':
+                prompt += f"{msg['content']} [/INST]"
+            elif msg['role'] == "assistant":
+                prompt += f"{msg['content']} </s><s>[INST]"
+
+        command = ["ollama", "run", self.model_name, prompt]
+        result = subprocess.run(command, capture_output=True, text=True)
+
+        if result.returncode != 0:
+            raise Exception(f"Ollama command failed: {result.stderr}")
+        
+        content = result.stdout.strip()
 
+        message = AIMessage(content=content)
+        generation = ChatGeneration(message=message)
+        return ChatResult(generations=[generation])
+    
     @property
     def _llm_type(self) -> str:
-        return "custom_taide"
-
-    def get_model_name(self):
-        return self.model_name
-
-    @property
-    def _identifying_params(self) -> Dict[str, Any]:
-        return {"model_name": self.model_name}
-
-# Create an instance of the custom LLM
-taide_llm = CustomTAIDELLM(api_url="http://localhost:11434/api/chat", model_name="taide-local")
+        return "ollama-chat-model"
+    
+taide_llm = OllamaChatModel(model_name="taide-local")
 
-# 生成多個不同版本的問題,進行檢索,並返回答案和參考文檔
 def multi_query(question, retriever, chat_history):
-
     def multi_query_chain():
-        # Multi Query: Different Perspectives
         template = """You are an AI language model assistant. Your task is to generate three 
         different versions of the given user question to retrieve relevant documents from a vector 
         database. By generating multiple perspectives on the user question, your goal is to help
@@ -123,27 +106,12 @@ def multi_query(question, retriever, chat_history):
 
         You must return original question also, which means that you return 1 original version + 3 different versions = 4 questions.
         
-        
         Original question: {question}"""
         prompt_perspectives = ChatPromptTemplate.from_template(template)
 
-        messages = [
-            {"role": "system", "content": template},
-            {"role": "user", "content": question},
-        ]
-        # generate_queries = interact_with_model(messages).split("\n")
-
-        
-        # llm = ChatOpenAI(model="gpt-4-1106-preview")
-        # llm = ChatOllama(model="llama3", num_gpu=1, temperature=0)
-        # llm = ChatOllama(model="gemma2", temperature=0)
-        # llm = ChatOllama(model=model)
-
-
         generate_queries = (
             prompt_perspectives 
             | taide_llm
-            # | llm
             | StrOutputParser() 
             | (lambda x: x.split("\n"))
         )
@@ -151,14 +119,9 @@ def multi_query(question, retriever, chat_history):
         return generate_queries
 
     def get_unique_union(documents: List[list]):
-        """ Unique union of retrieved docs """
-        # Flatten list of lists, and convert each Document to string
         flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
-        # Get unique documents
         unique_docs = list(set(flattened_docs))
-        # Return
         return [loads(doc) for doc in unique_docs]
-    
 
     _search_query = get_search_query()
     modified_question = _search_query.invoke({"question":question, "chat_history": chat_history})
@@ -173,7 +136,6 @@ def multi_query(question, retriever, chat_history):
 
     return answer, docs
 
-# 根據檢索到的文檔和用戶問題生成最後答案
 def multi_query_rag_prompt(retrieval_chain, question):
     template = """Answer the following question based on this context:
 
@@ -205,25 +167,8 @@ def multi_query_rag_prompt(retrieval_chain, question):
     except Exception as e:
         print(f"Error invoking rag_chain: {e}")
         return "Error occurred while processing the question."
-    
-########################################################################################################################
 
-# 將聊天紀錄個跟進問題轉化為獨立問題
 def get_search_query():
-    # Condense a chat history and follow-up question into a standalone question
-    # 
-    # _template = """Given the following conversation and a follow up question, 
-    # rephrase the follow up question to be a standalone question to help others understand the question without having to go back to the conversation transcript.
-    # Generate standalone question in its original language.
-    # Chat History:
-    # {chat_history}
-    # Follow Up Input: {question}
-
-    # Hint:
-    # * Refer to chat history and add the subject to the question
-    # * Replace the pronouns in the question with the correct person or thing, please refer to chat history
-    
-    # Standalone question:"""  # noqa: E501
     _template = """Rewrite the following query by incorporating relevant context from the conversation history.
     The rewritten query should:
     
@@ -244,7 +189,7 @@ def get_search_query():
     """
     CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
 
-    def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
+    def _format_chat_history(chat_history: List[tuple[str, str]]) -> List:
         buffer = []
         for human, ai in chat_history:
             buffer.append(HumanMessage(content=human))
@@ -252,11 +197,10 @@ def get_search_query():
         return buffer
 
     _search_query = RunnableBranch(
-        # If input includes chat_history, we condense it with the follow-up question
         (
             RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
                 run_name="HasChatHistoryCheck"
-            ),  # Condense follow-up question and chat into a standalone_question
+            ),
             RunnablePassthrough.assign(
                 chat_history=lambda x: _format_chat_history(x["chat_history"])
             )
@@ -264,48 +208,31 @@ def get_search_query():
             | ChatOpenAI()
             | StrOutputParser(),
         ),
-        # Else, we have no chat history, so just pass through the question
         RunnableLambda(lambda x : x["question"]),
     )
 
     return _search_query
-########################################################################################################################
-# 檢索文檔並生成答案
-def naive_rag(question, retriever):
-    #### RETRIEVAL and GENERATION ####
 
-    # Prompt
+def naive_rag(question, retriever):
     prompt = hub.pull("rlm/rag-prompt")
 
-    # LLM
-    # llm = ChatOpenAI(model_name="gpt-3.5-turbo")
-
-    # Post-processing
     def format_docs(docs):
         return "\n\n".join(doc.page_content for doc in docs)
 
     reference = retriever.get_relevant_documents(question)
     
-    # Chain
     rag_chain = (
         {"context": retriever | format_docs, "question": RunnablePassthrough()}
         | prompt
         | taide_llm
-        # | llm
         | StrOutputParser()
     )
 
-    # Question
     answer = rag_chain.invoke(question)
 
     return answer, reference
-################################################################################################
-# 處理question-answer pairs的檢索和生成答案
-def naive_rag_for_qapairs(question, retriever):
-    #### RETRIEVAL and GENERATION ####
 
-    # Prompt
-    # prompt = hub.pull("rlm/rag-prompt")
+def naive_rag_for_qapairs(question, retriever):
     template = """You are an assistant for question-answering tasks. 
     Use the following pieces of retrieved context to answer the question. 
     Following retrieved context is question-answer pairs of historical QA, Find the suitable answer from the qa pairs
@@ -320,19 +247,13 @@ def naive_rag_for_qapairs(question, retriever):
     """
     prompt = PromptTemplate.from_template(template)
 
-    # LLM
     llm = ChatOpenAI(model_name="gpt-4-0125-preview")
-    # llm = ChatOllama(model="llama3", num_gpu=1, temperature=0)
-    # llm = ChatOllama(model="gemma2", num_gpu=1, temperature=0)
 
-
-    # Post-processing
     def format_docs(docs):
         return "\n\n".join(doc.page_content for doc in docs)
 
     reference = retriever.get_relevant_documents(question)
     
-    # Chain
     rag_chain = (
         {"context": retriever | format_docs, "question": RunnablePassthrough()}
         | prompt
@@ -340,19 +261,16 @@ def naive_rag_for_qapairs(question, retriever):
         | StrOutputParser()
     )
 
-    # Question
     answer = rag_chain.invoke(question)
 
     return answer, reference
-########################################################################################################################
 
 def rag_score(question, ground_truth, answer, reference_docs):
-    
     datasets = {
-              "question": [question],       # question: list[str]
-              "answer": [answer],           # answer: list[str]
-              "contexts": [reference_docs], # contexts: list[list[str]]
-              "ground_truths": [[ground_truth]] # ground_truth: list[list[str]]
+              "question": [question],
+              "answer": [answer],
+              "contexts": [reference_docs],
+              "ground_truths": [[ground_truth]]
             }
     evalsets = Dataset.from_dict(datasets)
 
@@ -371,9 +289,8 @@ def rag_score(question, ground_truth, answer, reference_docs):
     result_df.to_csv('ragas_rag.csv')
     return result
 
-
 def print_current_model(llm):
-    if isinstance(llm, CustomTAIDELLM):
-        print(f"Currently using model: {llm.get_model_name()}")
+    if isinstance(llm, OllamaChatModel):
+        print(f"Currently using model: {llm.model_name}")
     else:
         pass
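
The new OllamaChatModel wrapper shells out to the Ollama CLI via subprocess.run rather than calling the HTTP API the removed CustomTAIDELLM used, so each call is blocking and unstreamed, and the module-level system_prompt is always prepended. A minimal usage sketch, assuming a model named taide-local has already been created in Ollama and the ollama binary is on PATH:

from langchain_core.messages import HumanMessage
from RAG_strategy import taide_llm, print_current_model

print_current_model(taide_llm)  # prints: Currently using model: taide-local
# invoke() ends up in _generate(), which runs `ollama run taide-local <formatted prompt>`.
reply = taide_llm.invoke([HumanMessage(content="什麼是碳排放獎勵辦法?")])
print(reply.content)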

+ 0 - 29
ragas_data_generation.py

@@ -1,29 +0,0 @@
-from dotenv import load_dotenv
-load_dotenv('environment.env')
-
-
-from ragas.testset.generator import TestsetGenerator
-from ragas.testset.evolutions import simple, reasoning, multi_context 
-from langchain_openai import ChatOpenAi, OpenAIEmbeddings
-from langchain_community.document_loaders import DirectoryLoader
-from langchain_community.document_loaders import PyPDFLoader
-
-loader = DirectoryLoader("Documents")
-for file in 
-documents = loader.load()
-
-
-for document in documents:
-    document.metadata['filename'] = document.metadata['source']
-
-generator_llm = ChatOpenAi(model = "gpt-3.5-turbo-16k")
-critic_llm = ChatOpenAI(model="gpt-4")
-embeddings = OpenAIEmbeddings()
-
-generator = TestGenerator.from_langchain(
-    generator_llm,
-    critic_llm,
-    embeddings
-)
-# Generate testset
-testset = generator.generate_with_langchain_docs(documents, test_size=10, distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25})
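
Note that the deleted script also contained several errors: ChatOpenAi instead of ChatOpenAI, an unfinished "for file in" line, and TestGenerator instead of TestsetGenerator. For the record, a corrected sketch of what it appears to have been doing, assuming the ragas 0.1-style testset API it imports:

from dotenv import load_dotenv
load_dotenv('environment.env')

from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_community.document_loaders import DirectoryLoader

# Load the source documents and tag each one with its filename, as ragas expects.
loader = DirectoryLoader("Documents")
documents = loader.load()
for document in documents:
    document.metadata['filename'] = document.metadata['source']

generator_llm = ChatOpenAI(model="gpt-3.5-turbo-16k")
critic_llm = ChatOpenAI(model="gpt-4")
embeddings = OpenAIEmbeddings()

generator = TestsetGenerator.from_langchain(generator_llm, critic_llm, embeddings)

# Generate a small synthetic test set with a mix of question types.
testset = generator.generate_with_langchain_docs(
    documents,
    test_size=10,
    distributions={simple: 0.5, reasoning: 0.25, multi_context: 0.25},
)
print(testset.to_pandas())  # inspect the generated question/ground-truth pairs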

+ 0 - 31
run.sh

@@ -1,37 +1,6 @@
 #!/bin/bash
 
-# Function to check if Docker is running
-docker_running() {
-    docker info >/dev/null 2>&1
-}
 
-# Start Docker if it's not already running
-if ! docker_running; then
-    echo "Starting Docker..."
-    open -a Docker
-    
-    # Wait for Docker to start
-    while ! docker_running; do
-        echo "Waiting for Docker to start..."
-        sleep 5
-    done
-    echo "Docker is now running"
-fi
-
-# Get the script directory
-script_dir=$(dirname "$0")
-cd "$script_dir"
-
-# Start the services defined in docker-compose.yml
-echo "Starting services with Docker Compose..."
-docker-compose up -d
-echo "Waiting for services to start..."
-sleep 20 
-
-# Change to the directory containing Python script
-cd "$script_dir/systex-RAG-sherry"
-echo "Running RAG application..."
-python ollama_chat.py
 
 # Make this script executable:
 # chmod +x run.sh

+ 30 - 0
taide_rag.py

@@ -0,0 +1,30 @@
+from dotenv import load_dotenv
+from langchain.vectorstores import Chroma
+import os
+load_dotenv('environment.env')
+openai_api_key = os.getenv("OPENAI_API_KEY")
+from RAG_strategy import taide_llm, multi_query, naive_rag
+from langchain.vectorstores import FAISS
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.document_loaders import TextLoader
+from langchain.text_splitter import CharacterTextSplitter
+
+
+
+# Load and prepare a sample document
+loader = TextLoader("test_data.txt")
+documents = loader.load()
+text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
+docs = text_splitter.split_documents(documents)
+
+# Create a vector store
+embeddings = OpenAIEmbeddings()
+vectorstore = Chroma.from_documents(docs, embeddings)
+retriever = vectorstore.as_retriever()
+
+# Test multi_query
+print("\nTesting multi_query:")
+question = "什麼是碳排放獎勵辦法?"
+answer, docs = multi_query(question, retriever, [])
+print(f"Question: {question}")
+print(f"Answer: {answer}")

+ 0 - 102
test_connection.py

@@ -1,102 +0,0 @@
-# import os
-# import sys
-
-# from supabase import create_client, Client
-
-# # # Load environment variables
-# from dotenv import load_dotenv
-# load_dotenv('environment.env')
-
-# # Get Supabase configuration from environment variables
-# SUPABASE_URL = os.getenv("SUPABASE_URL")
-# SUPABASE_KEY = os.getenv("SUPABASE_KEY")
-# SUPABASE_URI = os.getenv("SUPABASE_URI")
-# OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
-
-# # Check if environment variables are successfully loaded
-# if not SUPABASE_URL or not SUPABASE_KEY or not OPENAI_API_KEY or not SUPABASE_URI:
-#     print("Please ensure SUPABASE_URL, SUPABASE_KEY, and OPENAI_API_KEY are correctly set in the .env file.")
-#     sys.exit(1)
-# else:
-#     print("Connection successful.")
-#     try:
-#         supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
-#         print("Client created successfully.")
-#     except Exception as e:
-#         print("Client creation failed:", e)
-#         sys.exit(1)
-
-# # List all table names
-# try:
-#     response = supabase.table('information_schema.tables').select('table_name').eq('table_schema', 'public').execute()
-#     table_names = [table['table_name'] for table in response.data]
-#     print("All table names:")
-#     for name in table_names:
-#         print(name)
-# except Exception as e:
-#     print("Connection failed:", e)
-#     sys.exit(1)
-
-
-# ### Test hugging face tokens for the TAIDE local model. ######################################################
-# from transformers import AutoTokenizer, AutoModelForCausalLM
-
-# token = os.getenv("HF_API_KEY_7B4BIT")
-
-# # Check if the token is loaded correctly
-# if token is None:
-#     raise ValueError("Hugging Face API token is not set. Please check your environment.env file.")
-
-# # Load the tokenizer and model with the token
-# try:
-#     tokenizer = AutoTokenizer.from_pretrained("../TAIDE-LX-7B-Chat-4bit", token=token)  
-#     model = AutoModelForCausalLM.from_pretrained("../TAIDE-LX-7B-Chat-4bit", token=token)
-    
-#     # Verify the model and tokenizer
-#     print(f"Loaded tokenizer: {tokenizer.name_or_path}")
-#     print(f"Loaded model: {model.name_or_path}")
-
-#     # Optional: Print model and tokenizer configuration for more details
-#     print(f"Model configuration: {model.config}")
-#     print(f"Tokenizer configuration: {tokenizer}")
-
-# except Exception as e:
-#     print(f"Error loading model or tokenizer: {e}")
-
-#################################################################################################################
-# import torch
-# from transformers import AutoModelForCausalLM, AutoTokenizer
-# from huggingface_hub import hf_hub_download
-# from llama_cpp import Llama
-
-# ## Download the GGUF model
-# model_name = "TheBloke/Mixtral-8x7B-Instruct-v0.1-GGUF"
-# model_file = "mixtral-8x7b-instruct-v0.1.Q4_K_M.gguf" # this is the specific model file we'll use in this example. It's a 4-bit quant, but other levels of quantization are available in the model repo if preferred
-# model_path = hf_hub_download(model_name, filename=model_file)
-
-
-
-
-# import requests
-
-# def generate_response(input_text, max_length=512, temperature=0.7):
-#     # URL to interact with the model
-#     url = "http://localhost:11434/v1/chat/completions"  # Adjust based on how Ollama exposes the model
-
-#     # Payload to send to the model
-#     payload = {
-#         "input": input_text,
-#         "parameters": {
-#             "max_length": max_length,
-#             "temperature": temperature
-#         }
-#     }
-
-#     # Make a request to the model
-#     response = requests.post(url, json=payload)
-#     return response.json()["output"]
-
-# if __name__ == "__main__":
-#     input_text = "I believe the meaning of life is"
-#     response = generate_response(input_text, max_length=128, temperature=0.5)
-#     print(f"Model: {response}")