import faiss import numpy as np import json from time import time import asyncio from datasets import Dataset from typing import List from dotenv import load_dotenv import os import pickle from supabase.client import Client, create_client from langchain_openai import OpenAIEmbeddings, ChatOpenAI from langchain.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser import pandas as pd from langchain_core.documents import Document from langchain.load import dumps, loads from langchain_community.chat_models import ChatOllama from langchain.callbacks import get_openai_callback # Import from the parent directory import sys from RAG_strategy import multi_query_chain sys.path.append('..') # from RAG_strategy_Taide import taide_llm, system_prompt, multi_query system_prompt: str = "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。" # llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0) from langchain.prompts import ChatPromptTemplate from langchain_core.output_parsers import StrOutputParser from file_loader.add_vectordb import GetVectorStore # from local_llm import ollama_, hf # # from local_llm import ollama_, taide_llm, hf # llm = hf() # llm = taide_llm # Import RAGAS metrics from ragas import evaluate from ragas.metrics import answer_relevancy, faithfulness, context_recall, context_precision # Load environment variables load_dotenv('../../.env') supabase_url = os.getenv("SUPABASE_URL") supabase_key = os.getenv("SUPABASE_KEY") openai_api_key = os.getenv("OPENAI_API_KEY") document_table = "documents2" # Initialize Supabase client supabase: Client = create_client(supabase_url, supabase_key) # Initialize embeddings and chat model embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key) def download_embeddings(): response = supabase.table(document_table).select("id, embedding, metadata, content").execute() # response = supabase.table(document_table).select("id, embedding, metadata, content").eq('metadata ->> source', 'supplement.docx').execute() embeddings = [] ids = [] metadatas = [] contents = [] for item in response.data: embedding = json.loads(item['embedding']) embeddings.append(embedding) ids.append(item['id']) metadatas.append(item['metadata']) contents.append(item['content']) return np.array(embeddings, dtype=np.float32), ids, metadatas, contents def create_faiss_index(embeddings): dimension = embeddings.shape[1] index = faiss.IndexFlatIP(dimension) # Use Inner Product for cosine similarity faiss.normalize_L2(embeddings) # Normalize embeddings for cosine similarity index.add(embeddings) return index def save_faiss_index(index, file_path): faiss.write_index(index, file_path) print(f"FAISS index saved to {file_path}") def load_faiss_index(file_path): if os.path.exists(file_path): index = faiss.read_index(file_path) print(f"FAISS index loaded from {file_path}") return index return None def save_metadata(ids, metadatas, contents, file_path): with open(file_path, 'wb') as f: pickle.dump((ids, metadatas, contents), f) print(f"Metadata saved to {file_path}") def load_metadata(file_path): if os.path.exists(file_path): with open(file_path, 'rb') as f: ids, metadatas, contents = pickle.load(f) print(f"Metadata loaded from {file_path}") return ids, metadatas, contents return None, None, None def search_faiss(index, query_vector, k=4): query_vector = np.array(query_vector, dtype=np.float32).reshape(1, -1) faiss.normalize_L2(query_vector) distances, indices = index.search(query_vector, k) return distances[0], indices[0] class FAISSRetriever: def __init__(self, index, ids, metadatas, contents, embeddings_model): self.index = index self.ids = ids self.metadatas = metadatas self.contents = contents self.embeddings_model = embeddings_model def get_relevant_documents(self, query: str, k: int = 4) -> List[Document]: query_vector = self.embeddings_model.embed_query(query) _, indices = search_faiss(self.index, query_vector, k=k) return [ Document(page_content=self.contents[i], metadata=self.metadatas[i]) for i in indices ] def map(self, query_list: List[list]) -> List[Document]: def get_unique_union(documents: List[list]): """ Unique union of retrieved docs """ # Flatten list of lists, and convert each Document to string flattened_docs = [dumps(doc) for sublist in documents for doc in sublist] # Get unique documents unique_docs = list(set(flattened_docs)) # Return return [loads(doc) for doc in unique_docs] documents = [] for query in query_list: if query != "": docs = self.get_relevant_documents(query) documents.extend(docs) return get_unique_union(documents) def load_qa_pairs(): # df = pd.read_csv("../QA_database_rows.csv") response = supabase.table('QA_database').select("Question, Answer").execute() df = pd.DataFrame(response.data) return df['Question'].tolist(), df['Answer'].tolist() def faiss_multiquery(question: str, retriever: FAISSRetriever, llm, k: int = 4): generate_queries = multi_query_chain(llm) questions = generate_queries.invoke(question) questions = [item for item in questions if item != ""] questions.append(question) for q in questions: print(q) # docs = list(map(retriever.get_relevant_documents, questions)) docs = list(map(lambda query: retriever.get_relevant_documents(query, k=k), questions)) docs = [item for sublist in docs for item in sublist] return docs def faiss_query(retriever, question: str, llm, k: int = 4, multi_query: bool = False) -> str: if multi_query: docs = faiss_multiquery(question, retriever, llm, k) # print(docs) else: docs = retriever.get_relevant_documents(question, k) # print(docs) context = docs system_prompt: str = "你是一個來自台灣的AI助理,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。" template = """ <|begin_of_text|> <|start_header_id|>system<|end_header_id|> 你是一個來自台灣的ESG的AI助理,請用繁體中文回答問題 \n You should not mention anything about "根據提供的文件內容" or other similar terms. 請盡可能的詳細回答問題。 如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。" 勿回答無關資訊或任何與某特定公司相關的問題 <|eot_id|> <|start_header_id|>user<|end_header_id|> Answer the following question based on this context: {context} Question: {question} 用繁體中文回答問題,請用一段話詳細的回答。勿回答無關資訊或任何與某特定公司相關的問題。 如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。" <|eot_id|> <|start_header_id|>assistant<|end_header_id|> """ prompt = ChatPromptTemplate.from_template( system_prompt + "\n\n" + template ) rag_chain = prompt | llm | StrOutputParser() return context, rag_chain.invoke({"context": context, "question": question}) def create_faiss_retriever(): faiss_index_path = "faiss_index.bin" metadata_path = "faiss_metadata.pkl" index = load_faiss_index(faiss_index_path) ids, metadatas, contents = load_metadata(metadata_path) if index is None or ids is None: print("FAISS index or metadata not found. Creating new index...") print("Downloading embeddings from Supabase...") embeddings_array, ids, metadatas, contents = download_embeddings() print("Creating FAISS index...") index = create_faiss_index(embeddings_array) save_faiss_index(index, faiss_index_path) save_metadata(ids, metadatas, contents, metadata_path) else: print("Using existing FAISS index and metadata.") print("Creating FAISS retriever...") faiss_retriever = FAISSRetriever(index, ids, metadatas, contents, embeddings) return faiss_retriever async def run_evaluation(): local_llm = "llama3-groq-tool-use:latest" llama3 = ChatOllama(model=local_llm, temperature=0) openai = ChatOpenAI(model_name="gpt-4o-mini", temperature=0) retriever = create_faiss_retriever() questions, ground_truths = load_qa_pairs() for question, ground_truth in zip(questions[:5], ground_truths[:5]): print(f"\nQuestion: {question}") start_time = time() llama3_docs, llama3_answer = faiss_query(retriever, question, llama3, multi_query=True) llama3_time = time() - start_time print(f"llama3 Answer: {llama3_answer}") print(f"llama3 Time: {llama3_time:.4f} seconds") llama3_datasets = { "question": [question], "answer": [llama3_answer], "contexts": [[doc.page_content for doc in llama3_docs]], "ground_truth": [ground_truth] } llama3_evalsets = Dataset.from_dict(llama3_datasets) llama3_result = evaluate( llama3_evalsets, metrics=[ context_precision, faithfulness, answer_relevancy, context_recall, ], ) print("llama3 RAGAS Evaluation:") llama3_result['time'] = llama3_time df = llama3_result.to_pandas() print(df) df.to_csv("llama.csv", mode='a') ############################################################# start_time = time() openai_docs, openai_answer = faiss_query(retriever, question, openai, multi_query=True) openai_time = time() - start_time print(f"openai Answer: {openai_answer}") print(f"openai Time: {openai_time:.4f} seconds") openai_datasets = { "question": [question], "answer": [openai_answer], "contexts": [[doc.page_content for doc in openai_docs]], "ground_truth": [ground_truth] } openai_evalsets = Dataset.from_dict(openai_datasets) openai_result = evaluate( openai_evalsets, metrics=[ context_precision, faithfulness, answer_relevancy, context_recall, ], ) print("openai RAGAS Evaluation:") openai_result['time'] = llama3_time df = openai_result.to_pandas() print(df) df.to_csv("openai.csv", mode='a') print("\nPerformance comparison complete.") def load_kg_q(): import pandas as pd sheet_id ="1eVmO8fIjjtEQBlmqtipi2d0H-8VgJkI-icqhSTfdhHQ" gid = "0" df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={gid}") return df['Question'].tolist() async def run_q_batch(): # local_llm = "llama3-groq-tool-use:latest" # llama3 = ChatOllama(model=local_llm, temperature=0) openai = ChatOpenAI(model_name="gpt-4o-mini", temperature=0) retriever = create_faiss_retriever() questions = load_kg_q() result_list = [] for question in questions: print(f"\nQuestion: {question}") with get_openai_callback() as cb: start_time = time() # openai_docs, openai_answer = faiss_query(retriever, question, openai, multi_query=True) documents, answer = faiss_query(retriever, question, openai, multi_query=True) openai_time = time() - start_time print(f"Answer: {answer}") print(f"Time: {openai_time:.4f} seconds") save_history(question, answer, cb, openai_time) result = {'Question': question, 'Answer': answer} result_list.append(result) df = pd.DataFrame.from_records(result_list) print(df) df.to_csv("kg_qa.csv", mode='w') print("\nQA complete.") def save_history(question, answer, cb, processing_time): # reference = [doc.dict() for doc in reference] record = { 'Question': question, 'Answer': answer, 'Total_Tokens': cb.total_tokens, 'Total_Cost': cb.total_cost, 'Processing_time': processing_time, } response = ( supabase.table("agent_records") .insert(record) .execute() ) if __name__ == "__main__": # asyncio.run(run_evaluation()) asyncio.run(run_q_batch()) # print(load_kg_q())