import faiss
import numpy as np
import json
from time import time
import asyncio
from datasets import Dataset
from typing import List
from dotenv import load_dotenv
import os
import pickle
from supabase.client import Client, create_client
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
import pandas as pd
from langchain_core.documents import Document
from langchain.load import dumps, loads
from langchain_community.chat_models import ChatOllama
from langchain.callbacks import get_openai_callback

# Import from the parent directory: the path must be appended *before* the import.
import sys
sys.path.append('..')
from RAG_strategy import multi_query_chain
# from RAG_strategy_Taide import taide_llm, system_prompt, multi_query

# Default system prompt (Traditional Chinese): "You are an AI assistant from
# Taiwan named TAIDE, happy to help users from a Taiwanese perspective, and
# you answer questions in Traditional Chinese."
system_prompt: str = "你是一個來自台灣的AI助理,你的名字是 TAIDE,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"

# Alternative LLM backends, kept for reference:
# llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
# from local_llm import ollama_, taide_llm, hf
# llm = hf()
# llm = taide_llm

from file_loader.add_vectordb import GetVectorStore
# RAGAS metrics for RAG evaluation
from ragas import evaluate
from ragas.metrics import answer_relevancy, faithfulness, context_recall, context_precision

# Load environment variables
load_dotenv('../../.env')
supabase_url = os.getenv("SUPABASE_URL")
supabase_key = os.getenv("SUPABASE_KEY")
openai_api_key = os.getenv("OPENAI_API_KEY")
document_table = "documents2"

# Initialize Supabase client
supabase: Client = create_client(supabase_url, supabase_key)

# Initialize the embeddings model
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
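# Note: with no explicit model argument, OpenAIEmbeddings falls back to the
# library default (text-embedding-ada-002 in older langchain-openai releases,
# which returns 1536-dimensional vectors). The FAISS index dimension below is
# derived from the downloaded vectors, so the two stay consistent as long as
# the same embeddings model wrote the Supabase table.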

def download_embeddings():
    """Fetch all document rows (id, embedding, metadata, content) from Supabase."""
    response = supabase.table(document_table).select("id, embedding, metadata, content").execute()
    # response = supabase.table(document_table).select("id, embedding, metadata, content").eq('metadata ->> source', 'supplement.docx').execute()
    vectors = []  # named `vectors` to avoid shadowing the module-level `embeddings`
    ids = []
    metadatas = []
    contents = []
    for item in response.data:
        vectors.append(json.loads(item['embedding']))  # embeddings are stored as JSON strings
        ids.append(item['id'])
        metadatas.append(item['metadata'])
        contents.append(item['content'])
    return np.array(vectors, dtype=np.float32), ids, metadatas, contents

def create_faiss_index(embeddings):
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatIP(dimension)  # inner product over L2-normalized vectors = cosine similarity
    faiss.normalize_L2(embeddings)        # normalize in place before adding
    index.add(embeddings)
    return index
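
# A minimal sketch (illustrative, not called by the pipeline) of why IndexFlatIP
# plus normalize_L2 yields cosine similarity: after normalization, the inner
# product of two vectors equals the cosine of the angle between them.
def _cosine_similarity_sketch():
    a = np.array([[3.0, 4.0]], dtype=np.float32)
    b = np.array([[4.0, 3.0]], dtype=np.float32)
    faiss.normalize_L2(a)
    faiss.normalize_L2(b)
    demo_index = faiss.IndexFlatIP(2)
    demo_index.add(a)
    scores, _ = demo_index.search(b, 1)
    # dot(a, b) / (|a| * |b|) = 24 / 25 = 0.96
    assert abs(scores[0][0] - 0.96) < 1e-5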

def save_faiss_index(index, file_path):
    faiss.write_index(index, file_path)
    print(f"FAISS index saved to {file_path}")

def load_faiss_index(file_path):
    if os.path.exists(file_path):
        index = faiss.read_index(file_path)
        print(f"FAISS index loaded from {file_path}")
        return index
    return None

def save_metadata(ids, metadatas, contents, file_path):
    with open(file_path, 'wb') as f:
        pickle.dump((ids, metadatas, contents), f)
    print(f"Metadata saved to {file_path}")

def load_metadata(file_path):
    if os.path.exists(file_path):
        with open(file_path, 'rb') as f:
            ids, metadatas, contents = pickle.load(f)
        print(f"Metadata loaded from {file_path}")
        return ids, metadatas, contents
    return None, None, None

def search_faiss(index, query_vector, k=4):
    query_vector = np.array(query_vector, dtype=np.float32).reshape(1, -1)
    faiss.normalize_L2(query_vector)  # the query must be normalized like the indexed vectors
    distances, indices = index.search(query_vector, k)
    return distances[0], indices[0]
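
# Illustrative usage (assumes an index and its parallel lists already exist):
#   query_vec = embeddings.embed_query("What is a carbon inventory?")
#   distances, indices = search_faiss(index, query_vec, k=4)
#   top_hit = contents[indices[0]]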

class FAISSRetriever:
    def __init__(self, index, ids, metadatas, contents, embeddings_model):
        self.index = index
        self.ids = ids
        self.metadatas = metadatas
        self.contents = contents
        self.embeddings_model = embeddings_model

    def get_relevant_documents(self, query: str, k: int = 4) -> List[Document]:
        query_vector = self.embeddings_model.embed_query(query)
        _, indices = search_faiss(self.index, query_vector, k=k)
        return [
            Document(page_content=self.contents[i], metadata=self.metadatas[i])
            for i in indices
        ]

    def map(self, query_list: List[str]) -> List[Document]:
        def get_unique_union(documents: List[List[Document]]):
            """Unique union of retrieved docs."""
            # Flatten the list of lists and serialize each Document to a string
            flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
            # Deduplicate, then deserialize back to Documents
            unique_docs = list(set(flattened_docs))
            return [loads(doc) for doc in unique_docs]

        documents = []
        for query in query_list:
            if query != "":
                docs = self.get_relevant_documents(query)
                # append (not extend): get_unique_union expects a list of lists
                documents.append(docs)
        return get_unique_union(documents)
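
# Illustrative usage of map(): retrieve for several query variants and merge,
# dropping documents retrieved by more than one variant.
#   retriever = FAISSRetriever(index, ids, metadatas, contents, embeddings)
#   merged = retriever.map(["碳盤查是什麼?", "如何進行碳盤查?"])
#   # ("What is a carbon inventory?", "How is a carbon inventory conducted?")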

def load_qa_pairs():
    # df = pd.read_csv("../QA_database_rows.csv")
    response = supabase.table('QA_database').select("Question, Answer").execute()
    df = pd.DataFrame(response.data)
    return df['Question'].tolist(), df['Answer'].tolist()

def faiss_multiquery(question: str, retriever: FAISSRetriever, llm, k: int = 4):
    generate_queries = multi_query_chain(llm)
    questions = generate_queries.invoke(question)
    questions = [item for item in questions if item != ""]
    questions.append(question)  # always include the original question
    for q in questions:
        print(q)
    # docs = list(map(retriever.get_relevant_documents, questions))
    docs = list(map(lambda query: retriever.get_relevant_documents(query, k=k), questions))
    docs = [item for sublist in docs for item in sublist]  # flatten
    return docs

def faiss_query(retriever, question: str, llm, k: int = 4, multi_query: bool = False):
    """Retrieve context and generate an answer; returns (docs, answer)."""
    if multi_query:
        docs = faiss_multiquery(question, retriever, llm, k)
    else:
        docs = retriever.get_relevant_documents(question, k)
    context = docs

    # Local system prompt (shadows the module-level one): "You are an AI
    # assistant from Taiwan, happy to help users from a Taiwanese perspective,
    # and you answer questions in Traditional Chinese."
    system_prompt: str = "你是一個來自台灣的AI助理,樂於以台灣人的立場幫助使用者,會用繁體中文回答問題。"
    # Llama 3 chat template. The system turn says (in Traditional Chinese):
    # "You are an ESG AI assistant from Taiwan; answer in Traditional Chinese;
    # do not mention phrases like 'according to the provided documents'; answer
    # as thoroughly as possible; if you do not know the answer, reply with the
    # fallback message directing the user to test@systex.com; do not answer
    # irrelevant questions or anything about a specific company." The user turn
    # repeats the language, fallback, and off-topic constraints.
    template = """
    <|begin_of_text|>

    <|start_header_id|>system<|end_header_id|>
    你是一個來自台灣的ESG的AI助理,請用繁體中文回答問題 \n
    You should not mention anything about "根據提供的文件內容" or other similar terms.
    請盡可能的詳細回答問題。
    如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
    勿回答無關資訊或任何與某特定公司相關的問題
    <|eot_id|>

    <|start_header_id|>user<|end_header_id|>
    Answer the following question based on this context:
    {context}
    Question: {question}
    用繁體中文回答問題,請用一段話詳細的回答。勿回答無關資訊或任何與某特定公司相關的問題。
    如果你不知道答案請回答:"很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"

    <|eot_id|>

    <|start_header_id|>assistant<|end_header_id|>
    """
    prompt = ChatPromptTemplate.from_template(system_prompt + "\n\n" + template)
    rag_chain = prompt | llm | StrOutputParser()
    return context, rag_chain.invoke({"context": context, "question": question})
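
# Minimal usage sketch (names as defined in this module; the question text is
# illustrative):
#   retriever = create_faiss_retriever()
#   llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
#   docs, answer = faiss_query(retriever, "什麼是溫室氣體盤查?", llm, multi_query=True)
#   # ("What is a greenhouse gas inventory?")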

def create_faiss_retriever():
    faiss_index_path = "faiss_index.bin"
    metadata_path = "faiss_metadata.pkl"

    index = load_faiss_index(faiss_index_path)
    ids, metadatas, contents = load_metadata(metadata_path)
    if index is None or ids is None:
        print("FAISS index or metadata not found. Creating new index...")
        print("Downloading embeddings from Supabase...")
        embeddings_array, ids, metadatas, contents = download_embeddings()
        print("Creating FAISS index...")
        index = create_faiss_index(embeddings_array)
        save_faiss_index(index, faiss_index_path)
        save_metadata(ids, metadatas, contents, metadata_path)
    else:
        print("Using existing FAISS index and metadata.")

    print("Creating FAISS retriever...")
    faiss_retriever = FAISSRetriever(index, ids, metadatas, contents, embeddings)
    return faiss_retriever
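
# Note: the index is only rebuilt when the cached files are missing. If the
# Supabase documents table changes, delete faiss_index.bin and
# faiss_metadata.pkl to force a rebuild on the next run.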

async def run_evaluation():
    local_llm = "llama3-groq-tool-use:latest"
    llama3 = ChatOllama(model=local_llm, temperature=0)
    openai = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

    retriever = create_faiss_retriever()
    questions, ground_truths = load_qa_pairs()

    for question, ground_truth in zip(questions[:5], ground_truths[:5]):
        print(f"\nQuestion: {question}")

        start_time = time()
        llama3_docs, llama3_answer = faiss_query(retriever, question, llama3, multi_query=True)
        llama3_time = time() - start_time
        print(f"llama3 Answer: {llama3_answer}")
        print(f"llama3 Time: {llama3_time:.4f} seconds")

        llama3_datasets = {
            "question": [question],
            "answer": [llama3_answer],
            "contexts": [[doc.page_content for doc in llama3_docs]],
            "ground_truth": [ground_truth]
        }
        llama3_evalsets = Dataset.from_dict(llama3_datasets)
        llama3_result = evaluate(
            llama3_evalsets,
            metrics=[
                context_precision,
                faithfulness,
                answer_relevancy,
                context_recall,
            ],
        )

        print("llama3 RAGAS Evaluation:")
        llama3_result['time'] = llama3_time
        df = llama3_result.to_pandas()
        print(df)
        # Skip the header on append so repeated runs don't interleave header rows
        df.to_csv("llama.csv", mode='a', header=not os.path.exists("llama.csv"))

        #############################################################

        start_time = time()
        openai_docs, openai_answer = faiss_query(retriever, question, openai, multi_query=True)
        openai_time = time() - start_time
        print(f"openai Answer: {openai_answer}")
        print(f"openai Time: {openai_time:.4f} seconds")

        openai_datasets = {
            "question": [question],
            "answer": [openai_answer],
            "contexts": [[doc.page_content for doc in openai_docs]],
            "ground_truth": [ground_truth]
        }
        openai_evalsets = Dataset.from_dict(openai_datasets)
        openai_result = evaluate(
            openai_evalsets,
            metrics=[
                context_precision,
                faithfulness,
                answer_relevancy,
                context_recall,
            ],
        )

        print("openai RAGAS Evaluation:")
        openai_result['time'] = openai_time  # was llama3_time: a copy-paste bug
        df = openai_result.to_pandas()
        print(df)
        df.to_csv("openai.csv", mode='a', header=not os.path.exists("openai.csv"))

    print("\nPerformance comparison complete.")

def load_kg_q():
    # Read the question list straight from a public Google Sheet via its CSV export URL
    sheet_id = "1eVmO8fIjjtEQBlmqtipi2d0H-8VgJkI-icqhSTfdhHQ"
    gid = "0"
    df = pd.read_csv(f"https://docs.google.com/spreadsheets/d/{sheet_id}/export?format=csv&gid={gid}")
    return df['Question'].tolist()

async def run_q_batch():
    # local_llm = "llama3-groq-tool-use:latest"
    # llama3 = ChatOllama(model=local_llm, temperature=0)
    openai = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

    retriever = create_faiss_retriever()
    questions = load_kg_q()

    result_list = []
    for question in questions:
        print(f"\nQuestion: {question}")

        with get_openai_callback() as cb:
            start_time = time()
            # openai_docs, openai_answer = faiss_query(retriever, question, openai, multi_query=True)
            documents, answer = faiss_query(retriever, question, openai, multi_query=True)
            openai_time = time() - start_time
            print(f"Answer: {answer}")
            print(f"Time: {openai_time:.4f} seconds")
            save_history(question, answer, cb, openai_time)

        result_list.append({'Question': question, 'Answer': answer})

    df = pd.DataFrame.from_records(result_list)
    print(df)
    df.to_csv("kg_qa.csv", mode='w', index=False)
    print("\nQA complete.")

def save_history(question, answer, cb, processing_time):
    """Log one QA turn (with token usage and cost from the OpenAI callback) to Supabase."""
    # reference = [doc.dict() for doc in reference]
    record = {
        'Question': question,
        'Answer': answer,
        'Total_Tokens': cb.total_tokens,
        'Total_Cost': cb.total_cost,
        'Processing_time': processing_time,
    }
    supabase.table("agent_records").insert(record).execute()
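
# Assumed schema (inferred from the record above): the Supabase table
# "agent_records" has columns Question, Answer, Total_Tokens, Total_Cost,
# and Processing_time.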

if __name__ == "__main__":
    # asyncio.run(run_evaluation())
    asyncio.run(run_q_batch())
    # print(load_kg_q())