@@ -30,20 +30,53 @@ from ragas.metrics import (
     context_precision,
 )
 from typing import List
+import os
 from dotenv import load_dotenv
-load_dotenv()
+load_dotenv('environment.env')
 
 ########################################################################################################################
 ########################################################################################################################
 from langchain.cache import SQLiteCache
-
 from langchain.cache import RedisSemanticCache
 from langchain_openai import OpenAIEmbeddings
-# set_llm_cache(SQLiteCache(database_path=".langchain.db"))
-set_llm_cache(RedisSemanticCache(redis_url="redis://localhost:6380", embedding=OpenAIEmbeddings(), score_threshold=0.0005))
+from langchain.globals import set_llm_cache
+
 ########################################################################################################################
+import requests
+import openai
+openai_api_key = os.getenv("OPENAI_API_KEY")
+openai.api_key = openai_api_key
+URI = os.getenv("SUPABASE_URI")
+
+# Set up a cache to reduce repeated API requests; Redis is used here
+# set_llm_cache(SQLiteCache(database_path=".langchain.db"))
+# set_llm_cache(RedisSemanticCache(redis_url="redis://localhost:6380", embedding=OpenAIEmbeddings(openai_api_key=openai_api_key), score_threshold=0.0005))
+
+# # TAIDE model on Ollama https://ollama.com/jcai/llama3-taide-lx-8b-chat-alpha1
+# def interact_with_model(messages, api_url="http://localhost:11434/v1/chat/completions"):
+#     print("Using model: TAIDE")
+#     response = requests.post(api_url, json={"model": "jcai/llama3-taide-lx-8b-chat-alpha1:Q4_K_M", "messages": messages})
+#     return response.json()["choices"][0]["message"]["content"]
+
+# class CustomTAIDELLM(LLM):
+#     api_url: str = "http://localhost:11434/v1/chat/completions"
+
+#     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
+#         messages = [{"role": "user", "content": prompt}]
+#         response = requests.post(self.api_url, json={
+#             "model": "taide-local",  # Use your local model name
+#             "messages": messages
+#         })
+#         return response.json()["choices"][0]["message"]["content"]
+
+#     @property
+#     def _llm_type(self) -> str:
+#         return "custom_taide"
 
+# # Create an instance of the custom LLM
+# taide_llm = CustomTAIDELLM()
 
+# Generate several rephrasings of the question, run retrieval, and return the answer and the reference documents
 def multi_query(question, retriever, chat_history):
 
     def multi_query_chain():
@@ -60,9 +93,18 @@ def multi_query(question, retriever, chat_history):
 
         Original question: {question}"""
         prompt_perspectives = ChatPromptTemplate.from_template(template)
 
+        messages = [
+            {"role": "system", "content": template},
+            {"role": "user", "content": question},
+        ]
+        # generate_queries = interact_with_model(messages).split("\n")
+
-        llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")
+        llm = ChatOpenAI(model="gpt-4-1106-preview")
         # llm = ChatOllama(model="llama3", num_gpu=1, temperature=0)
+        # llm = ChatOllama(model="gemma2", temperature=0)
+        # llm = ChatOllama(model=model)
+
 
         generate_queries = (
             prompt_perspectives
@@ -96,6 +138,7 @@ def multi_query(question, retriever, chat_history):
 
     return answer, docs
 
+# Generate the final answer from the retrieved documents and the user's question
 def multi_query_rag_prompt(retrieval_chain, question):
     # RAG
     template = """Answer the following question based on this context:
@@ -103,16 +146,20 @@ def multi_query_rag_prompt(retrieval_chain, question):
     {context}
 
     Question: {question}
-    Output in user's language. If the question is in zh-tw, then the output will be in zh-tw. \n
+    Output in user's language. If the question is in zh-tw, then the output will be in zh-tw. If the question is in English, then the output will be in English\n
     You should not mention anything about "根據提供的文件內容" or other similar terms.
-    If you don't know the answer, just say that "很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@systex.com 以便獲得更進一步的幫助,謝謝。"
+    If you don't know the answer, just say that "很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@email.com 以便獲得更進一步的幫助,謝謝。I'm sorry I cannot answer your question. Please send your question to test@email.com for further assistance. Thank you."
     """
 
     prompt = ChatPromptTemplate.from_template(template)
+    context = retrieval_chain.invoke({"question": question})  # Ensure this returns the context
+
 
     # llm = ChatOpenAI(temperature=0)
-    llm = ChatOpenAI(temperature=0, model="gpt-4-1106-preview")
+    llm = ChatOpenAI(model="gpt-4-1106-preview")
     # llm = ChatOllama(model="llama3", num_gpu=1, temperature=0)
+    # llm = ChatOllama(model="gemma2", temperature=0)
+
 
     final_rag_chain = (
         {"context": retrieval_chain,
@@ -121,8 +168,13 @@ def multi_query_rag_prompt(retrieval_chain, question):
         | llm
         | StrOutputParser()
     )
-
-    # answer = final_rag_chain.invoke({"question":question})
+    messages = [
+        {"role": "system", "content": template},
+        {"role": "user", "content": question},
+        {"role": "assistant", "content": context}
+    ]
+    # answer = interact_with_model(messages)
+    answer = final_rag_chain.invoke({"question":question})
 
     answer = ""
     for text in final_rag_chain.stream({"question":question}):
@@ -133,6 +185,7 @@ def multi_query_rag_prompt(retrieval_chain, question):
     return answer
 ########################################################################################################################
 
+# Condense the chat history and the follow-up question into a standalone question
 def get_search_query():
     # Condense a chat history and follow-up question into a standalone question
     #
@@ -185,7 +238,7 @@ def get_search_query():
                 chat_history=lambda x: _format_chat_history(x["chat_history"])
             )
             | CONDENSE_QUESTION_PROMPT
-            | ChatOpenAI(temperature=0)
+            | ChatOpenAI()
             | StrOutputParser(),
         ),
         # Else, we have no chat history, so just pass through the question
@@ -194,6 +247,7 @@ def get_search_query():
     return _search_query
 ########################################################################################################################
 
+# Retrieve documents and generate an answer
 def naive_rag(question, retriever):
     #### RETRIEVAL and GENERATION ####
 
@@ -201,7 +255,7 @@ def naive_rag(question, retriever):
     prompt = hub.pull("rlm/rag-prompt")
 
     # LLM
-    llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
+    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
 
     # Post-processing
     def format_docs(docs):
@@ -222,6 +276,7 @@ def naive_rag(question, retriever):
     return answer, reference
 ################################################################################################
 
+# Retrieve from question-answer pairs and generate the answer
 def naive_rag_for_qapairs(question, retriever):
     #### RETRIEVAL and GENERATION ####
 
@@ -242,8 +297,10 @@ def naive_rag_for_qapairs(question, retriever):
     prompt = PromptTemplate.from_template(template)
 
     # LLM
-    llm = ChatOpenAI(model_name="gpt-4-0125-preview", temperature=0)
+    llm = ChatOpenAI(model_name="gpt-4-0125-preview")
     # llm = ChatOllama(model="llama3", num_gpu=1, temperature=0)
+    # llm = ChatOllama(model="gemma2", num_gpu=1, temperature=0)
+
 
     # Post-processing
     def format_docs(docs):
@@ -285,4 +342,7 @@ def rag_score(question, ground_truth, answer, reference_docs):
         ],
     )
 
-    return result
+    result_df = result.to_pandas()
+    print(result_df.head())
+    result_df.to_csv('ragas_rag.csv')
+    return result
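
Illustrative usage of the functions touched by this patch — a minimal sketch, not part of the diff. It assumes the edited file is importable as `RAG_strategy` (hypothetical module name) and that a LangChain retriever has already been built elsewhere; the `multi_query` and `rag_score` signatures are taken from the hunks above.

```python
# Minimal usage sketch -- not part of this patch. Module name and retriever are assumptions.
from RAG_strategy import multi_query, rag_score  # hypothetical import path for this file

def demo(retriever):
    question = "How do I apply for an account?"   # example user question
    chat_history = []                             # prior (human, ai) turns; empty for a new session

    # multi_query() returns the generated answer plus the retrieved reference documents
    answer, docs = multi_query(question, retriever, chat_history)

    # rag_score() runs the RAGAS metrics and, with this patch, also writes ragas_rag.csv
    result = rag_score(question, "expected ground-truth answer", answer, docs)
    return answer, result
```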