RAG_strategy.py

from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain.load import dumps, loads
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain_community.llms import Ollama
from langchain_community.chat_models import ChatOllama
from operator import itemgetter
from langchain import hub
from langchain.globals import set_llm_cache
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from typing import Tuple, List, Optional
from langchain_core.messages import AIMessage, HumanMessage
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import (
    answer_relevancy,
    faithfulness,
    context_recall,
    context_precision,
)
import os
from dotenv import load_dotenv

load_dotenv('environment.env')
########################################################################################################################
########################################################################################################################
from langchain.cache import SQLiteCache
from langchain.cache import RedisSemanticCache
from langchain_openai import OpenAIEmbeddings
########################################################################################################################

import requests
import openai

openai_api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = openai_api_key
URI = os.getenv("SUPABASE_URI")

# Cache LLM responses to cut down on repeated API requests. Uncomment one of the
# following: SQLiteCache for exact-match caching, or RedisSemanticCache (Redis on
# port 6380) for embedding-based semantic caching.
# set_llm_cache(SQLiteCache(database_path=".langchain.db"))
# set_llm_cache(RedisSemanticCache(redis_url="redis://localhost:6380", embedding=OpenAIEmbeddings(openai_api_key=openai_api_key), score_threshold=0.0005))
# TAIDE model on Ollama: https://ollama.com/jcai/llama3-taide-lx-8b-chat-alpha1
# def interact_with_model(messages, api_url="http://localhost:11434/v1/chat/completions"):
#     print("Using model: TAIDE")
#     response = requests.post(api_url, json={"model": "jcai/llama3-taide-lx-8b-chat-alpha1:Q4_K_M", "messages": messages})
#     return response.json()["choices"][0]["message"]["content"]

# Custom LangChain wrapper around the local TAIDE model. Note: enabling this class
# also requires importing the LLM base class, e.g.
# `from langchain_core.language_models.llms import LLM`.
# class CustomTAIDELLM(LLM):
#     api_url: str = "http://localhost:11434/v1/chat/completions"
#
#     def _call(self, prompt: str, stop: Optional[List[str]] = None) -> str:
#         messages = [{"role": "user", "content": prompt}]
#         response = requests.post(self.api_url, json={
#             "model": "taide-local",  # Use your local model name
#             "messages": messages
#         })
#         return response.json()["choices"][0]["message"]["content"]
#
#     @property
#     def _llm_type(self) -> str:
#         return "custom_taide"
#
# # Create an instance of the custom LLM
# taide_llm = CustomTAIDELLM()
# Generate several versions of the user question, retrieve documents with each version,
# and return the generated answer together with the reference documents.
def multi_query(question, retriever, chat_history):

    def multi_query_chain():
        # Multi Query: Different Perspectives
        template = """You are an AI language model assistant. Your task is to generate three
different versions of the given user question to retrieve relevant documents from a vector
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search.
Provide these alternative questions separated by newlines.
You must also return the original question, i.e. 1 original version + 3 different versions = 4 questions.
Original question: {question}"""
        prompt_perspectives = ChatPromptTemplate.from_template(template)

        # `messages` is only needed by the commented-out interact_with_model() path above.
        messages = [
            {"role": "system", "content": template},
            {"role": "user", "content": question},
        ]
        # generate_queries = interact_with_model(messages).split("\n")

        llm = ChatOpenAI(model="gpt-4-1106-preview")
        # llm = ChatOllama(model="llama3", num_gpu=1, temperature=0)
        # llm = ChatOllama(model="gemma2", temperature=0)
        # llm = ChatOllama(model=model)

        generate_queries = (
            prompt_perspectives
            | llm
            | StrOutputParser()
            | (lambda x: x.split("\n"))
        )
        return generate_queries

    def get_unique_union(documents: List[list]):
        """Unique union of retrieved docs."""
        # Flatten the list of lists and serialize each Document so it is hashable
        flattened_docs = [dumps(doc) for sublist in documents for doc in sublist]
        # Deduplicate
        unique_docs = list(set(flattened_docs))
        # Deserialize back to Document objects
        return [loads(doc) for doc in unique_docs]

    # Condense the follow-up question and chat history into a standalone question
    _search_query = get_search_query()
    modified_question = _search_query.invoke({"question": question, "chat_history": chat_history})
    print(modified_question)

    generate_queries = multi_query_chain()
    retrieval_chain = generate_queries | retriever.map() | get_unique_union
    docs = retrieval_chain.invoke({"question": modified_question})

    answer = multi_query_rag_prompt(retrieval_chain, modified_question)
    return answer, docs
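# Example usage (illustrative sketch, not part of the original code): `retriever` is
# assumed to be any LangChain retriever built elsewhere, e.g. on the vector store behind
# SUPABASE_URI; the names and question below are placeholders.
# answer, docs = multi_query(
#     "What payment methods are supported?",
#     retriever,        # hypothetical retriever object
#     chat_history=[],  # empty history -> the question is passed through unchanged
# )
# print(answer)
# print(f"{len(docs)} unique reference documents retrieved")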
# Generate the final answer from the retrieved documents and the user question.
def multi_query_rag_prompt(retrieval_chain, question):
    # RAG
    template = """Answer the following question based on this context:
{context}
Question: {question}
Output in the user's language. If the question is in zh-tw, then the output will be in zh-tw. If the question is in English, then the output will be in English.
You should not mention anything about "根據提供的文件內容" or other similar terms.
If you don't know the answer, just say "很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@email.com 以便獲得更進一步的幫助,謝謝。I'm sorry I cannot answer your question. Please send your question to test@email.com for further assistance. Thank you."
"""
    prompt = ChatPromptTemplate.from_template(template)

    # llm = ChatOpenAI(temperature=0)
    llm = ChatOpenAI(model="gpt-4-1106-preview")
    # llm = ChatOllama(model="llama3", num_gpu=1, temperature=0)
    # llm = ChatOllama(model="gemma2", temperature=0)

    final_rag_chain = (
        {"context": retrieval_chain,
         "question": itemgetter("question")}
        | prompt
        | llm
        | StrOutputParser()
    )

    # The context/message variables below are only needed by the commented-out
    # interact_with_model() path; building them live would run the retrieval chain twice.
    # context = retrieval_chain.invoke({"question": question})
    # messages = [
    #     {"role": "system", "content": template},
    #     {"role": "user", "content": question},
    #     {"role": "assistant", "content": context}
    # ]
    # answer = interact_with_model(messages)

    # Stream the answer (the earlier duplicate invoke() call was dropped, since its
    # result was immediately overwritten and only doubled the LLM cost).
    answer = ""
    for text in final_rag_chain.stream({"question": question}):
        print(text, end="", flush=True)
        answer += text
    return answer
########################################################################################################################
# Condense the chat history and a follow-up question into a standalone question.
def get_search_query():
    # Condense a chat history and follow-up question into a standalone question
    #
    # _template = """Given the following conversation and a follow up question,
    # rephrase the follow up question to be a standalone question to help others understand the question without having to go back to the conversation transcript.
    # Generate the standalone question in its original language.
    # Chat History:
    # {chat_history}
    # Follow Up Input: {question}
    # Hint:
    # * Refer to the chat history and add the subject to the question
    # * Replace the pronouns in the question with the correct person or thing, referring to the chat history
    # Standalone question:"""  # noqa: E501
    _template = """Rewrite the following query by incorporating relevant context from the conversation history.
The rewritten query should:
- Preserve the core intent and meaning of the original query
- Expand and clarify the query to make it more specific and informative for retrieving relevant context
- Avoid introducing new topics or queries that deviate from the original query
- NEVER answer the original query; instead, focus on rephrasing and expanding it into a new query
- Be written in the original query's language
Return ONLY the rewritten query text, without any additional formatting or explanations.

Conversation History:
{chat_history}

Original query: [{question}]

Rewritten query:
"""
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

    def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
        buffer = []
        for human, ai in chat_history:
            buffer.append(HumanMessage(content=human))
            buffer.append(AIMessage(content=ai))
        return buffer

    _search_query = RunnableBranch(
        # If the input includes chat_history, condense it with the follow-up question
        (
            RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
                run_name="HasChatHistoryCheck"
            ),  # Condense the follow-up question and chat history into a standalone question
            RunnablePassthrough.assign(
                chat_history=lambda x: _format_chat_history(x["chat_history"])
            )
            | CONDENSE_QUESTION_PROMPT
            | ChatOpenAI()
            | StrOutputParser(),
        ),
        # Else there is no chat history, so just pass the question through
        RunnableLambda(lambda x: x["question"]),
    )
    return _search_query
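# Illustrative sketch of how the returned runnable behaves (example values are hypothetical):
# with chat history it rewrites the follow-up into a standalone query; without it,
# the question is passed through unchanged.
# _search_query = get_search_query()
# standalone = _search_query.invoke({
#     "question": "How much does it cost?",
#     "chat_history": [("Tell me about plan A", "Plan A includes ...")],
# })  # -> e.g. "How much does plan A cost?"
# passthrough = _search_query.invoke({"question": "How much does it cost?", "chat_history": []})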
########################################################################################################################
# Retrieve documents and generate an answer (plain RAG baseline).
def naive_rag(question, retriever):
    #### RETRIEVAL and GENERATION ####
    # Prompt
    prompt = hub.pull("rlm/rag-prompt")

    # LLM
    llm = ChatOpenAI(model_name="gpt-3.5-turbo")

    # Post-processing
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    reference = retriever.get_relevant_documents(question)

    # Chain
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    # Question
    answer = rag_chain.invoke(question)
    return answer, reference
################################################################################################
# Retrieve from historical question-answer pairs and generate an answer.
def naive_rag_for_qapairs(question, retriever):
    #### RETRIEVAL and GENERATION ####
    # Prompt
    # prompt = hub.pull("rlm/rag-prompt")
    template = """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
The retrieved context consists of question-answer pairs from historical QA; find a suitable answer among these pairs.
If you cannot find a suitable answer, just return "False".
Use three sentences maximum and do not make up an answer.
Output in the user's language. If the question is in zh-tw, then the output will be in zh-tw.
{context}
Question: {question}
"""
    prompt = PromptTemplate.from_template(template)

    # LLM
    llm = ChatOpenAI(model_name="gpt-4-0125-preview")
    # llm = ChatOllama(model="llama3", num_gpu=1, temperature=0)
    # llm = ChatOllama(model="gemma2", num_gpu=1, temperature=0)

    # Post-processing
    def format_docs(docs):
        return "\n\n".join(doc.page_content for doc in docs)

    reference = retriever.get_relevant_documents(question)

    # Chain
    rag_chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    # Question
    answer = rag_chain.invoke(question)
    return answer, reference
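# One possible way to combine the two helpers (illustrative sketch; `qa_retriever` and
# `doc_retriever` are hypothetical retriever objects): try the QA-pair store first and
# fall back to plain document RAG when it returns "False".
# qa_answer, qa_refs = naive_rag_for_qapairs("How do I return a product?", qa_retriever)
# if qa_answer.strip() == "False":
#     answer, refs = naive_rag("How do I return a product?", doc_retriever)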
########################################################################################################################
# Score a single question/answer/context triple with RAGAS.
def rag_score(question, ground_truth, answer, reference_docs):
    datasets = {
        "question": [question],            # question: list[str]
        "answer": [answer],                # answer: list[str]
        "contexts": [reference_docs],      # contexts: list[list[str]]
        "ground_truths": [[ground_truth]]  # ground_truths: list[list[str]]
    }
    evalsets = Dataset.from_dict(datasets)

    result = evaluate(
        evalsets,
        metrics=[
            context_precision,
            faithfulness,
            answer_relevancy,
            context_recall,
        ],
    )

    result_df = result.to_pandas()
    print(result_df.head())
    result_df.to_csv('ragas_rag.csv')
    return result
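########################################################################################################################
if __name__ == "__main__":
    # Minimal smoke test for rag_score with toy data (illustrative only; it assumes
    # OPENAI_API_KEY is set, since the RAGAS metrics call the OpenAI API internally).
    toy_result = rag_score(
        question="What is RAG?",
        ground_truth="RAG is retrieval-augmented generation: it retrieves documents and lets an LLM answer from them.",
        answer="RAG (retrieval-augmented generation) first retrieves relevant documents, then generates an answer grounded in them.",
        reference_docs=["Retrieval-Augmented Generation (RAG) combines document retrieval with LLM generation."],
    )
    print(toy_result)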