"""Build a chat-history-aware query rewriter.

Exposes :func:`get_search_query`, which returns a LangChain runnable that
condenses a follow-up question plus prior chat history into a standalone
search query (or passes the question through untouched when there is no
history).
"""

from typing import List, Tuple

from dotenv import load_dotenv
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
# NOTE: `from langchain import PromptTemplate` is a deprecated path and fails
# on recent langchain releases; langchain_core is the canonical location.
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnablePassthrough,
)
from langchain_openai import ChatOpenAI

# Load OPENAI_API_KEY (and friends) from a local .env file before the model
# client is constructed.
load_dotenv()

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)


def get_search_query():
    """Return a runnable that rewrites a follow-up question into a standalone query.

    The returned runnable expects a dict with keys:
        question (str): the user's latest question.
        chat_history (list[tuple[str, str]] | None): prior (human, ai) turns.

    If ``chat_history`` is non-empty, the question is condensed with the
    history through the LLM; otherwise the question is passed through as-is.

    Returns:
        A LangChain runnable producing the rewritten query string.
    """
    # NOTE(review): this template uses Llama-3 chat special tokens
    # (<|begin_of_text|>, <|start_header_id|>, ...) but is sent to a GPT
    # model, which treats them as plain text — presumably left over from the
    # commented-out ChatOllama setup; confirm whether they should be removed.
    _template = """
<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
Rewrite the following query by incorporating relevant context from the conversation history.
The rewritten query should:

- Preserve the core intent and meaning of the original query
- Expand and clarify the query to make it more specific and informative for retrieving relevant context
- Avoid introducing new topics or queries that deviate from the original query
- DONT EVER ANSWER the Original query, but instead focus on rephrasing and expanding it into a new query
- The rewritten query should be in its original language.

Return ONLY the rewritten query text, without any additional formatting or explanations.
<|eot_id|>
<|begin_of_text|><|start_header_id|>user<|end_header_id|>
Conversation History:
{chat_history}
Original query: [{question}]
Hint:
* Refer to chat history and add the subject to the question
* Replace the pronouns in the question with the correct person or thing, please refer to chat history
Rewritten query:
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)

    def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
        """Convert (human, ai) string pairs into LangChain message objects."""
        buffer = []
        for human, ai in chat_history:
            buffer.append(HumanMessage(content=human))
            buffer.append(AIMessage(content=ai))
        return buffer

    _search_query = RunnableBranch(
        # If the input includes chat_history, condense it with the follow-up
        # question into a standalone question.
        (
            RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
                run_name="HasChatHistoryCheck"
            ),
            RunnablePassthrough.assign(
                chat_history=lambda x: _format_chat_history(x["chat_history"])
            )
            | CONDENSE_QUESTION_PROMPT
            | llm
            | StrOutputParser(),
        ),
        # Else there is no chat history, so just pass the question through.
        RunnableLambda(lambda x: x["question"]),
    )

    return _search_query


if __name__ == "__main__":
    _search_query = get_search_query()

    chat_history = [
        {
            "q": "北海建準廠2023年的類別3排放量是多少?",
            "a": """根據北海建準廠2023年的數據,類別3的排放量是2,162.62公噸CO2e。 類別3指的是溫室氣體排放量盤查作業中的一個範疇,該範疇涵蓋了事業之溫室氣體排放量的盤查和登錄。""",
        }
    ]
    # Keep only turns with a real answer ("string" is a placeholder value).
    chat_history = [
        (history["q"], history["a"])
        for history in chat_history
        if history["a"] != "" and history["a"] != "string"
    ]
    print(chat_history)

    question = "廣興廠"
    modified_question = _search_query.invoke(
        {"question": question, "chat_history": chat_history}
    )
    print(modified_question)