# rewrite_question.py
  1. from langchain_core.output_parsers import StrOutputParser
  2. from langchain_openai import ChatOpenAI
  3. from langchain_core.runnables import RunnablePassthrough
  4. from langchain import PromptTemplate
  5. from langchain_community.chat_models import ChatOllama
  6. from langchain_core.runnables import (
  7. RunnableBranch,
  8. RunnableLambda,
  9. RunnableParallel,
  10. RunnablePassthrough,
  11. )
  12. from typing import Tuple, List, Optional
  13. from langchain_core.messages import AIMessage, HumanMessage
  14. # local_llm = "llama3-groq-tool-use:latest"
  15. # llm = ChatOllama(model=local_llm, temperature=0)
  16. # llm_json = ChatOllama(model=local_llm, format="json", temperature=0)
  17. from dotenv import load_dotenv
  18. load_dotenv()
  19. from langchain_openai import ChatOpenAI
  20. llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
  21. def get_search_query():
  22. # Condense a chat history and follow-up question into a standalone question
  23. #
  24. # _template = """Given the following conversation and a follow up question,
  25. # rephrase the follow up question to be a standalone question to help others understand the question without having to go back to the conversation transcript.
  26. # Generate standalone question in its original language.
  27. # Chat History:
  28. # {chat_history}
  29. # Follow Up Input: {question}
  30. # Hint:
  31. # * Refer to chat history and add the subject to the question
  32. # * Replace the pronouns in the question with the correct person or thing, please refer to chat history
  33. # Standalone question:""" # noqa: E501
  34. _template = """
  35. <|begin_of_text|>
  36. <|start_header_id|>system<|end_header_id|>
  37. Rewrite the following query by incorporating relevant context from the conversation history.
  38. The rewritten query should:
  39. - Preserve the core intent and meaning of the original query
  40. - Expand and clarify the query to make it more specific and informative for retrieving relevant context
  41. - Avoid introducing new topics or queries that deviate from the original query
  42. - DONT EVER ANSWER the Original query, but instead focus on rephrasing and expanding it into a new query
  43. - The rewritten query should be in its original language.
  44. Return ONLY the rewritten query text, without any additional formatting or explanations.
  45. <|eot_id|>
  46. <|begin_of_text|><|start_header_id|>user<|end_header_id|>
  47. Conversation History:
  48. {chat_history}
  49. Original query: [{question}]
  50. Hint:
  51. * Refer to chat history and add the subject to the question
  52. * Replace the pronouns in the question with the correct person or thing, please refer to chat history
  53. Rewritten query:
  54. <|eot_id|>
  55. <|start_header_id|>assistant<|end_header_id|>
  56. """
  57. CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
  58. def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
  59. buffer = []
  60. for human, ai in chat_history:
  61. buffer.append(HumanMessage(content=human))
  62. buffer.append(AIMessage(content=ai))
  63. return buffer
  64. _search_query = RunnableBranch(
  65. # If input includes chat_history, we condense it with the follow-up question
  66. (
  67. RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
  68. run_name="HasChatHistoryCheck"
  69. ), # Condense follow-up question and chat into a standalone_question
  70. RunnablePassthrough.assign(
  71. chat_history=lambda x: _format_chat_history(x["chat_history"])
  72. )
  73. | CONDENSE_QUESTION_PROMPT
  74. | llm
  75. | StrOutputParser(),
  76. ),
  77. # Else, we have no chat history, so just pass through the question
  78. RunnableLambda(lambda x : x["question"]),
  79. )
  80. return _search_query
  81. if __name__ == "__main__":
  82. _search_query = get_search_query()
  83. chat_history = [
  84. {
  85. "q": "北海建準廠2023年的類別3排放量是多少?",
  86. "a": """根據北海建準廠2023年的數據,類別3的排放量是2,162.62公噸CO2e。
  87. 類別3指的是溫室氣體排放量盤查作業中的一個範疇,該範疇涵蓋了事業之溫室氣體排放量的盤查和登錄。"""
  88. }
  89. ]
  90. chat_history = [(history["q"] , history["a"] ) for history in chat_history if history["a"] != "" and history["a"] != "string"]
  91. print(chat_history)
  92. question = "廣興廠"
  93. modified_question = _search_query.invoke({"question": question, "chat_history": chat_history})
  94. print(modified_question)