rewrite_question.py

from typing import List, Tuple

from langchain_community.chat_models import ChatOllama
from langchain_core.messages import AIMessage, HumanMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnablePassthrough,
)

local_llm = "llama3-groq-tool-use:latest"
# llm_json = ChatOllama(model=local_llm, format="json", temperature=0)
llm = ChatOllama(model=local_llm, temperature=0)
def get_search_query():
    # Condense a chat history and a follow-up question into a standalone question.
    #
    # _template = """Given the following conversation and a follow up question,
    # rephrase the follow up question to be a standalone question to help others understand the question without having to go back to the conversation transcript.
    # Generate standalone question in its original language.
    # Chat History:
    # {chat_history}
    # Follow Up Input: {question}
    # Hint:
    # * Refer to chat history and add the subject to the question
    # * Replace the pronouns in the question with the correct person or thing, please refer to chat history
    # Standalone question:"""  # noqa: E501
    _template = """
<|begin_of_text|>
<|start_header_id|>system<|end_header_id|>
Rewrite the following query by incorporating relevant context from the conversation history.
The rewritten query should:
- Preserve the core intent and meaning of the original query
- Expand and clarify the query to make it more specific and informative for retrieving relevant context
- Avoid introducing new topics or queries that deviate from the original query
- NEVER answer the original query; instead, focus on rephrasing and expanding it into a new query
- Be written in the original query's language
Return ONLY the rewritten query text, without any additional formatting or explanations.
<|eot_id|>
<|start_header_id|>user<|end_header_id|>
Conversation History:
{chat_history}
Original query: [{question}]
Hint:
* Refer to the chat history and add the subject to the question
* Replace the pronouns in the question with the correct person or thing; refer to the chat history
Rewritten query:
<|eot_id|>
<|start_header_id|>assistant<|end_header_id|>
"""
    CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
    def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
        # Convert (human, ai) string pairs into alternating message objects.
        buffer = []
        for human, ai in chat_history:
            buffer.append(HumanMessage(content=human))
            buffer.append(AIMessage(content=ai))
        return buffer
    _search_query = RunnableBranch(
        # If the input includes chat_history, condense it with the follow-up question
        (
            RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
                run_name="HasChatHistoryCheck"
            ),  # Condense the follow-up question and chat history into a standalone question
            RunnablePassthrough.assign(
                chat_history=lambda x: _format_chat_history(x["chat_history"])
            )
            | CONDENSE_QUESTION_PROMPT
            | llm
            | StrOutputParser(),
        ),
        # Else, there is no chat history, so just pass the question through
        RunnableLambda(lambda x: x["question"]),
    )
    return _search_query
if __name__ == "__main__":
    _search_query = get_search_query()
    # Example history (in Chinese): the question asks for the 2023 Category 3 (Scope 3)
    # emissions of the 北海建準 plant; the answer reports 2,162.62 metric tons of CO2e
    # and explains what Category 3 covers in a greenhouse-gas inventory.
    chat_history = [
        {
            "q": "北海建準廠2023年的類別3排放量是多少?",
            "a": """根據北海建準廠2023年的數據,類別3的排放量是2,162.62公噸CO2e。
類別3指的是溫室氣體排放量盤查作業中的一個範疇,該範疇涵蓋了事業之溫室氣體排放量的盤查和登錄。""",
        }
    ]
    # Keep only entries with a real answer and convert them to (question, answer) tuples
    chat_history = [
        (history["q"], history["a"])
        for history in chat_history
        if history["a"] != "" and history["a"] != "string"
    ]
    print(chat_history)
    # Follow-up question ("What about Category 2?") that only makes sense with the history
    question = "類別2呢"
    modified_question = _search_query.invoke(
        {"question": question, "chat_history": chat_history}
    )
    print(modified_question)
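
    # Illustrative addition (not in the original script): with an empty chat history,
    # the HasChatHistoryCheck lambda returns False, so the RunnableBranch falls through
    # to the final RunnableLambda and returns the question unchanged, without calling
    # the LLM at all.
    passthrough = _search_query.invoke({"question": question, "chat_history": []})
    print(passthrough)  # expected to equal `question`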