# rewrite_question.py
  1. from langchain_core.output_parsers import StrOutputParser
  2. from langchain_openai import ChatOpenAI
  3. from langchain_core.runnables import RunnablePassthrough
  4. from langchain import PromptTemplate
  5. from langchain_community.chat_models import ChatOllama
  6. from langchain_core.runnables import (
  7. RunnableBranch,
  8. RunnableLambda,
  9. RunnableParallel,
  10. RunnablePassthrough,
  11. )
  12. from typing import Tuple, List, Optional
  13. from langchain_core.messages import AIMessage, HumanMessage
  14. # local_llm = "llama3-groq-tool-use:latest"
  15. # llm = ChatOllama(model=local_llm, temperature=0)
  16. # llm_json = ChatOllama(model=local_llm, format="json", temperature=0)
  17. from dotenv import load_dotenv
  18. load_dotenv()
  19. from langchain_openai import ChatOpenAI
  20. llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)
  21. def get_search_query():
  22. # Condense a chat history and follow-up question into a standalone question
  23. #
  24. # _template = """Given the following conversation and a follow up question,
  25. # rephrase the follow up question to be a standalone question to help others understand the question without having to go back to the conversation transcript.
  26. # Generate standalone question in its original language.
  27. # Chat History:
  28. # {chat_history}
  29. # Follow Up Input: {question}
  30. # Hint:
  31. # * Refer to chat history and add the subject to the question
  32. # * Replace the pronouns in the question with the correct person or thing, please refer to chat history
  33. # Standalone question:""" # noqa: E501
  34. _template = """
  35. <|begin_of_text|>
  36. <|start_header_id|>system<|end_header_id|>
  37. Rewrite the following query by incorporating relevant context from the conversation history.
  38. The rewritten query should:
  39. - Preserve the core intent and meaning of the original query
  40. - Expand and clarify the query to make it more specific and informative for retrieving relevant context
  41. - Avoid introducing new topics or queries that deviate from the original query
  42. - DONT EVER ANSWER the Original query, but instead focus on rephrasing and expanding it into a new query
  43. - The rewritten query should be in its original language.
  44. Return ONLY the rewritten query text, without any additional formatting or explanations.
  45. <|eot_id|>
  46. <|begin_of_text|><|start_header_id|>user<|end_header_id|>
  47. Conversation History:
  48. {chat_history}
  49. Original query: [{question}]
  50. Hint:
  51. * Refer to chat history and add the subject to the question
  52. * Replace the pronouns in the question with the correct person or thing, please refer to chat history
  53. Rewritten query:
  54. <|eot_id|>
  55. <|start_header_id|>assistant<|end_header_id|>
  56. """
  57. CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
  58. def _format_chat_history(chat_history: List[Tuple[str, str]]) -> List:
  59. buffer = []
  60. for human, ai in chat_history:
  61. buffer.append(HumanMessage(content=human))
  62. buffer.append(AIMessage(content=ai))
  63. return buffer
  64. _search_query = RunnableBranch(
  65. # If input includes chat_history, we condense it with the follow-up question
  66. (
  67. RunnableLambda(lambda x: bool(x.get("chat_history"))).with_config(
  68. run_name="HasChatHistoryCheck"
  69. ), # Condense follow-up question and chat into a standalone_question
  70. RunnablePassthrough.assign(
  71. chat_history=lambda x: _format_chat_history(x["chat_history"])
  72. )
  73. | CONDENSE_QUESTION_PROMPT
  74. | llm
  75. | StrOutputParser(),
  76. ),
  77. # Else, we have no chat history, so just pass through the question
  78. RunnableLambda(lambda x : x["question"]),
  79. )
  80. return _search_query
  81. if __name__ == "__main__":
  82. _search_query = get_search_query()
  83. chat_history = [
  84. {
  85. "q": "北海建準廠2023年的類別3排放量是多少?",
  86. "a": """根據北海建準廠2023年的數據,類別3的排放量是2,162.62公噸CO2e。
  87. 類別3指的是溫室氣體排放量盤查作業中的一個範疇,該範疇涵蓋了事業之溫室氣體排放量的盤查和登錄。"""
  88. }
  89. ]
  90. chat_history = [(history["q"] , history["a"] ) for history in chat_history if history["a"] != "" and history["a"] != "string"]
  91. print(chat_history)
  92. question = "廣興廠"
  93. modified_question = _search_query.invoke({"question": question, "chat_history": chat_history})
  94. print(modified_question)