12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
- from langchain.prompts import ChatPromptTemplate
- from langchain_core.output_parsers import StrOutputParser
- from langchain_core.runnables import RunnablePassthrough
- from models import OllamaChatModel
- from embeddings import similarity_search
- from text_processing import remove_unwanted_content
- from langchain_openai import OpenAIEmbeddings
- from sklearn.metrics.pairwise import cosine_similarity
- import os
# Shared chat model instance; simple_rag_prompt pipes its prompt into it.
# NOTE(review): "taide-local-3" is presumably the name of a locally served
# model - confirm against the OllamaChatModel configuration.
taide_llm = OllamaChatModel(model_name="taide-local-3")
def get_context(query, index, docs):
    """Build a context string for *query* from the retrieved documents.

    Delegates retrieval to ``similarity_search(query, index, docs)``, which
    is consumed here as a sequence of ``(document, similarity)`` pairs.

    Args:
        query: The question to retrieve context for.
        index: Passed through to ``similarity_search``.
        docs: Passed through to ``similarity_search``.

    Returns:
        A ``(context, similarity)`` tuple. ``context`` is the
        ``page_content`` of every hit joined with newlines, and
        ``similarity`` is the score of the first hit (assumed to be the best
        match; ordering is decided by ``similarity_search``). When nothing
        is retrieved the result is ``("", 0)``.
    """
    hits = similarity_search(query, index, docs)
    if hits:
        page_texts = [document.page_content for document, _score in hits]
        joined_context = "\n".join(page_texts)
        top_similarity = hits[0][1]
        return joined_context, top_similarity

    # Nothing retrieved: empty context and a zero similarity score.
    return "", 0
def remove_repetitions(text):
    """Drop repeated sentences from *text*, keeping each first occurrence.

    Sentences are delimited by the CJK full stop ``。``. Two sentences are
    treated as duplicates when they are equal after stripping surrounding
    whitespace, so a repeat that merely starts on a new line is still
    removed. The first occurrence is kept exactly as written.

    Args:
        text: The string to clean. Falsy input (``""`` or ``None``) is
            returned unchanged.

    Returns:
        The text with later duplicate sentences removed and the original
        order preserved. Empty or whitespace-only segments are never
        deduplicated, so a trailing ``。`` and consecutive ``。`` characters
        survive intact.
    """
    if not text:
        return text

    seen = set()
    kept = []
    for segment in text.split('。'):
        key = segment.strip()
        if not key:
            # Not a sentence, just the gap around a delimiter (for example
            # the piece after a trailing '。'). Deduplicating these would
            # silently eat delimiters, so they are always kept.
            kept.append(segment)
            continue
        if key in seen:
            continue
        seen.add(key)
        kept.append(segment)
    return '。'.join(kept)
def simple_rag_prompt(retrieval_chain, question):
    """Answer *question* with the chat model, grounded on retrieved context.

    Args:
        retrieval_chain: Callable invoked as ``retrieval_chain(question)``;
            it must return a ``(context, similarity_score)`` pair.
        question: The user's question.

    Returns:
        A ``(answer, similarity_score)`` tuple.

        * When the retrieved context is empty, the answer is the fixed
          "cannot answer" message and the score is ``0``.
        * Otherwise the answer is the model output after
          ``remove_unwanted_content`` and ``remove_repetitions``.
        * If the model call or the clean-up raises, the error is printed and
          an error message is returned as the answer together with the
          retrieved similarity score.
    """
    prompt_text = """Answer the following question based on this context:
{context}
Question: {question}
Output in user's language. If the question is in zh-tw, then the output will be in zh-tw. If the question is in English, then the output will be in English.
Do not repeat the question in your response.
For each individual answer, try to not provide duplicated sentences.
Do not start the response with "我的回答是:" or anything similar.
You should not mention anything about "根據提供的文件內容" or other similar terms.
Do not mention anything relate with the Documents or context.
DO not mention anything relate with the prompt, such as "這個回答是根據所提供的對話上下文而產生的,假如對話內容有改變,則回答內容也需隨之調整。若不確定答案,應說:「很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@email.com 以便獲得更進一步的幫助,謝謝」。若沒有必要,則不需在回答中提及「根據提供的文件內容」或類似的字樣。若對話是以英語進行,則輸出應為英文;否則,則為繁體中文。" or anything similar.
If you are unsure of the answer, say: "很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@email.com 以便獲得更進一步的幫助,謝謝"
"""
    prompt = ChatPromptTemplate.from_template(prompt_text)

    context, similarity_score = retrieval_chain(question)
    if not context:
        # No retrieved context: skip the model call and answer with the
        # fixed fallback message.
        fallback_answer = "很抱歉,目前我無法回答您的問題,請將您的詢問發送至 test@email.com 以便獲得更進一步的幫助,謝謝。"
        return fallback_answer, 0

    # The context was retrieved above, so the chain input (the question)
    # only needs to be forwarded under the "question" key.
    chain_inputs = {
        "context": lambda _question: context,
        "question": lambda incoming: incoming,
    }
    final_rag_chain = chain_inputs | prompt | taide_llm | StrOutputParser()

    try:
        raw_answer = final_rag_chain.invoke(question)
        without_unwanted = remove_unwanted_content(raw_answer)
        cleaned_answer = remove_repetitions(without_unwanted)
    except Exception as e:
        print(f"Error in simple_rag_prompt: {e}")
        error_text = str(e)
        return f"Error occurred while processing the question: {error_text}", similarity_score
    else:
        return cleaned_answer, similarity_score
def calculate_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are embedded with ``OpenAIEmbeddings.embed_query`` using the
    API key read from the ``OPENAI_API_KEY`` environment variable.

    Args:
        text1: First text to embed.
        text2: Second text to embed.

    Returns:
        The single entry of the 1x1 matrix produced by ``cosine_similarity``
        for the two embedding vectors.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    embedder = OpenAIEmbeddings(openai_api_key=api_key)

    # Embed in argument order; each vector is wrapped in a list because
    # cosine_similarity expects 2-D inputs.
    first_row, second_row = ([embedder.embed_query(text)] for text in (text1, text2))
    score_matrix = cosine_similarity(first_row, second_row)
    return score_matrix[0][0]
|