|
@@ -1,83 +0,0 @@
|
|
|
-import time
|
|
|
-import pandas as pd
|
|
|
-import os
|
|
|
-from dotenv import load_dotenv
|
|
|
-from config import (
|
|
|
- current_dir, CSV_FILE, system_prompt,
|
|
|
- EMBEDDINGS_FILE, FAISS_INDEX_FILE
|
|
|
-)
|
|
|
-from langchain.globals import set_llm_cache
|
|
|
-from langchain_community.cache import SQLiteCache
|
|
|
-from embeddings import load_embeddings
|
|
|
-from rag_chain import simple_rag_prompt, calculate_similarity, get_context
|
|
|
-
|
|
|
-
|
|
|
# Read settings from the project's env file before anything else runs
load_dotenv('environment.env')

# Register a SQLite-backed LLM cache stored in .langchain.db
set_llm_cache(
    SQLiteCache(database_path=".langchain.db"),
)
|
|
|
-
|
|
|
def main(n: int = 10, output_file: str = 'rag_output.txt') -> None:
    """Run the first ``n`` questions of the QA CSV through the RAG prompt.

    For each question the RAG answer, the response time, the retrieval
    similarity score returned by ``simple_rag_prompt`` and the score that
    ``calculate_similarity`` gives for the original vs. RAG answer are
    appended to ``output_file``. A failure on one question is reported on
    stdout and in the output file, and processing continues with the next.

    Args:
        n: Maximum number of questions to process. Clamped to the number
            of rows in the CSV so short files do not yield spurious
            out-of-bounds error entries.
        output_file: Path of the text report; overwritten on every run.
    """
    # Only the documents and the index are used here.
    _, docs, _, index = load_embeddings()

    def retrieval_chain(query):
        """Fetch context for ``query`` from the loaded index and documents."""
        return get_context(query, index, docs)

    # Load questions from CSV
    csv_path = os.path.join(current_dir, CSV_FILE)
    qa_df = pd.read_csv(csv_path)

    # Never index past the end of the CSV.
    total = min(n, len(qa_df))
    if total < n:
        print(f"Only {len(qa_df)} questions available; processing {total} instead of {n}")

    separator = "-" * 50 + "\n"

    with open(output_file, 'w', encoding='utf-8') as f:
        for i in range(total):
            number = i + 1
            try:
                row = qa_df.iloc[i]
                question = row['question']
                original_answer = row['answer']

                print(f"Processing question {number}: {question}")

                # perf_counter is monotonic, so the measured duration cannot
                # be distorted by wall-clock adjustments.
                start_time = time.perf_counter()
                rag_answer, similarity_score = simple_rag_prompt(retrieval_chain, question)
                response_time = time.perf_counter() - start_time

                # Similarity is only computed for string answers.
                if isinstance(rag_answer, str):
                    answer_similarity = calculate_similarity(original_answer, rag_answer)
                else:
                    answer_similarity = 0
                    print(f"Warning: RAG answer for question {number} is not a string. Answer: {rag_answer}")

                # Format the whole record before writing so that a formatting
                # failure cannot leave a half-written entry in the report.
                record = (
                    f"Question {number}: {question}\n"
                    f"Original Answer: {original_answer}\n"
                    f"RAG Answer: {rag_answer}\n"
                    f"Response Time: {response_time:.2f} seconds\n"
                    f"Retrieval Similarity Score: {similarity_score:.4f}\n"
                    f"Answer Similarity Score: {answer_similarity:.4f}\n"
                ) + separator
                f.write(record)

                # Flush per question so partial results survive an interrupted run.
                f.flush()
                print(f"Processed question {number}")

                # Add a small delay to avoid rate limiting
                time.sleep(1)
            except Exception as e:
                # Best-effort batch: record the failure and move on to the
                # next question instead of aborting the whole run.
                print(f"Error processing question {number}: {e}")
                f.write(f"Error processing question {number}: {e}\n" + separator)
                f.flush()

    print(f"Output has been saved to {output_file}")


if __name__ == "__main__":
    main()
|