1 year ago · 33d09d55e4
--- a/RAG/main.py
+++ b/RAG/main.py
@@ -1,83 +0,0 @@
 
				-import time
			
 
				-import pandas as pd
			
 
				-import os
			
 
				-from dotenv import load_dotenv
			
 
				-from config import (
			
 
				-    current_dir, CSV_FILE, system_prompt, 
			
 
				-    EMBEDDINGS_FILE, FAISS_INDEX_FILE
			
 
				-)
			
 
				-from langchain.globals import set_llm_cache
			
 
				-from langchain_community.cache import SQLiteCache
			
 
				-from embeddings import load_embeddings
			
 
				-from rag_chain import simple_rag_prompt, calculate_similarity, get_context
			
 
				-
			
 
				-
			
 
				# Read settings from the project's env file before anything else needs them.
				load_dotenv('environment.env')

				# Register a SQLite-backed LLM cache stored in ".langchain.db".
				set_llm_cache(
				    SQLiteCache(database_path=".langchain.db"),
				)
			
 
				-
			
 
				def main(n=10, output_file='rag_output.txt'):
				    """Run the first ``n`` CSV questions through the RAG pipeline and log results.

				    For every question the RAG answer, the response time, the retrieval
				    similarity score and the similarity between the reference answer and
				    the RAG answer are written to ``output_file``. A failure on a single
				    question is reported on the console and in the file, and processing
				    continues with the next question.

				    Args:
				        n: Maximum number of questions to test. Capped at the number of
				            rows actually present in the CSV.
				        output_file: Path of the text report; it is overwritten on each run.
				    """
				    # Only the index and the documents are used here; the other two
				    # return values are intentionally ignored.
				    _embeddings, docs, _df, index = load_embeddings()

				    def retrieval_chain(q):
				        """Fetch the retrieval context for question ``q``."""
				        return get_context(q, index, docs)

				    # Load questions from CSV
				    csv_path = os.path.join(current_dir, CSV_FILE)
				    qa_df = pd.read_csv(csv_path)

				    # Never index past the end of the CSV: a short file would otherwise
				    # produce one spurious "Error processing question" entry per missing row.
				    total = min(n, len(qa_df))
				    separator = "-" * 50 + "\n"

				    with open(output_file, 'w', encoding='utf-8') as f:
				        for i in range(total):
				            number = i + 1
				            try:
				                row = qa_df.iloc[i]
				                question = row['question']
				                original_answer = row['answer']

				                print(f"Processing question {number}: {question}")

				                # perf_counter is monotonic, so the measured duration cannot
				                # be skewed by system clock adjustments.
				                start_time = time.perf_counter()
				                rag_answer, similarity_score = simple_rag_prompt(retrieval_chain, question)
				                response_time = time.perf_counter() - start_time

				                # Check if rag_answer is a string before calculating similarity
				                if isinstance(rag_answer, str):
				                    answer_similarity = calculate_similarity(original_answer, rag_answer)
				                else:
				                    answer_similarity = 0
				                    print(f"Warning: RAG answer for question {number} is not a string. Answer: {rag_answer}")

				                # Build the whole record before writing anything, so a
				                # formatting failure (e.g. a non-numeric score) cannot leave
				                # a half-written entry in the report.
				                record = (
				                    f"Question {number}: {question}\n"
				                    f"Original Answer: {original_answer}\n"
				                    f"RAG Answer: {rag_answer}\n"
				                    f"Response Time: {response_time:.2f} seconds\n"
				                    f"Retrieval Similarity Score: {similarity_score:.4f}\n"
				                    f"Answer Similarity Score: {answer_similarity:.4f}\n"
				                )
				                f.write(record)
				                f.write(separator)

				                # Flush per question so partial results survive an abort.
				                f.flush()
				                print(f"Processed question {number}")

				                # Add a small delay to avoid rate limiting
				                time.sleep(1)
				            except Exception as e:
				                # Best effort: record the failure and move on to the next question.
				                print(f"Error processing question {number}: {e}")
				                f.write(f"Error processing question {number}: {e}\n")
				                f.write(separator)
				                f.flush()

				    print(f"Output has been saved to {output_file}")


				if __name__ == "__main__":
				    main()