"""Build a persistent Chroma vector store from the questions in a CSV file.

On first run the script loads ``log_record_rows.csv``, pulls the
``question`` field out of every row, embeds the questions with OpenAI
embeddings and persists the result under ``./chroma_db``.  If that
directory already exists, nothing is rebuilt.
"""

import os

import openai
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_openai import OpenAIEmbeddings

# Locations and field name used by this script.
ENV_FILE = '../environment.env'
CSV_PATH = "log_record_rows.csv"
CHROMA_DIR = "./chroma_db"
QUESTION_FIELD = "question"

# Load environment variables
load_dotenv(ENV_FILE)

# Set up OpenAI API
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise ValueError("No OpenAI API key found in environment variables")
openai.api_key = openai_api_key

# Initialize embeddings model
embeddings_model = OpenAIEmbeddings()


def extract_field(doc, field_name):
    """Return the value of ``field_name`` from a document's page content.

    The page content is scanned line by line for the first line that
    starts with ``"<field_name>:"``; everything after that first colon is
    returned with surrounding whitespace stripped.

    Args:
        doc: Object exposing a ``page_content`` string made of
            newline-separated ``name: value`` lines.
        field_name: Name of the field to look up (without the colon).

    Returns:
        The stripped field value, or ``None`` when no line starts with
        ``"<field_name>:"``.
    """
    prefix = f"{field_name}:"
    for line in doc.page_content.split('\n'):
        if line.startswith(prefix):
            # Split only on the first colon so values containing ':' survive.
            return line.split(':', 1)[1].strip()
    return None


# Check if Chroma DB already exists
# NOTE(review): only the directory's existence is checked; a directory left
# behind by a failed earlier run would also be treated as an existing DB.
if not os.path.exists(CHROMA_DIR):
    try:
        # Load and process CSV data
        loader = CSVLoader(file_path=CSV_PATH)
        data = loader.load()

        field_name = QUESTION_FIELD
        extracted = (extract_field(doc, field_name) for doc in data)
        # Rows without the field yield None and blank values yield "";
        # neither can be embedded, so drop them before building the store.
        questions = [question for question in extracted if question]
        if not questions:
            raise ValueError(
                f"No '{field_name}' values found in {CSV_PATH}"
            )

        # Create and save Chroma vector store
        vectorstore = Chroma.from_texts(
            texts=questions,
            embedding=embeddings_model,
            persist_directory=CHROMA_DIR,
        )
        print("Chroma database created successfully.")
    except Exception as e:
        # Best-effort script: report the failure instead of propagating it.
        print(f"An error occurred while creating the Chroma database: {e}")
else:
    print("Chroma database already exists.")