123456789101112131415161718192021222324252627282930313233343536373839404142434445 |
import os
import shutil

import openai
from dotenv import load_dotenv
from langchain_chroma import Chroma
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_openai import OpenAIEmbeddings
# Read variables from the dotenv file before anything queries the environment.
load_dotenv('../environment.env')

# The OpenAI key is mandatory: hand it to the openai module when present,
# otherwise abort immediately.
openai_api_key = os.environ.get("OPENAI_API_KEY")
if openai_api_key:
    openai.api_key = openai_api_key
else:
    raise ValueError("No OpenAI API key found in environment variables")

# Embedding model used when building the vector store.
embeddings_model = OpenAIEmbeddings()
def extract_field(doc, field_name):
    """Return the value of the first ``<field_name>:`` line in a document.

    ``doc.page_content`` is split on newlines; the first line that starts
    with ``field_name`` followed by a colon is selected, and everything
    after that line's first colon is returned with surrounding whitespace
    stripped.

    Args:
        doc: Object exposing a ``page_content`` string.
        field_name: Name of the field to look up (without the colon).

    Returns:
        The stripped field value, or ``None`` when no line matches.
    """
    prefix = f"{field_name}:"
    matching_values = (
        row.split(':', 1)[1].strip()
        for row in doc.page_content.split('\n')
        if row.startswith(prefix)
    )
    # Only the first match matters; None signals "field not present".
    return next(matching_values, None)
# Build the Chroma vector store only once; later runs reuse the persisted
# directory. The path is kept in one place so the existence check, the
# persist target and the failure cleanup cannot drift apart.
CHROMA_DIR = "./chroma_db"

if not os.path.exists(CHROMA_DIR):
    try:
        # Load the CSV data as documents.
        loader = CSVLoader(file_path="log_record_rows.csv")
        data = loader.load()
        field_name = "question"

        # extract_field returns None when a document has no such field;
        # drop those (and empty values) so only real text gets embedded.
        extracted = (extract_field(doc, field_name) for doc in data)
        questions = [text for text in extracted if text]
        if not questions:
            raise ValueError(
                f"No non-empty '{field_name}' values found in log_record_rows.csv"
            )

        # Create and save the Chroma vector store.
        vectorstore = Chroma.from_texts(
            texts=questions,
            embedding=embeddings_model,
            persist_directory=CHROMA_DIR,
        )
        print("Chroma database created successfully.")
    except Exception as e:
        # The directory did not exist before this attempt, so anything at
        # CHROMA_DIR now is a partial build. Remove it; otherwise the next
        # run would see the directory and wrongly skip creation.
        shutil.rmtree(CHROMA_DIR, ignore_errors=True)
        print(f"An error occurred while creating the Chroma database: {e}")
else:
    print("Chroma database already exists.")
|