### Python = 3.9
"""Semantic question cache.

A Chroma vector store (persisted on disk) indexes the questions stored in the
Supabase "INNOLUX_cache" table; `semantic_cache` uses it to short-circuit
repeated questions with their cached answer and video URL.
"""

import os

from dotenv import load_dotenv

# Load .env before reading OPENAI_API_KEY below.
load_dotenv('.env')

import openai

openai_api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = openai_api_key

from langchain_openai import OpenAIEmbeddings

# Shared embedding model used by both functions below.
embeddings_model = OpenAIEmbeddings()

from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Chroma
import pandas as pd
import re
# NOTE(review): this import rebinds the name OpenAIEmbeddings to the
# deprecated community class. `embeddings_model` above was already built
# from the langchain_openai class, so the rebinding has no effect on it,
# but the duplicate import should probably be dropped — confirm no other
# module relies on it.
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import SupabaseVectorStore
from supabase.client import create_client


def create_qa_vectordb(supabase, vectordb_directory="./chroma_db"):
    """Rebuild the question vector store from the Supabase cache table.

    Any existing Chroma collection at *vectordb_directory* is dropped first,
    then every cached question is re-embedded and persisted.

    Args:
        supabase: Supabase client with access to the "INNOLUX_cache" table.
        vectordb_directory: On-disk location of the persisted Chroma store.

    Returns:
        The freshly built Chroma vector store.
    """
    if os.path.isdir(vectordb_directory):
        # Drop the stale collection so the rebuild starts from scratch.
        vectorstore = Chroma(
            persist_directory=vectordb_directory,
            embedding_function=embeddings_model,
        )
        vectorstore.delete_collection()

    response = supabase.table("INNOLUX_cache").select("question, answer").execute()
    questions = [row["question"] for row in response.data]

    vectorstore = Chroma.from_texts(
        texts=questions,
        embedding=embeddings_model,
        persist_directory=vectordb_directory,
    )
    return vectorstore


def semantic_cache(supabase, q, SIMILARITY_THRESHOLD=0.83, k=1,
                   vectordb_directory="./chroma_db"):
    """Look up *q* in the semantic cache.

    Opens the persisted Chroma store (building it from Supabase on first
    use), finds the nearest cached question, and on a sufficiently similar
    match fetches its answer and video URL from Supabase.

    Args:
        supabase: Supabase client with access to the "INNOLUX_cache" table.
        q: User question to match against cached questions.
        SIMILARITY_THRESHOLD: Minimum relevance score to count as a hit.
        k: Number of nearest neighbours to retrieve (the best one is used).
        vectordb_directory: On-disk location of the persisted Chroma store.

    Returns:
        (cached_question, answer, video_url) on a cache hit,
        (None, None, None) on a miss.
    """
    if os.path.isdir(vectordb_directory):
        vectorstore = Chroma(
            persist_directory=vectordb_directory,
            embedding_function=embeddings_model,
        )
    else:
        print("create new vector db ...")
        vectorstore = create_qa_vectordb(supabase, vectordb_directory)

    # BUG FIX: the original hard-coded k=1 here, silently ignoring the
    # caller-supplied `k` parameter.
    docs_and_scores = vectorstore.similarity_search_with_relevance_scores(q, k=k)
    if not docs_and_scores:
        # Empty store (e.g. the cache table had no rows) — treat as a miss
        # instead of raising IndexError below.
        return None, None, None

    doc, score = docs_and_scores[0]
    print(score)
    if score >= SIMILARITY_THRESHOLD:
        cache_question = doc.page_content
        print(cache_question)
        response = (
            supabase.table("INNOLUX_cache")
            .select("question, answer, video_url")
            .eq("question", cache_question)
            .execute()
        )
        print(response.data)
        answer = response.data[0]["answer"]
        video_cache = response.data[0]["video_url"]
        return cache_question, answer, video_cache
    else:
        return None, None, None