### Python >= 3.9
import os
import re

import openai
import pandas as pd
from dotenv import load_dotenv
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Chroma, SupabaseVectorStore
# NOTE(review): the original also re-imported OpenAIEmbeddings from the
# deprecated langchain_community.embeddings.openai path, shadowing this one.
# Keep only the langchain_openai class — it is what embeddings_model uses.
from langchain_openai import OpenAIEmbeddings
from supabase.client import create_client

# Load environment variables and configure the OpenAI client.
load_dotenv('.env')
openai_api_key = os.getenv("OPENAI_API_KEY")
openai.api_key = openai_api_key

# Shared embedding model used by the vector-store helpers below.
embeddings_model = OpenAIEmbeddings()
def create_qa_vectordb(supabase, vectordb_directory="./chroma_db"):
    """(Re)build the local Chroma vector store from the cached Q&A table.

    Any existing collection under *vectordb_directory* is dropped first so
    the store always mirrors the current contents of ``INNOLUX_cache``.

    Args:
        supabase: Supabase client used to read the ``INNOLUX_cache`` table.
        vectordb_directory: Filesystem path where Chroma persists its data.

    Returns:
        A Chroma vector store containing an embedding of every cached question.

    Raises:
        ValueError: If the cache table returned no questions to index.
    """
    # Drop the stale collection so rebuilt embeddings don't mix with old ones.
    if os.path.isdir(vectordb_directory):
        vectorstore = Chroma(
            persist_directory=vectordb_directory,
            embedding_function=embeddings_model,
        )
        vectorstore.delete_collection()

    response = supabase.table("INNOLUX_cache").select("question, answer").execute()
    questions = [row["question"] for row in response.data]
    if not questions:
        # Chroma.from_texts fails opaquely on an empty list; fail clearly instead.
        raise ValueError("INNOLUX_cache returned no questions to index.")

    return Chroma.from_texts(
        texts=questions,
        embedding=embeddings_model,
        persist_directory=vectordb_directory,
    )
def semantic_cache(supabase, q, SIMILARITY_THRESHOLD=0.83, k=1, vectordb_directory="./chroma_db"):
    """Answer *q* from the semantic cache if a similar question was seen before.

    Loads (or lazily builds) the persisted Chroma store of cached questions,
    finds the nearest cached question to *q*, and — when its relevance score
    meets ``SIMILARITY_THRESHOLD`` — fetches the stored answer and video URL
    from the ``INNOLUX_cache`` table in Supabase.

    Args:
        supabase: Supabase client for the ``INNOLUX_cache`` table.
        q: The incoming user question.
        SIMILARITY_THRESHOLD: Minimum relevance score to accept a cache hit.
        k: Number of nearest neighbours to retrieve (the best one is used).
        vectordb_directory: Path of the persisted Chroma store.

    Returns:
        ``(cache_question, answer, video_url)`` on a cache hit, otherwise
        ``(None, None, None)``.
    """
    if os.path.isdir(vectordb_directory):
        vectorstore = Chroma(persist_directory=vectordb_directory, embedding_function=embeddings_model)
    else:
        print("create new vector db ...")
        vectorstore = create_qa_vectordb(supabase, vectordb_directory)

    # BUG FIX: the original hard-coded k=1 here, silently ignoring the *k*
    # parameter. Also guard against an empty store, which previously raised
    # IndexError on docs_and_scores[0].
    docs_and_scores = vectorstore.similarity_search_with_relevance_scores(q, k=k)
    if not docs_and_scores:
        return None, None, None

    doc, score = docs_and_scores[0]
    print(score)

    if score >= SIMILARITY_THRESHOLD:
        cache_question = doc.page_content
        print(cache_question)
        response = supabase.table("INNOLUX_cache").select("question, answer, video_url").eq("question", cache_question).execute()
        print(response.data)
        answer = response.data[0]["answer"]
        video_cache = response.data[0]["video_url"]
        return cache_question, answer, video_cache
    else:
        return None, None, None