Kaynağa Gözat

101 semantic search

Sherry 8 ay önce
işleme
85e06ab358
2 değiştirilmiş dosya ile 76 ekleme ve 0 silme
  1. 76 0
      101_semantic_search.py
  2. 0 0
      requirements.txt

+ 76 - 0
101_semantic_search.py

@@ -0,0 +1,76 @@
+### Target interpreter: Python 3.9
+import os
+from dotenv import load_dotenv
+load_dotenv('environment.env')  # read variables from environment.env before they are looked up below
+# Hand the OPENAI_API_KEY environment variable to the openai module.
+import openai 
+openai_api_key = os.getenv("OPENAI_API_KEY")  # None when the variable is not set
+openai.api_key = openai_api_key
+# Embedding model shared by the embedding step and the Chroma vector store.
+from langchain_openai import OpenAIEmbeddings
+embeddings_model = OpenAIEmbeddings()
+# CSVLoader reads the question rows; Chroma is the on-disk vector store.
+from langchain_community.document_loaders.csv_loader import CSVLoader
+from langchain_chroma import Chroma
+
+# from supabase import create_client, Client 
+# supabase_url = os.getenv("SUPABASE_URL")
+# supabase_key = os.getenv("SUPABASE_KEY")
+# supabase: Client = create_client(supabase_url, supabase_key)
+
+############# Load data #############
+def extract_field(doc, field_name):
+    for line in doc.page_content.split('\n'):
+        if line.startswith(f"{field_name}:"):
+            return line.split(':', 1)[1].strip()
+    return None
+
+loader = CSVLoader(file_path="video_cache_rows.csv")
+data = loader.load()
+field_name = "question"
+question = [extract_field(doc, field_name) for doc in data]
+
+# ####### load data from supabase #######
+# embeddings_model = OpenAIEmbeddings()
+# response = supabase.table("video_cache_rows").select("question").execute()
+# data = response.data 
+# created_at = []
+# question = []
+# ids = []
+# answer = []
+# video_url = []
+
+# for item in data:
+#     ids.append(item['id'])
+#     created_at.append(item['created_at'])
+#     question.append(item['question'])
+#     answer.append(item['answer'])
+#     video_url.append(item['video_url'])
+
+
+########## generate embedding ###########
+embedding = embeddings_model.embed_documents(question)  # vectors for `question`; only the commented-out Supabase write below refers to this list
+
+########## Write embedding to the supabase table  #######
+# for id, new_embedding in zip(ids, embedding):
+#     supabase.table("video_cache_rows_duplicate").insert({"embedding": embedding.tolist()}).eq("id", id).execute()
+
+######### Vector Store ##########
+# Put pre-compute embeddings to vector store. ## save to disk
+vectorstore = Chroma.from_texts(
+    texts=question,
+    embedding=embeddings_model,
+    persist_directory="./chroma_db"
+    )
+
+####### load from disk  #######
+query = "101可以帶狗嗎"
+vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings_model)
+docs = vectorstore.similarity_search(query)
+print(f"Query: {query}  | 最接近文檔:{docs[0].page_content}")
+
+####### Query it #########
+query = "101可以帶狗嗎"
+docs = vectorstore.similarity_search(query)
+print(f"Query: {query}  | 最接近文檔:{docs[0].page_content}")
+

+ 0 - 0
requirements.txt