cache: add language support

Mia · 4 months ago · commit e69c81773f
5 changed files with 145 additions and 51 deletions
  1. BIN       __pycache__/main.cpython-312.pyc
  2. BIN       api/__pycache__/db_router.cpython-312.pyc
  3. +102 -26  api/db_router.py
  4. +12 -0    main.py
  5. +31 -25   sherry/semantic_search.py

BIN
__pycache__/main.cpython-312.pyc


BIN
api/__pycache__/db_router.cpython-312.pyc


+ 102 - 26
api/db_router.py

@@ -1,14 +1,12 @@
-from fastapi import APIRouter
+from fastapi import APIRouter,UploadFile, File,Body
 from supabase import create_client, Client
 from dotenv import load_dotenv
 import os
 from datetime import datetime
 from random import choice
-from openai import OpenAI
 from typing import Annotated
 from pydantic import Field
 
-client = OpenAI()
 
 load_dotenv()
 
@@ -226,7 +224,7 @@ def read_root(type:str,language :str = "ch"):
     return {"data": random_row}
 
 @dbRouter.post("/message_not_in_cache")
-def message_not_in_cache(question :str ,answer :str,data_list :str='[]',client_id : str = "0" ):
+def message_not_in_cache(question :str ,answer :str,client_id : str = "0",language:str = "ch" ):
 
     try:
         data, count = supabase.table('client_message').select('*').eq("question",question).execute()
@@ -234,7 +232,7 @@ def message_not_in_cache(question :str ,answer :str,data_list :str='[]',client_i
         if len(data[1]) != 0 :
             return {"state": 200 , "message" : "have saved"}
         
-        data, count = supabase.table('client_message').insert({"client_id": client_id, "question": question,"answer":answer}).execute()
+        data, count = supabase.table('client_message').insert({"client_id": client_id, "question": question,"answer":answer,"language":language}).execute()
         return {"state": 200 , "message" : "success"}
     
     except Exception as e:
@@ -251,12 +249,46 @@ class MessageSaveRequest(BaseModel):
 
     
 @dbRouter.post("/message_save")
-def message_save(request:MessageSaveRequest):
+async def message_save(request:MessageSaveRequest ):
     try :
+
         data, count = supabase.table("log_record").insert({
             "question": request.question,
             "answer": request.answer,
-            "data_list": request.data_list
+            "data_list": request.data_list,
+            # "mp3_url": mp3_url
+        }).execute()
+        return {"state": 200 , "message" : "success"}
+    
+    except Exception as e:
+
+        return {"state": 500 , "message" : str(e)}
+    
+@dbRouter.post("/message_save_mp3")
+async def message_save(question:str = Body(None),answer: str = Body(None),data_list: str= Body(None), mp3_file: UploadFile = File(None) ):
+    try :
+        mp3_url = None  # initialize mp3_url
+        date_time = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
+        new_filename = f"{mp3_file.filename.split('.')[0]}_{date_time}.mp3"
+        
+        if mp3_file:  # check whether an MP3 file was provided
+            # build the path the file will be saved to
+            save_path = os.path.join("static", "mp3", new_filename)
+            
+            # make sure the directory exists
+            os.makedirs(os.path.dirname(save_path), exist_ok=True)
+            
+            # write the uploaded file to that path
+            with open(save_path, "wb") as f:
+                f.write(await mp3_file.read())
+            
+            mp3_url = save_path  # record the saved path as mp3_url
+
+        data, count = supabase.table("log_record").insert({
+            "question": question,
+            "answer": answer,
+            "data_list": data_list,
+            "mp3_url": f"/{mp3_url}"
         }).execute()
         return {"state": 200 , "message" : "success"}
     
@@ -344,31 +376,31 @@ def insert_table(data: dataform):
 
         return {"state": 500 , "message" : str(e)}
     
-@dbRouter.post("/video_save_into_cache")
-def message_not_in_cache(video_name : Annotated[str, Field(description="please place the file in the /home/mia/101/static/video_cache/others/ folder")],client_message_id :str  = None,question:str = None):
-    try:
-        data = []
-        if client_message_id :
-            data, count = supabase.table('client_message').select('*').eq("id",client_message_id).execute()
-        elif question:
-            data, count = supabase.table('client_message').select('*').eq("question",question).execute()
-
-        info = data[1][0]
-
-        response = supabase.table('video_cache').insert({"question": info["question"],"answer":info["answer"],"video_url":f"/static/video_cache/others/{video_name}"}).execute()
+# @dbRouter.post("/video_save_into_cache")
+# def message_not_in_cache(video_name : Annotated[str, Field(description="please place the file in the /home/mia/101/static/video_cache/others/ folder")],client_message_id :str  = None,question:str = None):
+#     try:
+#         data = []
+#         if client_message_id :
+#             data, count = supabase.table('client_message').select('*').eq("id",client_message_id).execute()
+#         elif question:
+#             data, count = supabase.table('client_message').select('*').eq("question",question).execute()
+
+#         info = data[1][0]
+
+#         response = supabase.table('video_cache').insert({"question": info["question"],"answer":info["answer"],"video_url":f"/static/video_cache/others/{video_name}"}).execute()
         
-        response = supabase.table('client_message').delete().eq('id', info["id"]).execute()
+#         response = supabase.table('client_message').delete().eq('id', info["id"]).execute()
         
-        return {"state": 200 , "message" : "success"}
+#         return {"state": 200 , "message" : "success"}
     
-    except Exception as e:
+#     except Exception as e:
 
-        return {"state": 500 , "message" : str(e)}
+#         return {"state": 500 , "message" : str(e)}
     
 from sherry.semantic_search import ask_question
     
 @dbRouter.post("/video_cache")
-def video_cache(client_message :str ):
+def video_cache(client_message :str,language:str ="ch"):
 
     try:
 
@@ -379,14 +411,14 @@ def video_cache(client_message :str ):
 
         # return {"state": 200 , "message" : data[1]}
 
-        result = ask_question(client_message)
+        result = ask_question(client_message,language=language)
 
         # result[0]["answer"]
 
         if result == None :
             return {"state": 500 , "message" : "no data"}
 
-        data, count = supabase.table("log_record").insert({"question":client_message, "answer":result[0]["answer"]}).execute()
+        # data, count = supabase.table("log_record").insert({"question":client_message, "answer":result[0]["answer"]}).execute()
         
         return {"state": 200 , "message" : result }
     
@@ -395,4 +427,48 @@ def video_cache(client_message :str ):
         return {"state": 500 , "message" : str(e)}
 
 
+# from openai import OpenAI
+# import json
+
+# client = OpenAI(
+#     # This is the default and can be omitted
+#     api_key=os.environ.get("OPENAI_API_KEY"),
+# )
+
+# def access_openai(prompt_value):
+#     chat_completion = client.chat.completions.create(
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": f"請將以下的內容翻譯為英文:\n\n {prompt_value}",
+#             }
+#         ],
+#         model="gpt-3.5-turbo",
+#     )
+    
+
+#     return chat_completion.choices[0].message.content
+
+
+
+# @dbRouter.post("/translate")
+# def translate():
+#     try:
+#         response = supabase.table('video_cache').select('*').eq('language', 'ch').execute()
+
+#         datas = response.data
+
+#         for data in datas :
+#             translated_question = access_openai(data['question'])
+#             translated_answer = access_openai(data['answer'])
+
+#             print(data['question'])
+#             print(translated_question)
+
+#             insert = supabase.table('client_message').insert({"client_id":"0", "question":translated_question,"answer":translated_answer,"language":"en"}).execute()
+
+#         return {"state": 200 }
+    
+#     except Exception as e:
 
+#         return {"state": 500 , "message" : str(e)}
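
Note on the db_router.py changes: the cache endpoints now carry a `language` field. `message_not_in_cache` stores it with each saved question/answer pair, and `/video_cache` forwards it to `ask_question` so lookups are restricted to one language. A minimal client-side sketch of calling the updated endpoints; the base URL and sample strings are illustrative assumptions, and both endpoints take plain query parameters:

```python
# Sketch only: BASE_URL and the sample question/answer are assumptions, not part of the commit.
import requests

BASE_URL = "http://localhost:8000"

# Cache a new Q/A pair, now tagged with a language ("ch" remains the default).
resp = requests.post(
    f"{BASE_URL}/message_not_in_cache",
    params={
        "question": "Where is the food court?",
        "answer": "It is on B1.",
        "client_id": "0",
        "language": "en",
    },
)
print(resp.json())  # {"state": 200, "message": "success"} or "have saved"

# Query the video cache in the requested language.
resp = requests.post(
    f"{BASE_URL}/video_cache",
    params={"client_message": "Where is the food court?", "language": "en"},
)
print(resp.json())
```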

+ 12 - 0
main.py

@@ -164,6 +164,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
 scheduler = BackgroundScheduler()
 
 TS_DIRECTORY = Path("/home/mia/101/static/stream")
+MP3_DIRECTORY = Path("/home/mia/101/static/mp3")
 
 def clean_old_files():
     ts_files = list(TS_DIRECTORY.glob("segment_*.ts"))
@@ -176,6 +177,17 @@ def clean_old_files():
                 print(f"Deleted old file: {file}")
             except Exception as e:
                 print(f"Error deleting file {file}: {e}")
+    mp3_files = list(MP3_DIRECTORY.glob("recording_*.mp3"))
+    mp3_files.sort(key=lambda f: f.stat().st_mtime)  # sort by modification time, oldest files first
+    if len(mp3_files) > 20:
+        files_to_delete = mp3_files[:len(mp3_files) - 20]  # delete everything beyond the newest 20
+        for file in files_to_delete:
+            try:
+                os.remove(file)
+                print(f"Deleted old file: {file}")
+            except Exception as e:
+                print(f"Error deleting file {file}: {e}")
+    
 
 # 添加定时任务
 scheduler.add_job(clean_old_files, 'interval', minutes=1)
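
The MP3 cleanup added here mirrors the existing TS-segment cleanup almost line for line. If more directories ever need the same treatment, the two blocks could be folded into one helper; a hedged sketch (the helper name and `keep` parameter are illustrative, not part of this commit):

```python
from pathlib import Path
import os

def clean_old(directory: Path, pattern: str, keep: int = 20) -> None:
    """Delete the oldest files matching `pattern`, keeping only the newest `keep`."""
    files = sorted(directory.glob(pattern), key=lambda f: f.stat().st_mtime)
    for file in (files[:-keep] if len(files) > keep else []):
        try:
            os.remove(file)
            print(f"Deleted old file: {file}")
        except Exception as e:
            print(f"Error deleting file {file}: {e}")

# clean_old(TS_DIRECTORY, "segment_*.ts")
# clean_old(MP3_DIRECTORY, "recording_*.mp3")
```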

+ 31 - 25
sherry/semantic_search.py

@@ -32,11 +32,33 @@ supabase: Client = create_client(supabase_url, supabase_key)
 
 # ####### load data from supabase #######
 # embeddings_model = OpenAIEmbeddings()
-response,count = supabase.table("video_cache").select("question","id").order("id").execute()
-data = response[1]
-question = [item['question'] for item in data if 'question' in item]
-ids = [item['id'] for item in data if 'id' in item]
-question_id_map = {item['question']: item['id'] for item in data if 'id' in item and 'question' in item}
+
+def generated(language:str ="ch"):
+    global response,count,question,ids,question_id_map,vectorstore
+    response,count = supabase.table("video_cache").select("question","id").eq("language",language).order("id").execute()
+    data = response[1]
+    question = [item['question'] for item in data if 'question' in item]
+    ids = [item['id'] for item in data if 'id' in item]
+    question_id_map = {item['question']: item['id'] for item in data if 'id' in item and 'question' in item}
+
+    ########## generate embedding ###########
+    embedding = embeddings_model.embed_documents(question)
+
+    ########## Write embedding to the supabase table  #######
+    # for id, new_embedding in zip(ids, embedding):
+    #     supabase.table("video_cache_rows_duplicate").insert({"embedding": embedding.tolist()}).eq("id", id).execute()
+
+    ######### Vector Store ##########
+    # Put pre-compute embeddings to vector store. ## save to disk
+    vectorstore = Chroma.from_texts(
+        texts=question,
+        embedding=embeddings_model,
+        persist_directory="./chroma_db"
+        )
+
+    vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings_model)
+
+    print("generated")
 
 def get_id_by_question(question):
     return question_id_map.get(question)
@@ -56,28 +78,12 @@ def get_id_by_question(question):
 #     video_url.append(item['video_url'])
 
 
-########## generate embedding ###########
-embedding = embeddings_model.embed_documents(question)
-
-########## Write embedding to the supabase table  #######
-# for id, new_embedding in zip(ids, embedding):
-#     supabase.table("video_cache_rows_duplicate").insert({"embedding": embedding.tolist()}).eq("id", id).execute()
-
-######### Vector Store ##########
-# Put pre-compute embeddings to vector store. ## save to disk
-vectorstore = Chroma.from_texts(
-    texts=question,
-    embedding=embeddings_model,
-    persist_directory="./chroma_db"
-    )
-
-vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings_model)
-
-
-def ask_question(question:str, SIMILARITY_THRESHOLD:int = 0.83):
+def ask_question(question:str, SIMILARITY_THRESHOLD:int = 0.83,language:str ="ch"):
+    generated(language=language)
     docs_and_scores = vectorstore.similarity_search_with_relevance_scores(question, k=1)
     doc, score = docs_and_scores[0]
     print(doc,score)
+    
     if score >= SIMILARITY_THRESHOLD:
         id = get_id_by_question(doc.page_content)
         data,count = supabase.table("video_cache").select("*").eq("id",id).execute()
@@ -91,7 +97,7 @@ def ask_question(question:str, SIMILARITY_THRESHOLD:int = 0.83):
 
 
 if __name__ == "__main__" :
-####### load from disk  #######
+    ####### load from disk  #######
     query = "美食街在哪裡"
     docs = vectorstore.similarity_search(query)
     print(f"Query: {query}  | 最接近文檔:{docs[0].page_content}")
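
After this change the module no longer builds the vector store once at import time; `ask_question` calls `generated(language=...)`, which re-reads the `video_cache` rows for the requested language and rebuilds the Chroma store on every call. A minimal usage sketch, assuming the cached rows carry `answer` and `video_url` columns as the insert statements earlier in this diff suggest:

```python
from sherry.semantic_search import ask_question

# English lookup; returns None when the closest cached question scores below
# SIMILARITY_THRESHOLD, otherwise the matching video_cache rows.
result = ask_question("Where is the food court?", language="en")

if result is None:
    print("no cached video for this question")
else:
    print(result[0]["answer"], result[0]["video_url"])
```

Rebuilding and re-embedding every cached question per request keeps results fresh but is comparatively expensive; caching one persisted store per language and refreshing it only when the cache changes would be a lighter-weight variant.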