cache: add language support

Mia · 4 months ago · commit e69c81773f
5 changed files with 145 additions and 51 deletions
  1. BIN       __pycache__/main.cpython-312.pyc
  2. BIN       api/__pycache__/db_router.cpython-312.pyc
  3. +102 -26  api/db_router.py
  4. +12 -0    main.py
  5. +31 -25   sherry/semantic_search.py

BIN
__pycache__/main.cpython-312.pyc


BIN
api/__pycache__/db_router.cpython-312.pyc


+ 102 - 26
api/db_router.py

@@ -1,14 +1,12 @@
-from fastapi import APIRouter
+from fastapi import APIRouter,UploadFile, File,Body
 from supabase import create_client, Client
 from dotenv import load_dotenv
 import os
 from datetime import datetime
 from random import choice
-from openai import OpenAI
 from typing import Annotated
 from pydantic import Field
 
-client = OpenAI()
 
 load_dotenv()
 
@@ -226,7 +224,7 @@ def read_root(type:str,language :str = "ch"):
     return {"data": random_row}
 
 @dbRouter.post("/message_not_in_cache")
-def message_not_in_cache(question :str ,answer :str,data_list :str='[]',client_id : str = "0" ):
+def message_not_in_cache(question :str ,answer :str,client_id : str = "0",language:str = "ch" ):
 
     try:
         data, count = supabase.table('client_message').select('*').eq("question",question).execute()
@@ -234,7 +232,7 @@ def message_not_in_cache(question :str ,answer :str,data_list :str='[]',client_i
         if len(data[1]) != 0 :
             return {"state": 200 , "message" : "have saved"}
         
-        data, count = supabase.table('client_message').insert({"client_id": client_id, "question": question,"answer":answer}).execute()
+        data, count = supabase.table('client_message').insert({"client_id": client_id, "question": question,"answer":answer,"language":language}).execute()
         return {"state": 200 , "message" : "success"}
     
     except Exception as e:
@@ -251,12 +249,46 @@ class MessageSaveRequest(BaseModel):
 
     
 @dbRouter.post("/message_save")
-def message_save(request:MessageSaveRequest):
+async def message_save(request:MessageSaveRequest ):
     try :
+
         data, count = supabase.table("log_record").insert({
             "question": request.question,
             "answer": request.answer,
-            "data_list": request.data_list
+            "data_list": request.data_list,
+            # "mp3_url": mp3_url
+        }).execute()
+        return {"state": 200 , "message" : "success"}
+    
+    except Exception as e:
+
+        return {"state": 500 , "message" : str(e)}
+    
+@dbRouter.post("/message_save_mp3")
+async def message_save(question:str = Body(None),answer: str = Body(None),data_list: str= Body(None), mp3_file: UploadFile = File(None) ):
+    try :
+        mp3_url = None  # initialize mp3_url
+        date_time = datetime.now().strftime('%Y-%m-%d_%H:%M:%S')
+        new_filename = f"{mp3_file.filename.split('.')[0]}_{date_time}.mp3"
+        
+        if mp3_file:  # check whether an MP3 file was provided
+            # build the path the file will be saved to
+            save_path = os.path.join("static", "mp3", new_filename)
+            
+            # make sure the directory exists
+            os.makedirs(os.path.dirname(save_path), exist_ok=True)
+            
+            # write the uploaded file to that path
+            with open(save_path, "wb") as f:
+                f.write(await mp3_file.read())
+            
+            mp3_url = save_path  # record the saved path as mp3_url
+
+        data, count = supabase.table("log_record").insert({
+            "question": question,
+            "answer": answer,
+            "data_list": data_list,
+            "mp3_url": f"/{mp3_url}"
         }).execute()
         return {"state": 200 , "message" : "success"}
     
@@ -344,31 +376,31 @@ def insert_table(data: dataform):
 
         return {"state": 500 , "message" : str(e)}
     
-@dbRouter.post("/video_save_into_cache")
-def message_not_in_cache(video_name : Annotated[str, Field(description="please place the file in the /home/mia/101/static/video_cache/others/ folder")],client_message_id :str  = None,question:str = None):
-    try:
-        data = []
-        if client_message_id :
-            data, count = supabase.table('client_message').select('*').eq("id",client_message_id).execute()
-        elif question:
-            data, count = supabase.table('client_message').select('*').eq("question",question).execute()
-
-        info = data[1][0]
-
-        response = supabase.table('video_cache').insert({"question": info["question"],"answer":info["answer"],"video_url":f"/static/video_cache/others/{video_name}"}).execute()
+# @dbRouter.post("/video_save_into_cache")
+# def message_not_in_cache(video_name : Annotated[str, Field(description="please place the file in the /home/mia/101/static/video_cache/others/ folder")],client_message_id :str  = None,question:str = None):
+#     try:
+#         data = []
+#         if client_message_id :
+#             data, count = supabase.table('client_message').select('*').eq("id",client_message_id).execute()
+#         elif question:
+#             data, count = supabase.table('client_message').select('*').eq("question",question).execute()
+
+#         info = data[1][0]
+
+#         response = supabase.table('video_cache').insert({"question": info["question"],"answer":info["answer"],"video_url":f"/static/video_cache/others/{video_name}"}).execute()
         
-        response = supabase.table('client_message').delete().eq('id', info["id"]).execute()
+#         response = supabase.table('client_message').delete().eq('id', info["id"]).execute()
         
-        return {"state": 200 , "message" : "success"}
+#         return {"state": 200 , "message" : "success"}
     
-    except Exception as e:
+#     except Exception as e:
 
-        return {"state": 500 , "message" : str(e)}
+#         return {"state": 500 , "message" : str(e)}
     
 from sherry.semantic_search import ask_question
     
 @dbRouter.post("/video_cache")
-def video_cache(client_message :str ):
+def video_cache(client_message :str,language:str ="ch"):
 
     try:
 
@@ -379,14 +411,14 @@ def video_cache(client_message :str ):
 
         # return {"state": 200 , "message" : data[1]}
 
-        result = ask_question(client_message)
+        result = ask_question(client_message,language=language)
 
         # result[0]["answer"]
 
         if result == None :
             return {"state": 500 , "message" : "no data"}
 
-        data, count = supabase.table("log_record").insert({"question":client_message, "answer":result[0]["answer"]}).execute()
+        # data, count = supabase.table("log_record").insert({"question":client_message, "answer":result[0]["answer"]}).execute()
         
         return {"state": 200 , "message" : result }
     
@@ -395,4 +427,48 @@ def video_cache(client_message :str ):
         return {"state": 500 , "message" : str(e)}
 
 
+# from openai import OpenAI
+# import json
+
+# client = OpenAI(
+#     # This is the default and can be omitted
+#     api_key=os.environ.get("OPENAI_API_KEY"),
+# )
+
+# def access_openai(prompt_value):
+#     chat_completion = client.chat.completions.create(
+#         messages=[
+#             {
+#                 "role": "user",
+#                 "content": f"請將以下的內容翻譯為英文:\n\n {prompt_value}",
+#             }
+#         ],
+#         model="gpt-3.5-turbo",
+#     )
+    
+
+#     return chat_completion.choices[0].message.content
+
+
+
+# @dbRouter.post("/translate")
+# def translate():
+#     try:
+#         response = supabase.table('video_cache').select('*').eq('language', 'ch').execute()
+
+#         datas = response.data
+
+#         for data in datas :
+#             translated_question = access_openai(data['question'])
+#             translated_answer = access_openai(data['answer'])
+
+#             print(data['question'])
+#             print(translated_question)
+
+#             insert = supabase.table('client_message').insert({"client_id":"0", "question":translated_question,"answer":translated_answer,"language":"en"}).execute()
+
+#         return {"state": 200 }
+    
+#     except Exception as e:
 
+#         return {"state": 500 , "message" : str(e)}
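
Note on the db_router.py changes: the cache endpoints now carry a `language` field. `message_not_in_cache` stores it with each saved question/answer pair, and `/video_cache` forwards it to `ask_question` so lookups are restricted to one language. A minimal client-side sketch of calling the updated endpoints; the base URL and sample strings are illustrative assumptions, and both endpoints take plain query parameters:

```python
# Sketch only: BASE_URL and the sample question/answer are assumptions, not part of the commit.
import requests

BASE_URL = "http://localhost:8000"

# Cache a new Q/A pair, now tagged with a language ("ch" remains the default).
resp = requests.post(
    f"{BASE_URL}/message_not_in_cache",
    params={
        "question": "Where is the food court?",
        "answer": "It is on B1.",
        "client_id": "0",
        "language": "en",
    },
)
print(resp.json())  # {"state": 200, "message": "success"} or "have saved"

# Query the video cache in the requested language.
resp = requests.post(
    f"{BASE_URL}/video_cache",
    params={"client_message": "Where is the food court?", "language": "en"},
)
print(resp.json())
```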

+ 12 - 0
main.py

@@ -164,6 +164,7 @@ from apscheduler.schedulers.background import BackgroundScheduler
 scheduler = BackgroundScheduler()
 
 TS_DIRECTORY = Path("/home/mia/101/static/stream")
+MP3_DIRECTORY = Path("/home/mia/101/static/mp3")
 
 def clean_old_files():
     ts_files = list(TS_DIRECTORY.glob("segment_*.ts"))
@@ -176,6 +177,17 @@ def clean_old_files():
                 print(f"Deleted old file: {file}")
             except Exception as e:
                 print(f"Error deleting file {file}: {e}")
+    mp3_files = list(MP3_DIRECTORY.glob("recording_*.mp3"))
+    mp3_files.sort(key=lambda f: f.stat().st_mtime)  # sort by modification time, oldest files first
+    if len(mp3_files) > 20:
+        files_to_delete = mp3_files[:len(mp3_files) - 20]  # delete everything beyond the newest 20
+        for file in files_to_delete:
+            try:
+                os.remove(file)
+                print(f"Deleted old file: {file}")
+            except Exception as e:
+                print(f"Error deleting file {file}: {e}")
+    
 
 # 添加定时任务
 scheduler.add_job(clean_old_files, 'interval', minutes=1)
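
The MP3 cleanup added here mirrors the existing TS-segment cleanup almost line for line. If more directories ever need the same treatment, the two blocks could be folded into one helper; a hedged sketch (the helper name and `keep` parameter are illustrative, not part of this commit):

```python
from pathlib import Path
import os

def clean_old(directory: Path, pattern: str, keep: int = 20) -> None:
    """Delete the oldest files matching `pattern`, keeping only the newest `keep`."""
    files = sorted(directory.glob(pattern), key=lambda f: f.stat().st_mtime)
    for file in (files[:-keep] if len(files) > keep else []):
        try:
            os.remove(file)
            print(f"Deleted old file: {file}")
        except Exception as e:
            print(f"Error deleting file {file}: {e}")

# clean_old(TS_DIRECTORY, "segment_*.ts")
# clean_old(MP3_DIRECTORY, "recording_*.mp3")
```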

+ 31 - 25
sherry/semantic_search.py

@@ -32,11 +32,33 @@ supabase: Client = create_client(supabase_url, supabase_key)
 
 # ####### load data from supabase #######
 # embeddings_model = OpenAIEmbeddings()
-response,count = supabase.table("video_cache").select("question","id").order("id").execute()
-data = response[1]
-question = [item['question'] for item in data if 'question' in item]
-ids = [item['id'] for item in data if 'id' in item]
-question_id_map = {item['question']: item['id'] for item in data if 'id' in item and 'question' in item}
+
+def generated(language:str ="ch"):
+    global response,count,question,ids,question_id_map,vectorstore
+    response,count = supabase.table("video_cache").select("question","id").eq("language",language).order("id").execute()
+    data = response[1]
+    question = [item['question'] for item in data if 'question' in item]
+    ids = [item['id'] for item in data if 'id' in item]
+    question_id_map = {item['question']: item['id'] for item in data if 'id' in item and 'question' in item}
+
+    ########## generate embedding ###########
+    embedding = embeddings_model.embed_documents(question)
+
+    ########## Write embedding to the supabase table  #######
+    # for id, new_embedding in zip(ids, embedding):
+    #     supabase.table("video_cache_rows_duplicate").insert({"embedding": embedding.tolist()}).eq("id", id).execute()
+
+    ######### Vector Store ##########
+    # Put pre-compute embeddings to vector store. ## save to disk
+    vectorstore = Chroma.from_texts(
+        texts=question,
+        embedding=embeddings_model,
+        persist_directory="./chroma_db"
+        )
+
+    vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings_model)
+
+    print("generated")
 
 def get_id_by_question(question):
     return question_id_map.get(question)
@@ -56,28 +78,12 @@ def get_id_by_question(question):
 #     video_url.append(item['video_url'])
 
 
-########## generate embedding ###########
-embedding = embeddings_model.embed_documents(question)
-
-########## Write embedding to the supabase table  #######
-# for id, new_embedding in zip(ids, embedding):
-#     supabase.table("video_cache_rows_duplicate").insert({"embedding": embedding.tolist()}).eq("id", id).execute()
-
-######### Vector Store ##########
-# Put pre-compute embeddings to vector store. ## save to disk
-vectorstore = Chroma.from_texts(
-    texts=question,
-    embedding=embeddings_model,
-    persist_directory="./chroma_db"
-    )
-
-vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings_model)
-
-
-def ask_question(question:str, SIMILARITY_THRESHOLD:int = 0.83):
+def ask_question(question:str, SIMILARITY_THRESHOLD:int = 0.83,language:str ="ch"):
+    generated(language=language)
     docs_and_scores = vectorstore.similarity_search_with_relevance_scores(question, k=1)
     doc, score = docs_and_scores[0]
     print(doc,score)
+    
     if score >= SIMILARITY_THRESHOLD:
         id = get_id_by_question(doc.page_content)
         data,count = supabase.table("video_cache").select("*").eq("id",id).execute()
@@ -91,7 +97,7 @@ def ask_question(question:str, SIMILARITY_THRESHOLD:int = 0.83):
 
 
 if __name__ == "__main__" :
-####### load from disk  #######
+    ####### load from disk  #######
     query = "美食街在哪裡"
     docs = vectorstore.similarity_search(query)
     print(f"Query: {query}  | 最接近文檔:{docs[0].page_content}")
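
After this change the module no longer builds the vector store once at import time; `ask_question` calls `generated(language=...)`, which re-reads the `video_cache` rows for the requested language and rebuilds the Chroma store on every call. A minimal usage sketch, assuming the cached rows carry `answer` and `video_url` columns as the insert statements earlier in this diff suggest:

```python
from sherry.semantic_search import ask_question

# English lookup; returns None when the closest cached question scores below
# SIMILARITY_THRESHOLD, otherwise the matching video_cache rows.
result = ask_question("Where is the food court?", language="en")

if result is None:
    print("no cached video for this question")
else:
    print(result[0]["answer"], result[0]["video_url"])
```

Rebuilding and re-embedding every cached question per request keeps results fresh but is comparatively expensive; caching one persisted store per language and refreshing it only when the cache changes would be a lighter-weight variant.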