Browse Source

上一個 repository 壞掉了

Mia 5 months ago
commit
e7e88a2c8f

+ 3 - 0
.env

@@ -0,0 +1,3 @@
+SUPABASE_URL = "http://139.144.120.184:8000"
+SUPABASE_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyAgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q"
+OPENAI_API_KEY = "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"

+ 7 - 0
.gitignore

@@ -0,0 +1,7 @@
+# Generated / runtime artefacts
+static/
+log/
+chroma_db/
+sherry/chroma_db/
+
+# Virtual environment
+101evn/
+
+# Python bytecode (the bare pattern matches at every directory level)
+__pycache__/
+sherry/__pycache__/
+*.pyc
+
+# Secrets and credentials - never commit these.
+# Files that are already tracked must also be removed with `git rm --cached`.
+.env
+token.pickle
+credentials.json
+keys/

+ 7 - 0
101_restart.sh

@@ -0,0 +1,7 @@
+tmux new -d -s  101
+tmux new-window -t 101:0 -d
+tmux send-keys -t 101:0 "source 101evn/bin/activate" Enter
+tmux send-keys -t 101:0 "cd /home/mia/101" Enter
+tmux send-keys -t 101:0 "export OPENAI_API_KEY=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" Enter
+tmux send-keys -t 101:0 "sudo python3 main.py" Enter
+

+ 6 - 0
README.md

@@ -0,0 +1,6 @@
+* 如何啟動: python3 main.py
+* **記得要輸OPENAI 金鑰(export)**
+* ignore : static/、101evn/
+* speech2text.py 由 Tomoya 撰寫,其餘為 Mia
+* 安裝套件:pip install -r requirements.txt
+* 本API在cmm.ai:9101 上有做反向代理

BIN
__pycache__/chatapi.cpython-38.pyc


BIN
__pycache__/image_operate.cpython-38.pyc


BIN
__pycache__/main.cpython-312.pyc


BIN
__pycache__/main.cpython-38.pyc


BIN
__pycache__/skylight.cpython-38.pyc


BIN
__pycache__/ttspy.cpython-38.pyc


BIN
api/__pycache__/db_router.cpython-312.pyc


BIN
api/__pycache__/db_router.cpython-38.pyc


BIN
api/__pycache__/image_operate.cpython-312.pyc


BIN
api/__pycache__/image_operate.cpython-38.pyc


BIN
api/__pycache__/skylight.cpython-312.pyc


BIN
api/__pycache__/skylight.cpython-38.pyc


BIN
api/__pycache__/tendent_router.cpython-312.pyc


BIN
api/__pycache__/tendent_router.cpython-38.pyc


BIN
api/__pycache__/tts_router.cpython-312.pyc


BIN
api/__pycache__/tts_router.cpython-38.pyc


BIN
api/__pycache__/ttspy.cpython-312.pyc


BIN
api/__pycache__/ttspy.cpython-38.pyc


+ 329 - 0
api/db_router.py

@@ -0,0 +1,329 @@
+from fastapi import APIRouter
+from supabase import create_client, Client
+from dotenv import load_dotenv
+import os
+from datetime import datetime
+from random import choice
+from openai import OpenAI
+from typing import Annotated
+from pydantic import Field
+
+client = OpenAI()
+
+load_dotenv()
+
+# supaspace 連線
+url: str =  os.environ.get('SUPABASE_URL')
+key: str = os.environ.get('SUPABASE_KEY')
+
+supabase: Client = create_client(url, key)
+
+dbRouter = APIRouter()
+
+
+
+@dbRouter.get("/click")
+def add_click_time():
+
+    try:
+
+        response = supabase.table('click_time').select("*").execute()
+
+        click_time  = response.data[0]['click_time'] + 1
+    
+        data, count = supabase.table('click_time') \
+            .update({'click_time':click_time,'update_time':str(datetime.now())})\
+            .eq('id', 1)\
+            .execute()
+        
+        return {"state":"success","click_time" : click_time}
+
+    except Exception as e:
+        
+        return {"state":str(e)}
+    
+
+@dbRouter.get("/find_brand")
+def find_brand(keyword:str = None,language :str = "ch",page_num : int = None,page_amount: int = None,search_name : str = None):
+
+
+    if keyword is None :
+        query = supabase.table('101_brand').select('*').eq("language", language)
+    else :
+        keyword_list = keyword.split(",")
+        query= supabase.table('101_brand').select('*').eq("language", language)
+        for keyword_tmp in keyword_list :
+            query = query.like('tags', f'%{keyword_tmp}%')
+
+    if search_name:
+        query = query.like('name', f'%{search_name}%')
+
+    result,_ = query.execute()
+    count = len(result[1])
+
+    if page_num and page_amount :
+        offset = (page_num - 1) * page_amount
+        query = query.range(offset, offset + page_amount-1)
+
+    try:
+        data,_ = query.execute()
+        result = []
+
+        for shop in data[1] :
+
+            json = {
+                "type" : shop["type"],
+                "info" : shop
+            }
+
+            if language != "ch" :
+                if shop["floor"] == "館外" :
+                    json["info"]["floor"] = "outside"
+
+            result.append(json)
+
+
+        return {"state":"success","all_num" : count,"data" : result}
+
+    except Exception as e:
+        return {"state":"fail","message" :str(e)}
+    
+@dbRouter.get("/arviews")
+def arviews(start:str,end:str,language:str = "ch"):
+    try :
+        data, count = supabase.table('101_arviews')\
+        .select('*')\
+        .eq('start_loc', start) \
+        .like('tour_place', f'%{end}%') \
+        .execute()
+
+        result :str
+        words :str
+
+        if len(data[1]) != 0:
+            if language == "ch" :
+                result = data[1][0]["url"]
+                words = data[1][0]["words"]
+            else:
+                result = data[1][0]["en_url"]
+                words = data[1][0]["en_words"]
+        else :
+            result = "no this route"
+
+
+        return {"state":"success","url" : result,"words" : words}
+
+    except Exception as e:
+        return {"state":"fail","message" :str(e)}
+
+@dbRouter.get("/static_tickets")
+async def static_tickets(is_Chinese : int = None):
+    try:
+        data =None
+
+
+        if is_Chinese :
+            data, count = supabase.table('101_ticket')\
+            .select('*')\
+            .in_('id', [1,3,6,7])\
+            .execute()
+        else :
+            data, count = supabase.table('101_ticket')\
+            .select('*')\
+            .in_('id', [182,183,180])\
+            .execute()
+
+        result = []
+        
+        for shop in data[1] :
+
+            json = {
+                "type" : shop["type"],
+                "info" : shop
+            }
+
+            result.append(json)
+
+        return {"state":"success","result" : result}
+
+    except Exception as e:
+        return {"state":"fail","message" :str(e)}
+
+@dbRouter.get("/ad/{type}")
+def read_root(type:str,language :str = "ch"):
+
+    keyword1 :str 
+    keyword2 :str
+
+    if type == "美食伴手禮":
+        keyword1 = "餐飲"
+        keyword2 = "伴手禮" 
+    else :
+        keyword1 = "住宿"
+        keyword2 = "伴手禮" 
+
+    data, count = supabase.table('101_brand')\
+        .select('*')\
+        .eq("floor","館外")\
+        .eq("language", language)\
+        .or_(f"tags.ilike.%{keyword1}%,tags.ilike.%{keyword2}%")\
+        .execute()
+    
+    result = data[1]
+
+    # 從結果中隨機選擇一筆資料
+    random_row = choice(result)
+
+    if language != "ch" :
+        if random_row["floor"] == "館外" :
+            random_row["floor"] = "outside"
+
+    #print(random_row)
+    
+    return {"data": random_row}
+
+@dbRouter.post("/message_not_in_cache")
+def message_not_in_cache(question :str ,answer :str,client_id : str = "0" ):
+
+    try:
+        data, count = supabase.table('client_message').select('*').eq("question",question).execute()
+
+        if len(data[1]) != 0 :
+            return {"state": 200 , "message" : "have saved"}
+        
+        data, count = supabase.table('client_message').insert({"client_id": client_id, "question": question,"answer":answer}).execute()
+        
+        return {"state": 200 , "message" : "success"}
+    
+    except Exception as e:
+
+        return {"state": 500 , "message" : str(e)}
+    
+
+from pydantic import BaseModel, EmailStr
+import base64
+import pickle
+from email.mime.text import MIMEText
+from google.auth.transport.requests import Request
+from google.oauth2.credentials import Credentials
+from google_auth_oauthlib.flow import InstalledAppFlow
+from googleapiclient.discovery import build
+import os
+
+# OAuth scope requested for the Gmail API: permission to send mail only.
+SCOPES = ['https://www.googleapis.com/auth/gmail.send']
+
+# Request body of POST /insert_table. The model is inserted unchanged into the
+# `lost_property` table via data.dict().
+class dataform(BaseModel):
+    title: str
+    content: str
+    client_name: str
+    gender: str
+    email: EmailStr  # validated address; passed to send_email as the sender
+    phone: str
+    type:str
+
+def send_email(to_email,from_email,message):
+    creds = None
+    # 如果存在 token.pickle 文件,讀取
+    if os.path.exists('token.pickle'):
+        with open('token.pickle', 'rb') as token:
+            creds = pickle.load(token)
+
+    # 如果沒有有效的憑據,就進行登入
+    if not creds or not creds.valid:
+        if creds and creds.expired and creds.refresh_token:
+            creds.refresh(Request())
+        else:
+            flow = InstalledAppFlow.from_client_secrets_file(
+                'credentials.json', SCOPES)
+            creds = flow.run_local_server(port=0)
+
+        # 保存憑據
+        with open('token.pickle', 'wb') as token:
+            pickle.dump(creds, token)
+
+    service = build('gmail', 'v1', credentials=creds)
+
+    # 設定郵件
+    message = MIMEText(message)
+    message['to'] = to_email
+    message['from'] = from_email
+    message['subject'] = '101 ai客服 表單新資料'
+    raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
+
+    # 發送郵件
+    try:
+        message = service.users().messages().send(userId='me', body={'raw': raw}).execute()
+        print(f'已發送郵件: {message["id"]}')
+        return "success"
+    except Exception as error:
+        print(f'發送郵件時出錯: {error}')
+        return "fail"
+    
+@dbRouter.post("/insert_table")
+def insert_table(data: dataform):
+
+    try:
+        response,count = supabase.table('lost_property').insert(data.dict()).execute()
+
+        email_content = response[1][0]
+
+        try:
+            send_email("mia@choozmo.com",data.email,str(email_content))
+        except Exception as e:
+            print(str(e))
+
+        
+        return {"state": 200 ,"message": "資料已成功提交"}
+    
+    except Exception as e:
+
+        return {"state": 500 , "message" : str(e)}
+    
+@dbRouter.post("/video_save_into_cache")
+def message_not_in_cache(video_name : Annotated[str, Field(description="檔案請丟進/home/mia/101/static/video_cache/others/資料夾裡")],client_message_id :str  = None,question:str = None):
+    try:
+        data = []
+        if client_message_id :
+            data, count = supabase.table('client_message').select('*').eq("id",client_message_id).execute()
+        elif question:
+            data, count = supabase.table('client_message').select('*').eq("question",question).execute()
+
+        info = data[1][0]
+
+        response = supabase.table('video_cache').insert({"question": info["question"],"answer":info["answer"],"video_url":f"/static/video_cache/others/{video_name}"}).execute()
+        
+        response = supabase.table('client_message').delete().eq('id', info["id"]).execute()
+        
+        return {"state": 200 , "message" : "success"}
+    
+    except Exception as e:
+
+        return {"state": 500 , "message" : str(e)}
+    
+from sherry.semantic_search import ask_question
+    
+@dbRouter.post("/video_cache")
+def video_cache(client_message :str ):
+
+    try:
+
+        # data, count = supabase.table('video_cache').select('*').like('question', f'%{client_message}%').execute()
+
+        # if len(data[1]) == 0 :
+        #     return {"state": 500 , "message" : "no data"}
+
+        # return {"state": 200 , "message" : data[1]}
+
+        result = ask_question(client_message)
+
+        if result == None :
+            return {"state": 500 , "message" : "no data"}
+        
+        return {"state": 200 , "message" : result }
+    
+    except Exception as e:
+
+        return {"state": 500 , "message" : str(e)}
+
+
+

+ 66 - 0
api/image_operate.py

@@ -0,0 +1,66 @@
+from rembg import remove
+import cv2
+import numpy as np
+import os
+import time
+from datetime import datetime, timedelta
+
+
+def delete_old_files(folder_path, days_old):
+    # 獲取當前時間
+    now = time.time()
+    # 計算指定的時間差
+    cutoff = now - (days_old * 86400)  # 86400 是一天的秒數
+
+    # 遍歷資料夾中的所有檔案
+    for filename in os.listdir(folder_path):
+        file_path = os.path.join(folder_path, filename)
+        # 確認這是個檔案
+        if os.path.isfile(file_path):
+            # 獲取檔案的最後修改時間
+            file_mtime = os.path.getmtime(file_path)
+            # 如果最後修改時間早於指定的時間差,則刪除該檔案
+            if file_mtime < cutoff:
+                os.remove(file_path)
+                print(f"Deleted {file_path}")
+
+async def remove_background(input_path:str,output_path:str):
+    input = cv2.imread(input_path)
+    output = remove(input,bgcolor=(255,255,255,0))
+
+
+    cv2.imwrite(output_path, output)
+    
+    delete_old_files(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/image",2)
+
+async def detect_face(image_file_path):
+    try :
+        img = cv2.imread(image_file_path)
+
+        file_list = image_file_path.split("/")
+        filename = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/image/check/{file_list[-1]}"
+
+        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)   # 將圖片轉成灰階
+
+        face_cascade = cv2.CascadeClassifier(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/haarcascade_frontalface_default.xml")   # 載入人臉模型
+        faces = face_cascade.detectMultiScale(gray)    # 偵測人臉
+
+        print(len(faces))
+
+        if len(faces ) == 0 :
+            return {"state":"fail","msg":"no face"}
+
+        for (x, y, w, h) in faces:
+            cv2.rectangle(img, (x, y), (x+w, y+h), (0, 255, 0), 8)    # 利用 for 迴圈,抓取每個人臉屬性,繪製方框
+            break
+
+        cv2.imwrite(filename, img)
+
+        delete_old_files(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/image/remove",2)
+    
+        return {"state":"success","filename":f"static/image/remove/{file_list[-1]}"}
+
+    except Exception as e :
+        return {"state":"fail","msg":str(e)}
+
+    

+ 207 - 0
api/skylight.py

@@ -0,0 +1,207 @@
+from PIL import Image, ImageDraw, ImageFont
+import os
+
+def create_image(text, output_path, font_size=300, bg_color=(255, 255, 255), text_color=(0, 0, 0), max_width=200):
+    """Render ``text`` with MasaFont onto an RGBA canvas and save it.
+
+    Text without CJK characters (per ``detect_language``) is drawn in
+    horizontal lines, top to bottom, at font size 200. All other text is
+    drawn in vertical columns, right to left, at ``font_size``.
+
+    Returns:
+        ``"超過字數限制"`` without writing a file when ``text`` is longer than
+        80 characters (English) or 15 characters (otherwise);
+        ``"成功製作文字"`` after the image has been saved to ``output_path``.
+
+    NOTE(review): ``bg_color`` is never read and the ``max_width`` argument
+    is overwritten before use. An empty ``text`` or an empty first line
+    raises IndexError at ``text[0]`` / ``lines[0][0]``.
+    """
+
+
+    # Determine the language
+    lines : list
+    check_tag = "en"
+    font : ImageFont
+
+    if detect_language(text) == "English":
+        font = ImageFont.truetype(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/MasaFont-Regular.ttf", 200, encoding="utf-8")
+        if len(text) > 80:
+            return "超過字數限制"
+        lines = split_text_by_length(text, 15)
+        check_tag = "en"
+    else:
+        # Choose the Chinese font and size
+        font = ImageFont.truetype(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/MasaFont-Regular.ttf", font_size, encoding="utf-8")
+        if len(text) > 15 :
+            return "超過字數限制"
+        lines = split_chinese_text(text, max_length=4)
+        check_tag = "ch"
+
+    # Initial drawing position
+    x_position = 0
+    y_position = 0
+
+    # The advance width of the first character is used as the cell size.
+    line_height = font.getlength(text[0])
+    print(line_height)
+
+    # Canvas size for the vertical layout: one cell-wide column per line,
+    # tall enough for the longest line.
+    max_width = int(font.getlength(text[0])*len(lines))
+    max_height = int(font.getlength(text[0])*find_longest_segment(lines))
+
+    print(max_width,max_height)
+    print(lines)
+
+    # Horizontal layout: swap the axes and double the height.
+    if check_tag == "en" :
+        tmp = max_width
+        max_width = max_height
+        max_height = tmp*2
+
+    # Create the canvas (white with alpha 0)
+    image = Image.new('RGBA', (max_width,max_height), (255, 255, 255, 0))
+    draw = ImageDraw.Draw(image)
+
+    line_num = 0
+
+    # Every character is advanced by the width of the very first character.
+    text_width = draw.textlength(lines[0][0], font=font)
+
+
+    if check_tag == "en" :
+        y_position = 0
+
+        # Draw each line of text
+        for line in lines:
+
+            x_position = 0
+            for char in line:
+
+                draw.text((x_position, y_position), char, font=font, fill=text_color)
+                x_position += text_width 
+
+            y_position += line_height +50
+
+            line_num += 1
+
+    else :
+        # Start at the right-most column and move left.
+        x_position = max_width - text_width 
+
+        # Draw each line of text
+        for line in lines:
+
+            y_position = 0
+            for char in line:
+
+                draw.text((x_position, y_position), char, font=font, fill=text_color)
+                y_position += text_width
+
+            x_position -= line_height
+
+            line_num += 1
+
+
+    # Save the image
+    image.save(output_path)
+
+    return "成功製作文字"
+
+def overlay_images(background_path, overlay_path, output_path):
+    image1 = Image.open(background_path).convert('RGBA')
+    image2 = Image.open(overlay_path).convert('RGBA')
+
+    x = (image1.width - image2.width) // 2
+    y = (image1.height - image2.height) // 2 - 160
+
+    # 將第二張圖片疊加在第一張圖片上
+    image1.paste(image2, (x, y),image2)
+
+    # 保存疊加後的圖片
+    image1.save(output_path)
+
+    # 顯示疊加後的圖片
+    # image1.show()
+
+    print(f"finished, saving image at {output_path}")
+
+    im = Image.open(output_path)
+    name =output_path.lower().split('/')[::-1][0]
+    webp = name.replace('png', 'webp')
+    im.save(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/tendents/{webp}", 'WebP', quality=40, )
+
+    os.remove(output_path)
+
+def detect_language(text):
+    for char in text:
+        # Check if the character falls within the range of Chinese characters
+        if '\u4e00' <= char <= '\u9fff':
+            return 'Chinese'
+    # If no Chinese characters are found, assume it's English
+    return 'English'
+
+def split_text_by_length(text, length):
+    paragraphs = []
+    current_paragraph = ""
+    words = text.split()
+
+    for word in words:
+        # If adding the current word exceeds the maximum length, start a new paragraph
+        if len(current_paragraph) + len(word) + 1 > length:
+            paragraphs.append(current_paragraph.strip())
+            current_paragraph = ""
+
+        # Add the current word to the current paragraph
+        current_paragraph += word + " "
+
+    # Add the remaining part as the last paragraph
+    if current_paragraph:
+        paragraphs.append(current_paragraph.strip())
+
+    return paragraphs
+
+def split_chinese_text(text, max_length=5):
+    """
+    Split the Chinese text into segments with a maximum length.
+
+    Args:
+    text (str): The input Chinese text.
+    max_length (int): The maximum length of each segment. Default is 5.
+
+    Returns:
+    list: A list of segments.
+    """
+    segments = []
+    current_segment = ""
+
+    for char in text:
+        # 如果当前片段加上当前字符的长度超过最大长度,就添加当前片段到segments列表中,并且重置当前片段
+        if len(current_segment) + len(char) > max_length:
+            segments.append(current_segment)
+            current_segment = ""
+        
+        # 如果当前字符不是空格,就添加到当前片段中
+        if char != ' ':
+            current_segment += char
+        else :
+            segments.append(current_segment)
+            current_segment = ""
+
+    # 添加最后一个片段到segments列表中
+    if current_segment:
+        segments.append(current_segment)
+
+    return segments
+
+def find_longest_segment(segments):
+    """
+    Find the longest segment from the given list of segments.
+
+    Args:
+    segments (list): The list of segments.
+
+    Returns:
+    str: The longest segment.
+    """
+    longest_segment = ""
+    max_length = 0
+
+    for segment in segments:
+        if len(segment) > max_length:
+            longest_segment = segment
+            max_length = len(segment)
+
+    return len(longest_segment)
+
+
+if __name__ == "__main__":
+    text = "心想事成 萬事如意"
+    output_path = "tendents/vertical_chinese_text.png"
+    create_image(text, output_path)
+    print(f"圖片已儲存至 {output_path}")
+
+    output_path = "combined_image.png"
+
+    # 執行疊加
+    overlay_images("tendentest.png", "vertical_chinese_text.png", output_path)

+ 39 - 0
api/speech2text.py

@@ -0,0 +1,39 @@
+import tempfile
+from typing import List, Any
+from fastapi import Request, APIRouter, UploadFile, File
+from fastapi.responses import FileResponse, PlainTextResponse
+from fastapi.exceptions import HTTPException
+from fastapi.encoders import jsonable_encoder
+from urllib.parse import urlparse, urljoin
+from pathlib import Path
+from icecream import ic
+from google.oauth2 import service_account
+from google.cloud import speech
+
+# Service-account key file, resolved two directories above this file.
+client_file = Path(__file__).parent.parent/'keys/pure-lodge-426406-e4-af94156a748a.json'
+credentials = service_account.Credentials.from_service_account_file(client_file)
+# Speech-to-Text client shared by the /gcp endpoint; created at import time.
+client = speech.SpeechClient(credentials=credentials)
+
+router = APIRouter()
+
+@router.post('/gcp')
+def gcp(language_code: str=None, file: UploadFile = File()):
+    extension = file.filename.split(".")[-1] 
+    if extension not in ("mp3", "wav", "webm"):
+        return HTTPException(status_code=400, detail="Audio must be mp3, wav, webm or webm format!")
+    content = file.file.read()
+    audio = speech.RecognitionAudio(content=content)
+    if extension=='mp3': encoding=speech.RecognitionConfig.AudioEncoding.MP3
+    elif extension=='wav': encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16
+    elif extension=='webm': encoding=speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
+    else: return HTTPException(status_code=400, detail="no such encoding.")
+    config = speech.RecognitionConfig(
+        encoding=encoding,
+        sample_rate_hertz=48000,
+        language_code=language_code
+    )
+    response = client.recognize(config=config, audio=audio)
+    results = [results.alternatives[0].transcript for results in response.results]
+    ic(results)
+    
+    return results

+ 32 - 0
api/tendent_router.py

@@ -0,0 +1,32 @@
+from fastapi import APIRouter
+import random
+from api.skylight import create_image,overlay_images
+import os
+
+tendentRouter = APIRouter()
+
+@tendentRouter.post("/tendentest")
+def read_root(client_message :str = "心想事成"):
+
+    pic_num = random.randint(0,19)
+
+    # 產生
+    text_img_output_path = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/tendents/vertical_chinese_text_{pic_num}.png"
+    
+    create_image(client_message, text_img_output_path)
+
+    print(f"save to {text_img_output_path}")
+
+    output_path = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/tendents/combined_image_{pic_num}.png"
+
+    try:
+        os.remove(output_path)
+    except OSError as e:
+        print('Delete Problem: ', e)
+
+    # 執行疊加
+    overlay_images(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/tendentest.png", text_img_output_path, output_path)
+    
+    url = f"static/tendents/combined_image_{pic_num}.webp"
+
+    return {"state":"success","url": url }

+ 77 - 0
api/tts_router.py

@@ -0,0 +1,77 @@
+from fastapi import APIRouter
+import time
+from datetime import datetime
+from dotenv import load_dotenv
+from api.ttspy import txt_to_speach,download_voice
+import time
+from moviepy.editor import VideoFileClip, concatenate_videoclips,AudioFileClip
+import random
+import os
+from moviepy.editor import *
+
+ttsRouter = APIRouter()
+
+@ttsRouter.post("/tts")
+def read_root(message :str = "我在測試",type : str = "商會"):
+    
+    # url = txt_to_speach(message)
+    start_time = time.time()
+    url,text_list = txt_to_speach(message)
+    # url,execution_time = download_voice(message)
+    
+    
+    # 合併mp3跟mp4
+    output_url = f"static/tts/add_video{random.randint(1,25)}.mp4"
+    output = os.path.split(os.path.abspath('main.py'))[0] +"/" + output_url
+    merge_video_with_audio(f"{os.path.split(os.path.abspath('main.py'))[0]}/{url}" , output,type)
+
+    end_time = time.time()
+    execution_time = end_time - start_time
+
+    return {"state":"success","url": output_url ,"mp3_url" : url,"reply_time":execution_time,"text_list":text_list}
+
+@ttsRouter.post("/cut_video")
+def cut_video(s :int = 5,type : int = 1):
+    video_path = ""
+    if type == 1:
+        video_path = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/2.15min.mp4" 
+    else :
+        video_path = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/沒聲音動嘴巴2min.mp4"
+
+
+    video_clip = VideoFileClip(video_path)
+    video_clip = video_clip.subclip(2,2+s)
+
+    output_url = f"static/tts/add_video{random.randint(1,25)}.mp4"
+    output = os.path.split(os.path.abspath('main.py'))[0] +"/" + output_url
+
+    video_clip.write_videofile(output, codec='libx264', audio_codec='aac')
+
+    return {"state":"success","url": output_url}
+
+
+
+def merge_video_with_audio(audio_path, output_path,type : str = "商會"):
+
+    video_path = ""
+    if type == "商會":
+        video_path = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/2min.mp4" 
+    else :
+        video_path = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/2.15min.mp4"
+    # 讀取視頻和音頻文件
+    video_clip = VideoFileClip(video_path)
+    audio_clip = AudioFileClip(audio_path)
+
+    # 截取音頻文件的長度以匹配視頻
+    video_clip = video_clip.set_duration(audio_clip.duration)
+
+    # 將音頻添加到視頻中
+    final_clip = video_clip.set_audio(audio_clip)
+
+    # 保存合併後的視頻
+    final_clip.write_videofile(output_path, codec='libx264', audio_codec='aac')
+
+    # 釋放資源
+    final_clip.close()
+    video_clip.close()
+    audio_clip.close()

+ 72 - 0
api/tts_try.py

@@ -0,0 +1,72 @@
+import edge_tts
+import asyncio
+from fastapi import APIRouter
+from datetime import datetime
+from dotenv import load_dotenv
+import time
+from moviepy.editor import VideoFileClip, concatenate_videoclips,AudioFileClip
+import random
+import os
+
+
+ttsTryRouter = APIRouter()
+
+async def my_function(output : str,TEXT = "我在測試"):
+    voice = 'zh-TW-HsiaoChenNeural'
+    rate = '-4%'
+    volume = '+0%'
+
+    tts = edge_tts.Communicate(text=TEXT, voice=voice, rate=rate, volume=volume)
+    await tts.save(output)
+
+@ttsTryRouter.post("/tts_try")
+async def read_root(message :str = "我在測試",type : str = "商會"):
+    
+    # url = txt_to_speach(message)
+    start_time = time.time()
+
+    # text_list = message.replace(" ","").replace(",",",").split("。")
+    # text_list = [item.split(',') if len(item) > 30 else [item] for item in text_list ]
+
+    filename = f"static/tts/mp3/output{random.randint(1,25)}.mp3"
+    filenames = []
+
+    await my_function(output =filename,TEXT = message)
+    
+    # url,execution_time = download_voice(message)
+    
+    
+    # 合併mp3跟mp4
+    output_url = f"static/tts/add_video{random.randint(1,25)}.mp4"
+    output = os.path.split(os.path.abspath('main.py'))[0] +"/" + output_url
+    merge_video_with_audio(f"{os.path.split(os.path.abspath('main.py'))[0]}/{filename}" , output,type)
+
+    end_time = time.time()
+    execution_time = end_time - start_time
+
+    return {"state":"success","url": output_url,"reply_time":execution_time}
+
+def merge_video_with_audio(audio_path, output_path,type : str = "商會"):
+
+    video_path = ""
+    if type == "商會":
+        video_path = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/2min.mp4" 
+    else :
+        video_path = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/2.15min.mp4"
+    # 讀取視頻和音頻文件
+    video_clip = VideoFileClip(video_path)
+    audio_clip = AudioFileClip(audio_path)
+
+    # 截取音頻文件的長度以匹配視頻
+    video_clip = video_clip.set_duration(audio_clip.duration)
+
+    # 將音頻添加到視頻中
+    final_clip = video_clip.set_audio(audio_clip)
+
+    # 保存合併後的視頻
+    final_clip.write_videofile(output_path, codec='libx264', audio_codec='aac')
+
+    # 釋放資源
+    final_clip.close()
+    video_clip.close()
+    audio_clip.close()

+ 171 - 0
api/ttspy.py

@@ -0,0 +1,171 @@
+import pyttsx3
+import requests
+from openai import OpenAI
+import openai
+import random
+import os
+import time
+import json
+import threading
+from itertools import chain
+
+from gtts import gTTS
+import os
+
+# OpenAI client used by text_split_to_text; created at import time.
+# NOTE(review): presumably reads OPENAI_API_KEY from the environment - confirm.
+client = OpenAI()
+
+def txt_to_speach(text):
+
+    # text_list_1 = text.replace(" ","").replace(",",",").split("。")
+    text_list = text.replace(" ","").replace(",",",").split("。")
+
+    filename = f"static/tts/mp3/output{random.randint(1,25)}.mp3"
+    filenames = []
+
+    text_list = [item.split(',') if len(item) > 30 else [item] for item in text_list ]
+    
+
+    text_list  = list(chain.from_iterable(text_list ))
+    # 建立存放執行序的list(存放thread)
+    
+    threads = []
+
+    # 放入執行序
+    for i,text_split in enumerate(text_list):
+        text_split = text_split.strip()
+    
+        # 檢查字串是否為空
+        if not text_split:
+            continue
+        t = threading.Thread(target=text_split_to_text, args=(text_split,i,filename)) 
+        
+        threads.append(t) # 將程序放入threads
+        filenames.append(f"/home/mia/101/{filename}-{i}.mp3")
+        print(filenames)
+
+    # 開始
+    for t in threads:
+        t.start()
+
+    # 等待所有子執行緒結束
+    for t in threads:
+        t.join()
+
+
+    merge_audio_files(filenames, f"/home/mia/101/{filename}")
+
+
+    return filename,text_list
+
+def text_split_to_text(text_split,i,filename):
+    
+    response = client.audio.speech.create(
+        model="tts-1",
+        voice="nova",
+        input=text_split
+    )
+
+    filename_tmp = f"/home/mia/101/{filename}-{i}.mp3"
+
+    response.stream_to_file(filename_tmp)
+
+
+import subprocess
+from pydub import AudioSegment
+from moviepy.editor import VideoFileClip, AudioFileClip, concatenate_videoclips
+
+
+def merge_audio_files(files, output_file):
+    # 生成 ffmpeg 的命令
+    cmd = ['ffmpeg', '-i', 'concat:' + '|'.join(files), '-c', 'copy', '-y',output_file]
+
+    # 执行命令
+    subprocess.run(cmd)
+
+    # 刪除暫時生成的音頻文件
+    for filename in files:
+        os.remove(filename)
+
+    # combined = AudioSegment.empty()
+
+    # # 逐一載入每個音頻文件並合併
+    # for file in files:
+    #     audio = AudioSegment.from_file(file, format="mp3")
+    #     combined += audio
+
+    # # 將合併後的音頻保存為新文件
+    # combined.export(output_file, format="mp3")
+
+
+
+
+def download_voice(text,voice="zh-TW-HsiaoChenNeural", pronunciations=None):
+    """Convert ``text`` to speech through the play.ht v1 API and download it.
+
+    Returns:
+        ``(output_url, execution_time)``: the relative path of the saved mp3
+        and the elapsed time in seconds.
+
+    NOTE(review): ``pronunciations`` is never used. The API credentials are
+    hard-coded below and should be moved to configuration and rotated. The
+    output goes to ``static/tts/mp/`` whereas the rest of this module uses
+    ``static/tts/mp3/`` - confirm the directory.
+    """
+    output_url = f"static/tts/mp/output{random.randint(1,25)}.mp3"
+    output = "/home/mia/101/" + output_url
+    my_data = {
+    "voice": voice,
+    "content": [str(text)]  # e.g. ["你好,很高興認識你","喜歡","討厭"]
+      # "ssml": string[]
+      #  "title": string,          // Optional
+      # "narrationStyle": string, // Optional         
+      # "globalSpeed": string,    // Optional      
+      # "pronunciations": { key: string, value: string }[], // Optional
+      # "trimSilence": boolean,   // Optional
+
+      }
+    headers =  {
+    # 'User-Agent':'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
+    "Authorization":"61ddf2a47cdd42548671be21ccdcf285",
+    "X-User-ID":'HEQLQR1WgpYtN0SEyKoWBsLiZXX2',
+    "Content-Type": "application/json"
+    }
+    start_time = time.time()
+
+    # Submit the conversion request.
+    r = requests.post('https://play.ht/api/v1/convert',headers=headers,data=json.dumps(my_data))
+    c1 = r.json()
+    print(c1)
+    c1 = r.json()['transcriptionId']
+    # print(c1)
+
+    # Wait one second per character before polling.
+    time.sleep(len(text))
+
+    success_flag = False
+    r =''
+
+    # NOTE(review): the transcription id obtained above is discarded because
+    # the loop submits a new conversion each time round. The loop has no
+    # upper bound and the inner polling has no delay.
+    while True:
+        r = requests.post('https://play.ht/api/v1/convert',headers=headers,data=json.dumps(my_data))
+        c1 = r.json()['transcriptionId']
+        print(f"{text}:{c1}")
+        # time.sleep(0.5+(len(text)/4))
+        counter = 0
+        # Poll the status up to 6 times before re-submitting.
+        while True:
+            r = requests.get('https://play.ht/api/v1/articleStatus?transcriptionId=%s'%c1, headers=headers)
+            # NOTE(review): a response without a content-type header makes
+            # the `in` test raise TypeError.
+            if 'json' not in r.headers.get('content-type') or r.json()['converted'] == False:
+                print(f"audio {c1} is not ready.")
+                # time.sleep(0.5)
+                counter += 1
+                if counter == 6:
+                  break
+            else:
+                success_flag = True
+                break
+
+        if success_flag:
+            break
+        else:
+            print('redownload')
+
+
+
+    # Download the converted audio in 1 KiB chunks.
+    file = requests.get(r.json()['audioUrl'])
+    with open(output,"wb") as f:
+        for chunk in file.iter_content(chunk_size=1024):
+            if chunk:
+                f.write(chunk)
+
+    end_time = time.time()
+    execution_time = end_time - start_time
+    print("reply time:", execution_time, "s")
+
+    return output_url,execution_time

+ 1 - 0
credentials.json

@@ -0,0 +1 @@
+{"installed":{"client_id":"592277472140-bqv0v9d3mea83gdcgefeefpga160lets.apps.googleusercontent.com","project_id":"gmail-429704","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_secret":"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","redirect_uris":["http://localhost"]}}

+ 13 - 0
keys/pure-lodge-426406-e4-af94156a748a.json

@@ -0,0 +1,13 @@
+{
+  "type": "service_account",
+  "project_id": "pure-lodge-426406-e4",
+  "private_key_id": "af94156a748ad63f013138435bd77f585d191ada",
+  "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvgIBADANBgkqhkiG9w0BAQEFAASCBKgwggSkAgEAAoIBAQC83tPxVS72N7q/\nSh8MMcBvnog4lj2r/MIcG0iMDXV36A5hS0NgiLeLTdrfzDuLGWvgu4QOFjcYUdu7\n8lI3puk77Is1kEgS+hrgUOR2PTHXGHKAUnsDGWGh8uJ295sLbzoXIuH3zAxQXeKF\nFNGk7thj0usCGQOYraLbkkAzixdKmXYs5zXZ10Ok1OWz/qILDrDXRECd3SWEw/v4\nP9tH+4+dDry7etVhB4Vk51LEKNmvYiBSI0PjlvLgqKJssgw3vLBFX/IZPNlgzkW5\nMK5EfIAzTLZZhbuXYPDHuaCCNskQvTDNjir9oWviRoZ6fnLoh6ldFNRdM23lKxhF\nM5lE1KCnAgMBAAECggEAVwzk5rB6SqV4/z2sfbRYm5mX82zb0+9VkBrLHzH82F0u\nc4166n1ZC23A2bGKKBx5akVDIjL/BciE3GVdm6TkjugBaZisf4Ki7IycIBsoY5ky\n/Q4HAHCsXTZTRr3syiJjxdIgEFQNwOTkyK3Fyp0DQ08tXfQlgjNVgpov2RHclFbG\nUEk8lek8HxNAJ3T9UNWOvyYcjBSe8Ytd2zOZojSW9qL9bXwozcAP3iqsAeWEOEX6\n2fmGbbNA9Q0cyQ0hNVHaXOKLUfleynovTL4j+dg7yFxjuufVil+NLJUxwtJwnvXf\ni4oNxpwgLhq7zf/ivetBvuCYhFm34XY0SUqaQET9UQKBgQD8V/HsBehQL65IGVvA\nPy2MyH2DyMMv52NU3bBQyQ2GoLW2Y1i+J4geAf1F+L6azVBgQpygVIHMt5TbpMkI\nGoUDCd0vw+rk+Bt0rUC2pTzdlavsaouFfWNhnTOld6TqCfd31csTwET4n8I4iqet\nOKIb40hOOyCBeXVLbEb2qsHjbQKBgQC/m27H05Q3ReYXrIarku+jCw6TUc/Gvu+8\nL2FYLA7bncytC9OXb+9DD1ydi4zocD8d2M6oPmbpX8rKPMQhTHdChjP0ns+IZzLY\nCgsu7lHgeqzW9RD0bzUKlHWe5kK3164oSQZsTrgTpDGJehrfREeHL6N5vAOqk2yG\nunUJs/Zz4wKBgCDutVLxPm1gm6UrMg8375DlBYRyag7RauRDfmQ8qPg99xkThBxk\n+l24Mr60BOQdEXVOnDRMcxgtiU7VBat6Eovf+K7oKbqyKpsilUSA8h2VD/ftu8Gz\nKIovwvLAV5R3soVAMJ2mRRoRd357kgCIWJe9JM2Y59gd6tHqEqTUijNNAoGBAJsP\nAjucMZn5axdFe0ZmBp//VrBJyMehxQiZn0Fnhi6cKQLxSy6GWy39xImKYWHJVIqm\n5bapqUa7SrUXL/FynCCt0mXoDzLnsPG8TwiPvJrZK/5ZHHtzc404T2dkGCIG3N2l\nGxmTKsxaV+xVcGG2M9xm5Pii+qST4YO2aPiNNqjZAoGBAPr3wuX0y4NlWJRp9phE\nAaAs9Rv58rg0aX4ziVphBv40ku/e4IYC0MriDVGulxYXQuFs3CliRe+3eX1+eKRB\nOltSvyZ21zBDvx1O/bShVPTWxecgt2TF//+Q5/OLPMfD3lvHo+2MsXsnPhT7jaBI\nxOQ8fp6wTMrQWw/MX9bl22dc\n-----END PRIVATE KEY-----\n",
+  "client_email": "speech-to-tex-0@pure-lodge-426406-e4.iam.gserviceaccount.com",
+  "client_id": "112355225752184158962",
+  "auth_uri": "https://accounts.google.com/o/oauth2/auth",
+  "token_uri": "https://oauth2.googleapis.com/token",
+  "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
+  "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/speech-to-tex-0%40pure-lodge-426406-e4.iam.gserviceaccount.com",
+  "universe_domain": "googleapis.com"
+}

+ 134 - 0
main.py

@@ -0,0 +1,134 @@
+from fastapi import FastAPI, Form, UploadFile, File, HTTPException
+import uvicorn
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.middleware.httpsredirect import HTTPSRedirectMiddleware
+from fastapi.middleware.trustedhost import TrustedHostMiddleware
+from datetime import datetime
+from fastapi.staticfiles import StaticFiles
+from datetime import datetime
+from fastapi.responses import RedirectResponse
+import logging
+from logging.handlers import TimedRotatingFileHandler
+
+# Logging configuration: a file log under ./log that rotates once a day.
+import os
+
+log_folder = 'log'
+log_file = f'{log_folder}/app.log'
+
+# The file handler below is given a path inside `log/`, and that directory is
+# git-ignored, so a fresh checkout does not have it. Create it up front so
+# importing this module does not fail with FileNotFoundError.
+os.makedirs(log_folder, exist_ok=True)
+
+# Record layout and timestamp format shared by the handler and the root logger.
+log_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+date_format = '%Y-%m-%d %H:%M:%S'
+
+# Rotate the log file at midnight, every day.
+handler = TimedRotatingFileHandler(
+    log_file,
+    when='midnight',
+    interval=1,
+    backupCount=14  # keep the 14 most recent rotated files
+)
+handler.setFormatter(logging.Formatter(log_format, datefmt=date_format))
+
+# Root logger: INFO and above go to the rotating file handler.
+logging.basicConfig(
+    handlers=[handler],
+    level=logging.INFO,
+    format=log_format,
+    datefmt=date_format
+)
+# NOTE(review): this handler is created but never attached to a logger in this
+# file, so it does not emit anything by itself. The name is kept in case other
+# modules import it.
+console_handler = logging.StreamHandler()
+
+
+# Application instance; middleware, static files and routers are attached below.
+app = FastAPI()
+
+# HTTPS-redirect and trusted-host middleware are currently disabled.
+# app.add_middleware(HTTPSRedirectMiddleware)
+# app.add_middleware(TrustedHostMiddleware)
+
+# CORS: every origin, method and header is accepted, with credentials allowed.
+# NOTE(review): wildcard origins combined with allow_credentials=True is very
+# permissive — confirm this is intended for the deployed API.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# Expose the local "static" directory under the /static URL prefix.
+# NOTE(review): the path is relative to the working directory, and `static/` is
+# git-ignored, so it presumably has to be created on the server before startup
+# — verify.
+app.mount("/static", StaticFiles(directory="static"), name="static")
+
+# Redirect the site root to the docs page.
+@app.get("/")
+async def root():
+    """Log the visit and redirect the caller to ``/docs#``."""
+    docs_location = "/docs#"
+    logging.info("Root endpoint was called")
+    redirect = RedirectResponse(url=docs_location)
+    return redirect
+
+
+# Feature routers, each defined in its own module under api/.
+from api.tts_router import ttsRouter
+from api.db_router import dbRouter
+from api.tendent_router import tendentRouter
+# The speech-to-text and local TTS test routers are currently disabled.
+# from api.speech2text import router
+# from api.tts_try import ttsTryRouter
+
+# Register the routers without a URL prefix. The tags group the endpoints in the
+# generated docs: "文字轉語音" = text-to-speech, "supa 操作相關" = Supabase
+# operations, "天燈" = sky lantern.
+app.include_router(ttsRouter, prefix="", tags=["文字轉語音"])
+app.include_router(dbRouter, prefix="", tags=["supa 操作相關"])
+app.include_router(tendentRouter, prefix="", tags=["天燈"])
+# app.include_router(router, prefix='/speech2text', tags=["speech2text"])
+# app.include_router(ttsTryRouter, prefix='/ttsTry', tags=["測試本地端tts"])
+
+@app.get("/ad")
+def read_root(language: str = "ch"):
+    """Return the Taipei 101 VIP-card advertisement payload.
+
+    Args:
+        language: ``"ch"`` selects the Chinese copy; any other value selects
+            the English copy.
+
+    Returns:
+        ``{"data": {"type": "store", "body": {...}}}`` where only ``title``,
+        ``description`` and ``date`` differ between the two languages.
+    """
+    # Only the localized text differs; every other field is shared.
+    if language == "ch":
+        title = "台北101國際貴賓卡"
+        description = "國際貴賓卡專屬禮遇\n●即日起來台北101,提供2024年特別禮遇-申辦台北101國際貴賓卡,可享用國際旅客限定專屬三重好禮:\n●購物-品牌9折起特別優惠\n●禮遇-Welcome Pack+ NTD300現金折抵券\n●退稅-消費2000元以上提供5%快速退稅服務\n<a href='https://stage.taipei101mall.com.tw/join-member/AIsystem' class='ar-link mt-3' target='_blank'>立即申辦</a>"
+        date = "即日起"
+    else:
+        title = "Taipei 101 International VIP Card"
+        description = "TOURIST CARD Exclusive Privileges\nStarting today at Taipei 101, we are offering special privileges for the year 2024 - apply for the Taipei 101 Tourist Card and enjoy exclusive triple benefits reserved for international travelers.\n● Shopping - Special offers starting from 10% off brand items.\n● PRIVILEGES-Welcome Pack + NTD300 cash voucher.\n● TAX REFUND- Offering 5% expedited processing service.\n<a href='https://stage.taipei101mall.com.tw/join-member/AIsystem' class='ar-link mt-3' target='_blank'>Apply now</a>"
+        date = "Starting from today"
+
+    # Key order is kept exactly as before so the serialized JSON is unchanged.
+    body = {
+        "cover_img": "https://cmm.ai:9101/static/ad_img/ad-img.png",
+        "title": title,
+        "description": description,
+        "date": date,
+        "price": "",
+        "original_price": "",
+        "website_url": "",
+        "store_info_url": "",
+        "included": [],
+        "branch": [],
+        "location": "",
+    }
+    message = {"type": "store", "body": body}
+    return {"data": message}
+
+from api.image_operate import remove_background,detect_face
+    
+@app.post("/image_check")
+async def image_check(image_file : UploadFile):
+    """Save an uploaded image under the static image folder and run face detection.
+
+    Args:
+        image_file: The uploaded image. Its client-supplied filename is reduced
+            to a bare file name before it is used in the save path.
+
+    Returns:
+        Whatever ``detect_face`` returns for the saved image path.
+
+    Raises:
+        HTTPException: 400 when the upload carries no usable filename.
+    """
+    import os  # local import: this module does not import os at the top
+
+    # The client controls `filename`. Keep only its last path component so a
+    # value such as "../../main.py" cannot escape the upload directory.
+    raw_name = image_file.filename or ""
+    safe_name = os.path.basename(raw_name.replace("\\", "/"))
+    if safe_name in ("", ".", ".."):
+        raise HTTPException(status_code=400, detail="Invalid image filename")
+
+    # Prefix a timestamp so repeated uploads of the same name do not collide
+    # (to one-second resolution).
+    timestamp = datetime.now().strftime("%m-%d-%H-%M-%S")
+    imgname = timestamp + "-" + safe_name
+    image_path = f"/home/mia/101/static/image/{imgname}"
+
+    # Read the upload first so a failed read does not leave an empty file behind.
+    contents = await image_file.read()
+    with open(image_path, "wb") as save_img:
+        save_img.write(contents)
+
+    # Background removal is currently disabled:
+    # await remove_background(f"/home/mia/101/static/image/{imgname}",f"/home/mia/101/static/image/remove/{imgname}")
+
+    result = await detect_face(image_path)
+
+    return result
+
+
+
+
+if __name__ == "__main__":
+    # Run the API on all interfaces, port 9101, without auto-reload.
+    # log_config=None is passed through to uvicorn, presumably so that the
+    # logging setup at the top of this module stays in effect — verify.
+    server_options = dict(
+        host="0.0.0.0",
+        port=9101,
+        reload=False,
+        log_config=None,
+    )
+    uvicorn.run("main:app", **server_options)

+ 134 - 0
requirements.txt

@@ -0,0 +1,134 @@
+aiohttp==3.9.5
+aiosignal==1.3.1
+annotated-types==0.7.0
+anyio==4.3.0
+asttokens==2.4.1
+async-timeout==4.0.3
+attrs==23.2.0
+cachetools==5.3.3
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+coloredlogs==15.0.1
+DateTime==5.5
+decorator==4.4.2
+deprecation==2.1.0
+distro==1.9.0
+dnspython==2.6.1
+edge-tts==6.1.12
+email_validator==2.1.1
+exceptiongroup==1.2.1
+executing==2.0.1
+fastapi==0.111.0
+fastapi-cli==0.0.4
+filelock==3.14.0
+flatbuffers==24.3.25
+frozenlist==1.4.1
+fsspec==2024.6.0
+google-api-core==2.19.0
+google-auth==2.30.0
+google-cloud-speech==2.26.0
+googleapis-common-protos==1.63.1
+gotrue==2.4.2
+grpcio==1.64.1
+grpcio-status==1.62.2
+gTTS==2.5.1
+h11==0.14.0
+httpcore==1.0.5
+httptools==0.6.1
+httpx==0.27.0
+humanfriendly==10.0
+icecream==2.1.3
+idna==3.7
+imageio==2.34.1
+imageio-ffmpeg==0.4.9
+Jinja2==3.1.4
+jsonschema==4.22.0
+jsonschema-specifications==2023.12.1
+lazy_loader==0.4
+llvmlite==0.42.0
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+mdurl==0.1.2
+more-itertools==10.3.0
+moviepy==1.0.3
+mpmath==1.3.0
+multidict==6.0.5
+networkx==3.3
+numba==0.59.1
+numpy==1.26.4
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==8.9.2.26
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.5.40
+nvidia-nvtx-cu12==12.1.105
+onnxruntime==1.18.0
+openai==1.30.1
+openai-whisper @ git+https://github.com/openai/whisper.git@ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
+opencv-python==4.9.0.80
+opencv-python-headless==4.9.0.80
+orjson==3.10.3
+packaging==24.0
+pillow==10.3.0
+platformdirs==4.2.2
+pooch==1.8.1
+postgrest==0.16.4
+proglog==0.1.10
+proto-plus==1.23.0
+protobuf==4.25.3
+pyasn1==0.6.0
+pyasn1_modules==0.4.0
+pydantic==2.7.1
+pydantic_core==2.18.2
+pydub==0.25.1
+Pygments==2.18.0
+PyMatting==1.1.12
+python-dateutil==2.9.0.post0
+python-dotenv==1.0.1
+python-multipart==0.0.9
+pyttsx3==2.90
+pytz==2024.1
+PyYAML==6.0.1
+realtime==1.0.4
+referencing==0.35.1
+regex==2024.5.15
+rembg==2.0.56
+requests==2.32.1
+rich==13.7.1
+rpds-py==0.18.1
+rsa==4.9
+scikit-image==0.23.2
+scipy==1.13.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+starlette==0.37.2
+storage3==0.7.4
+StrEnum==0.4.15
+supabase==2.4.5
+supafunc==0.4.5
+sympy==1.12
+tifffile==2024.5.10
+tiktoken==0.7.0
+torch==2.3.1
+tqdm==4.66.4
+triton==2.3.1
+typer==0.12.3
+typing_extensions==4.11.0
+ujson==5.10.0
+urllib3==2.2.1
+uvicorn==0.29.0
+uvloop==0.19.0
+watchfiles==0.21.0
+websockets==12.0
+whisper==1.1.10
+yarl==1.9.4
+zope.interface==6.4

+ 6 - 0
sherry/requirements.txt

@@ -0,0 +1,6 @@
+python-dotenv
+openai
+langchain-openai
+langchain-community
+langchain-chroma
+chromadb

+ 103 - 0
sherry/semantic_search.py

@@ -0,0 +1,103 @@
+### Python = 3.9
+# Module setup: load environment variables, then build the OpenAI embedding
+# model and the Supabase client used by the rest of this file.
+import os
+from dotenv import load_dotenv
+# Load variables from a .env file before any os.getenv call below.
+load_dotenv()
+
+import openai 
+openai_api_key = os.getenv("OPENAI_API_KEY")
+openai.api_key = openai_api_key
+
+from langchain_openai import OpenAIEmbeddings
+# Embedding model shared by the indexing step and by query-time search.
+embeddings_model = OpenAIEmbeddings()
+
+from langchain_community.document_loaders.csv_loader import CSVLoader
+from langchain_chroma import Chroma
+
+from supabase import create_client, Client 
+# NOTE(review): os.getenv returns None when a variable is unset, and those
+# values are passed straight to create_client — confirm both are always set.
+supabase_url = os.getenv("SUPABASE_URL")
+supabase_key = os.getenv("SUPABASE_KEY")
+supabase: Client = create_client(supabase_url, supabase_key)
+
+############# Load data #############
+# def extract_field(doc, field_name):
+#     for line in doc.page_content.split('\n'):
+#         if line.startswith(f"{field_name}:"):
+#             return line.split(':', 1)[1].strip()
+#     return None
+
+# loader = CSVLoader(file_path="video_cache_rows.csv")
+# data = loader.load()
+# field_name = "question"
+# question = [extract_field(doc, field_name) for doc in data]
+
+# ####### load data from supabase #######
+# embeddings_model = OpenAIEmbeddings()
+# Fetch the question and id columns of every video_cache row, ordered by id.
+# The execute() result is unpacked into two items, and index 1 of the first
+# item is then used as the list of row dicts.
+# NOTE(review): this relies on the iteration order of the supabase response
+# object — confirm against the installed supabase version.
+response,count = supabase.table("video_cache").select("question","id").order("id").execute()
+data = response[1]
+# Parallel lists of question texts and row ids, plus a question -> id lookup.
+# If two rows share the same question text, the later row's id wins in the map.
+question = [item['question'] for item in data if 'question' in item]
+ids = [item['id'] for item in data if 'id' in item]
+question_id_map = {item['question']: item['id'] for item in data if 'id' in item and 'question' in item}
+
+def get_id_by_question(question):
+    """Return the video_cache row id stored for ``question``, or None if unknown."""
+    try:
+        return question_id_map[question]
+    except KeyError:
+        return None
+
+# print(question)
+# created_at = []
+# question = []
+# ids = []
+# answer = []
+# video_url = []
+
+# for item in data:
+#     ids.append(item['id'])
+#     created_at.append(item['created_at'])
+#     question.append(item['question'])
+#     answer.append(item['answer'])
+#     video_url.append(item['video_url'])
+
+
+########## generate embedding ###########
+# NOTE(review): `embedding` is not read anywhere in this file (the write-back
+# below is commented out), and Chroma.from_texts embeds the same texts again.
+# The call is kept only so the module-level name still exists — consider
+# removing it to avoid a second embedding request on every import.
+embedding = embeddings_model.embed_documents(question)
+
+########## Write embedding to the supabase table  #######
+# for id, new_embedding in zip(ids, embedding):
+#     supabase.table("video_cache_rows_duplicate").insert({"embedding": embedding.tolist()}).eq("id", id).execute()
+
+######### Vector Store ##########
+# Index the questions into the on-disk Chroma collection.
+# This runs on every import. Without explicit ids each run stores the same
+# questions again under fresh random ids, so the persisted collection fills up
+# with duplicates. Using the Supabase row id as the document id makes re-runs
+# overwrite existing entries instead.
+# NOTE(review): assumes video_cache ids are unique. Entries written by earlier
+# runs under random ids are not cleaned up by this change.
+_indexed_rows = [
+    (str(item['id']), item['question'])
+    for item in data
+    if 'id' in item and 'question' in item
+]
+vectorstore = Chroma.from_texts(
+    texts=[text for _, text in _indexed_rows],
+    embedding=embeddings_model,
+    ids=[row_id for row_id, _ in _indexed_rows],
+    persist_directory="./chroma_db"
+    )
+
+# Reopen the persisted collection; this is the handle used for queries below.
+vectorstore = Chroma(persist_directory="./chroma_db", embedding_function=embeddings_model)
+
+
+def ask_question(question:str, SIMILARITY_THRESHOLD:float = 0.83):
+    """Look up the cached video answer for the question most similar to ``question``.
+
+    Args:
+        question: The user's question text.
+        SIMILARITY_THRESHOLD: Minimum relevance score the best match must reach.
+
+    Returns:
+        The list of matching ``video_cache`` rows, or None when the store has
+        no match, the best score is below the threshold, the matched question
+        has no known row, or the row's ``answer`` is None.
+    """
+    docs_and_scores = vectorstore.similarity_search_with_relevance_scores(question, k=1)
+    # An empty result list would otherwise raise IndexError on [0].
+    if not docs_and_scores:
+        return None
+
+    doc, score = docs_and_scores[0]
+    print(doc,score)
+    if score < SIMILARITY_THRESHOLD:
+        return None
+
+    # The vector store may hold a question that is no longer in the lookup map.
+    row_id = get_id_by_question(doc.page_content)
+    if row_id is None:
+        return None
+
+    # The execute() result is unpacked into two items; index 1 of the first
+    # item is used as the list of rows, as in the module-level load.
+    response,count = supabase.table("video_cache").select("*").eq("id",row_id).execute()
+    rows = response[1]
+
+    # The row may have been deleted since indexing, or may have no answer yet.
+    if not rows or rows[0]["answer"] is None:
+        return None
+
+    return rows
+
+
+if __name__ == "__main__":
+    # Smoke test: query the persisted vector store with two sample questions
+    # and print the closest stored document for each.
+    for query in ("美食街在哪裡", "101可以帶狗嗎"):
+        docs = vectorstore.similarity_search(query)
+        print(f"Query: {query}  | 最接近文檔:{docs[0].page_content}")
+