Browse Source

search data、cache 增加日韓文,天燈加名字

Mia 4 months ago
parent
commit
b2c16b20ed

+ 1 - 0
.gitignore

@@ -8,4 +8,5 @@ log/
 chroma_db_en/
 chroma_db_ch/
 chroma_db_jp/
+chroma_db_ko/
 .env

BIN
api/__pycache__/db_router.cpython-312.pyc


BIN
api/__pycache__/skylight.cpython-312.pyc


BIN
api/__pycache__/tendent_router.cpython-312.pyc


+ 69 - 12
api/db_router.py

@@ -380,7 +380,7 @@ def insert_table(data: dataform):
 from sherry.semantic_search import ask_question,ask_question_find_brand
     
 @dbRouter.post("/video_cache")
-def video_cache(client_message :str,language:str ="ch"):
+async def video_cache(client_message :str,language:str ="ch"):
 
     try:
 
@@ -393,7 +393,7 @@ def video_cache(client_message :str,language:str ="ch"):
 
         result = ask_question(client_message,language=language)
 
-        data = search_date(client_message,language=language)
+        data = await search_date(client_message,language=language)
 
         # result[0]["answer"]
 
@@ -459,34 +459,63 @@ import spacy
 import jieba
 
 @dbRouter.post("/search_date")
-def search_date(question:str,language:str="ch"):
+async def search_date(question:str,language:str="ch"):
     try:
-        global nlp,exclude_conditions
+        global nlp,exclude_conditions,keywords
         if language == "ch":
             
             nlp = spacy.load("zh_core_web_sm")
             exclude_languages = ["韓文", "日文", "英文"]
+            stop_words = ["請問","停車","收費","方式"]
+            # 處理輸入
+            doc = jieba.lcut(question)
+
+            # 提取關鍵字
+            keywords =  [word for word in doc if len(word) > 1 and word != '101' and word not in stop_words] 
+
             
         elif language == "en":
             nlp = spacy.load("en_core_web_sm")
             exclude_languages = ["韓文", "日文", "中文"]
-            
 
-        # 處理輸入
-        doc = jieba.lcut(question)
+            stop_words = nlp.Defaults.stop_words
+
+            doc = nlp(question)
+            keywords = [token.text for token in doc if token.text != '101' and not token.is_stop]
+
+        elif language == "jp":
+            nlp = spacy.load("ja_core_news_sm")
+            exclude_languages = ["韓文", "英文", "中文"]
+            doc = nlp(question)
+            keywords = [token.text for token in doc if token.text != '101']
 
-        # 提取關鍵字
-        keywords =  [word for word in doc if len(word) > 1] 
+        elif language == "ko":
+            nlp = spacy.load("ko_core_news_sm")
+            exclude_languages = ["日文", "英文", "中文"]
 
+            doc = nlp(question)
+            keywords = [token.text for token in doc if token.text != '101']
+            
+        if len(keywords) == 0 :
+            return None
+        
         print(keywords)
 
         # 構築條件
         brand_query = supabase.from_("101_brand").select("*").eq("language",language)
     
-        keywords_condition = ",".join([f"tags.ilike.%{keyword}%" for keyword in keywords])
+        # 生成查询条件,分别针对 tags 和 content 字段
+        keywords_condition = []
+        for keyword in keywords:
+            keywords_condition.append(f"tags.ilike.%{keyword}%")
+            keywords_condition.append(f"content.ilike.%{keyword}%")
+
+        # 使用 'or' 运算符连接条件
+        conditions_str = ",".join(keywords_condition)
+
 
         # 查询 101_brand 表
-        brand_query = brand_query.or_(keywords_condition)
+        brand_query = brand_query.or_(conditions_str)
 
         # 排除其他國家語言標籤
         # for lang in exclude_languages:
@@ -494,8 +523,16 @@ def search_date(question:str,language:str="ch"):
 
         brand_results = brand_query.execute()
 
+        keywords_condition = []
+        for keyword in keywords:
+            keywords_condition.append(f"tags.ilike.%{keyword}%")
+            keywords_condition.append(f"description.ilike.%{keyword}%")
+
+        # 使用 'or' 运算符连接条件
+        conditions_str = ",".join(keywords_condition)
+
         # 查詢 101_ticket 表
-        ticket_query = supabase.from_("101_ticket").select("*").or_(keywords_condition)
+        ticket_query = supabase.from_("101_ticket").select("*").eq("is_avilible",True).or_(conditions_str)
 
         # 排除其他國家語言標籤
         for lang in exclude_languages:
@@ -523,3 +560,23 @@ def search_date(question:str,language:str="ch"):
     except Exception as e:
 
         return {"state": 500 , "message" : str(e)}
+
+@dbRouter.post("/close_not_stage")
+async def close_not_stage():
+    """Mark every 101_ticket row whose website_url lacks "stage101" as unavailable.
+
+    Returns None on success, or {"state": 500, "message": ...} if any call raises.
+    NOTE(review): the supabase calls look synchronous inside an async def - confirm.
+    """
+    try:
+        # Only the columns read below are fetched.
+        rows = supabase.table("101_ticket").select("id,title,website_url").execute().data
+        for row in rows:
+            # A NULL website_url counts as "not stage101" instead of raising TypeError.
+            if "stage101" in (row["website_url"] or ""):
+                continue
+            supabase.table("101_ticket").update({"is_avilible": False}).eq("id", row["id"]).execute()
+            print(row["title"], " close")
+    except Exception as e:
+
+        return {"state": 500, "message": str(e)}

+ 84 - 5
api/skylight.py

@@ -1,6 +1,67 @@
 from PIL import Image, ImageDraw, ImageFont
 import os
 
+def create_name(name,img_url,output_path):
+    """Draw ``name`` onto the image at ``img_url`` and save it to ``output_path``.
+
+    A name for which detect_language returns "English" is written left to right
+    (font size 120, at most 50 characters, pasted at (500, 1500)); any other name
+    is written top to bottom (font size 150, at most 5 characters, pasted at
+    (150, 900)).
+
+    Args:
+        name: Text to draw; must be non-empty.
+        img_url: Path of the background image.
+        output_path: Path the composited image is saved to.
+
+    Returns "超過字數限制" without writing anything when the name is too long,
+    otherwise None. Raises ValueError for an empty name (this used to surface
+    as an IndexError from name[0]).
+    """
+    if not name:
+        raise ValueError("name must not be empty")
+
+    horizontal = detect_language(name) == "English"
+    if horizontal:
+        font_size, limit, paste_at = 120, 50, (500, 1500)
+    else:
+        font_size, limit, paste_at = 150, 5, (150, 900)
+
+    # Reject over-long names before touching the font or image files.
+    if len(name) > limit:
+        return "超過字數限制"
+
+    font_path = f"{os.path.split(os.path.abspath('main.py'))[0]}/static/MasaFont-Regular.ttf"
+    font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
+
+    # Every character advances by the width of the first one.
+    step = font.getlength(name[0])
+    span = int(step * len(name))
+    # Transparent text layer: square for horizontal text, one column wide
+    # for vertical text.
+    size = (span, span) if horizontal else (int(step), span)
+    layer = Image.new('RGBA', size, (255, 255, 255, 0))
+    draw = ImageDraw.Draw(layer)
+
+    x_position = y_position = 0
+    for char in name:
+        draw.text((x_position, y_position), char, font=font, fill=(0, 0, 0))
+        if horizontal:
+            x_position += step
+        else:
+            y_position += step
+
+    # Close the source file promptly; convert() returns an independent copy.
+    with Image.open(img_url) as source:
+        background = source.convert('RGBA')
+
+    # The layer doubles as its own mask so only the glyphs are composited.
+    background.paste(layer, paste_at, layer)
+    background.save(output_path)
+
+    
+
+
 def create_image(text, output_path, font_size=300, bg_color=(255, 255, 255), text_color=(0, 0, 0), max_width=200):
     
 
@@ -26,6 +87,10 @@ def create_image(text, output_path, font_size=300, bg_color=(255, 255, 255), tex
     # 設定初始位置
     x_position = 0
     y_position = 0
+
+    lines_tmp = [x.strip() for x in lines if x.strip()!='']
+    lines = lines_tmp
+    print(lines)
     
     line_height = font.getlength(text[0])
     print(line_height)
@@ -34,7 +99,7 @@ def create_image(text, output_path, font_size=300, bg_color=(255, 255, 255), tex
     max_height = int(font.getlength(text[0])*find_longest_segment(lines))
 
     print(max_width,max_height)
-    print(lines)
+    
 
     if check_tag == "en" :
         tmp = max_width
@@ -88,7 +153,7 @@ def create_image(text, output_path, font_size=300, bg_color=(255, 255, 255), tex
 
     return "成功製作文字"
 
-def overlay_images(background_path, overlay_path, output_path):
+def overlay_images(background_path, overlay_path, output_path,name):
     image1 = Image.open(background_path).convert('RGBA')
     image2 = Image.open(overlay_path).convert('RGBA')
 
@@ -99,12 +164,19 @@ def overlay_images(background_path, overlay_path, output_path):
     image1.paste(image2, (x, y),image2)
 
     # 保存疊加後的圖片
-    image1.save(output_path)
+    image1.save(f"{output_path}_tmp.png")
+
+    
 
     # 顯示疊加後的圖片
     # image1.show()
 
     print(f"finished, saving image at {output_path}")
+    try:
+        create_name(name,f"{output_path}_tmp.png",output_path)
+    except Exception as e:
+
+        print( str(e)) 
 
     im = Image.open(output_path)
     name =output_path.lower().split('/')[::-1][0]
@@ -112,6 +184,7 @@ def overlay_images(background_path, overlay_path, output_path):
     im.save(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/tendents/{webp}", 'WebP', quality=40, )
 
     os.remove(output_path)
+    os.remove(f"{output_path}_tmp.png")
 
 def detect_language(text):
     for char in text:
@@ -137,8 +210,10 @@ def split_text_by_length(text, length):
 
     # Add the remaining part as the last paragraph
     if current_paragraph:
-        paragraphs.append(current_paragraph.strip())
+        if current_paragraph.strip():
+            paragraphs.append(current_paragraph.strip())
 
+    #print(paragraphs)
     return paragraphs
 
 def split_chinese_text(text, max_length=5):
@@ -170,7 +245,11 @@ def split_chinese_text(text, max_length=5):
 
     # 添加最后一个片段到segments列表中
     if current_segment:
-        segments.append(current_segment)
+        
+        if current_segment.strip() and current_segment != '':
+            segments.append(current_segment)
+
+
 
     return segments
 

+ 2 - 2
api/tendent_router.py

@@ -6,7 +6,7 @@ import os
 tendentRouter = APIRouter()
 
 @tendentRouter.post("/tendentest")
-def read_root(client_message :str = "心想事成"):
+def read_root(client_message :str = "心想事成",name="匿名"):
 
     pic_num = random.randint(0,19)
 
@@ -25,7 +25,7 @@ def read_root(client_message :str = "心想事成"):
         print('Delete Problem: ', e)
 
     # 執行疊加
-    overlay_images(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/tendentest.png", text_img_output_path, output_path)
+    overlay_images(f"{os.path.split(os.path.abspath('main.py'))[0]}/static/tendentest.png", text_img_output_path, output_path,name)
     
     url = f"static/tendents/combined_image_{pic_num}.webp"
 

+ 2 - 0
sherry/semantic_search.py

@@ -76,10 +76,12 @@ def generated(language:str ="ch"):
 generated("ch")
 generated("en")
 generated("jp")
+generated("ko")
 
 scheduler.add_job(generated, 'cron' ,hour='*/2',kwargs={"language" : "ch"})
 scheduler.add_job(generated, 'cron' ,hour='*/2',kwargs={"language" : "en"})
 scheduler.add_job(generated, 'cron' ,hour='*/2',kwargs={"language" : "jp"})
+scheduler.add_job(generated, 'cron' ,hour='*/2',kwargs={"language" : "ko"})
 
 scheduler.start()