há 3 anos atrás · 388cd8af6d
--- a/OpenshotService/openshot_video_generator.py
+++ b/OpenshotService/openshot_video_generator.py
@@ -29,6 +29,8 @@ from pytranscriber.control.ctr_autosub import Ctr_Autosub
 
															 import multiprocessing
														
 
															 from itertools import groupby
														
 
															 from operator import itemgetter
														
 
															+from util.parser import parser
														
 
															+
														
 
															 dir_sound = 'mp3_track/'
														
 
															 dir_photo = 'photo/'
														
 
															 dir_text = 'text_file/'
														
@@ -186,9 +188,12 @@ def file_prepare_long(name, name_hash,text_content,image_urls,multiLang,lang='zh
 
															         img_num+=1
														
 
															     #make mp3
														
 
															+    text_parser = parser()
														
 
															     txt_idx = 0
														
 
															     for txt in text_content:
														
 
															-        txt = txt.replace
														
 
															+        rep_list = text_parser.replace_list(txt)
														
 
															+        for reptxt in rep_list:
														
 
															+            txt = txt.replace(reptxt,'')
														
 
															         if lang!='zh' or multiLang==1:
														
 
															             if lang!='zh':
														
 
															                 tts = gTTS(txt)
														
@@ -373,11 +378,18 @@ def parse_script(file_path,gt_list):
 
															     with open(file_path, 'r',encoding="utf-8") as f:
														
 
															         raw_lines = [line.strip() for line in f]
														
 
															     lines = adjustSub_by_text_similarity(gt_list,raw_lines)
														
 
															+    text_parser = parser()
														
 
															     #make dict
														
 
															     dict_list = []
														
 
															     for idx in range(len(lines)):
														
 
															         script={}
														
 
															-        script['content'] = lines[idx]
														
 
															+        rep_ls = text_parser.replace_list(lines[idx])
														
 
															+        line_content = lines[idx]
														
 
															+        for reptxt in rep_ls:
														
 
															+            line_content = line_content.replace(reptxt,'')
														
 
															+        if len(rep_ls)!=0:
														
 
															+            script['image_idx'] = int(rep_ls[0].replace('{','').replace('}',''))
														
 
															+        script['content'] = line_content
														
 
															         time_raw = raw_lines[idx * 4 +1 ].split(' --> ')
														
 
															         start = time_raw[0].split(':')
														
 
															         stop = time_raw[1].split(':')
														
@@ -410,10 +422,11 @@ def parse_script(file_path,gt_list):
 
															         dic_idx = 0
														
 
															         accumulated_duration = 0
														
 
															         duration = dic['stop']-dic['start']
														
 
															-        print(duration)
														
 
															         for sub_dic in split_sentence(dic['content'],13):
														
 
															             new_dic = {}
														
 
															             new_dic['index'] = new_idx
														
 
															+            if 'image_idx' in dic:
														
 
															+                new_dic['image_obj'] = {'start':dic['start'],'idx':dic['image_idx']}
														
 
															             new_idx+=1
														
 
															             ind_duration = duration * sub_dic['time_ratio']
														
 
															             new_dic['start'] = dic['start'] + accumulated_duration
														
@@ -427,7 +440,15 @@ def parse_script(file_path,gt_list):
 
															-def adjustSub_by_text_similarity(gts,gens_raw):
														
 
															+def adjustSub_by_text_similarity(gts_in,gens_raw):
														
 
															+    #work on a copy so the caller's gts_in list is not modified
														
 
															+    gts = gts_in[:]
														
 
															+    text_parser = parser()
														
 
															+    for i in range(len(gts)):
														
 
															+        rep_ls = text_parser.replace_list(gts[i])
														
 
															+        for reptxt in rep_ls:
														
 
															+            gts[i] = gts[i].replace(reptxt,'')
														
 
															+    print(gts)
														
 
															     gens = []
														
 
															     for idx in range(int((len(gens_raw)+1)/4)):
														
 
															         gens.append(gens_raw[idx*4+2])
														
@@ -448,6 +469,12 @@ def adjustSub_by_text_similarity(gts,gens_raw):
 
															         else:
														
 
															             adjusted[idx] = match_text[0]
														
 
															             duplicated_list.append(match_text[0])
														
 
															+    combine2_tag = [''.join([i,j]) for i,j in zip(gts_in, gts_in[1:])]
														
 
															+    combine3_tag = [''.join([i,j,k]) for i,j,k in zip(gts_in, gts_in[1:], gts_in[2:])]
														
 
															+    alls_tag = gts_in + combine2_tag + combine3_tag
														
 
															+    for idx in range(len(adjusted)):
														
 
															+        match_text = difflib.get_close_matches(adjusted[idx], alls_tag, cutoff=0.1)
														
 
															+        adjusted[idx] = match_text[0] if match_text else adjusted[idx]
														
 
															     return adjusted
														
 
															 def trim_punctuation(s):
														
--- a/OpenshotService/test.py
+++ b/OpenshotService/test.py
@@ -0,0 +1,7 @@
 
															+from util.parser import parser
														
 
															+
														
 
															+k = '我在這{1}我在這{2}'
														
 
															+
														
 
															+parser1 = parser()
														
 
															+ls = parser1.replace_list(k)
														
 
															+print(ls)
														
--- a/OpenshotService/util/__pycache__/parser.cpython-39.pyc
+++ b/OpenshotService/util/__pycache__/parser.cpython-39.pyc
--- a/OpenshotService/util/parser.py
+++ b/OpenshotService/util/parser.py
@@ -1,8 +1,11 @@
 
															+import re
														
 
															+class parser:
														
 
															-class parser():
														
 
															+    def __init__(self):
														
 
															+        print("Address of self = ",id(self))
														
 
															-    def check_image_count(self, image_list, text):
														
 
															+    def check_image_count(self,image_list, text):
														
 
															         pair_obj = findTag(text)
														
 
															         if pair_obj['code'] == 1:
														
 
															             if len(image_list)!=len(pair_obj['pair']):
														
@@ -13,15 +16,22 @@ class parser():
 
															             return pair_obj
														
 
															-    def findTag(self, text):
														
 
															-        left_tag = [m.start() for m in re.finditer('{(', in_str)]
														
 
															-        right_tag = [m.start() for m in re.finditer(')}', in_str)]
														
 
															+    def findTag(self,text):
														
 
															+        left_tag = [m.start() for m in re.finditer('{', text)]
														
 
															+        right_tag = [m.start() for m in re.finditer('}', text)]
														
 
															         if len(left_tag)!=len(right_tag):
														
 
															             return {'msg':'圖片標籤錯誤，左右數量不符','code':-1}
														
 
															         pair = []
														
 
															         for idx in range(len(left_tag)):
														
 
															-            pair.append({'left':left_tag[idx],'right':right_tag[idx]])
														
 
															+            pair.append({'left':left_tag[idx],'right':right_tag[idx]})
														
 
															         obj = {'code':1,'pair':pair}
														
 
															         return obj
														
 
															+    def replace_list(self,text):
														
 
															+        rep_ls = []
														
 
															+        pair_obj = self.findTag(text)
														
 
															+        for p in pair_obj['pair']:
														
 
															+            rep_ls.append(text[p['left']:p['right']+1])
														
 
															+        return rep_ls
														
 
															+        
														
--- a/api/main.py
+++ b/api/main.py
@@ -282,7 +282,8 @@ async def make_anchor_video_gSlide(req:models.gSlide_req,token: str = Depends(oa
 
															 @app.post("/make_anchor_video_long")
														
 
															 async def make_anchor_video_long(req:models.request,token: str = Depends(oauth2_scheme)):
														
 
															-    if len(req.image_urls) != len(req.text_content):
														
 
															+    left_tag = [m.start() for m in re.finditer('{', req.text_content[0])]
														
 
															+    if len(req.image_urls) != len(left_tag):
														
 
															         return {'msg':'副標題數量、圖片(影片)數量以及台詞數量必須一致'}
														
 
															     for idx in range(len(req.image_urls)):
														
 
															         if 'http' not in req.image_urls[idx]: