Forráskód Böngészése

add image clip parser

ming 3 éve
szülő
commit
388cd8af6d

+ 31 - 4
OpenshotService/openshot_video_generator.py

@@ -29,6 +29,8 @@ from pytranscriber.control.ctr_autosub import Ctr_Autosub
 import multiprocessing
 import multiprocessing
 from itertools import groupby
 from itertools import groupby
 from operator import itemgetter
 from operator import itemgetter
+from util.parser import parser
+
 dir_sound = 'mp3_track/'
 dir_sound = 'mp3_track/'
 dir_photo = 'photo/'
 dir_photo = 'photo/'
 dir_text = 'text_file/'
 dir_text = 'text_file/'
@@ -186,9 +188,12 @@ def file_prepare_long(name, name_hash,text_content,image_urls,multiLang,lang='zh
         img_num+=1
         img_num+=1
 
 
     #make mp3
     #make mp3
+    text_parser = parser()
     txt_idx = 0
     txt_idx = 0
     for txt in text_content:
     for txt in text_content:
-        txt = txt.replace
+        rep_list = text_parser.replace_list(k)
+        for reptxt in rep_list:
+            txt = txt.replace(reptxt,'')
         if lang!='zh' or multiLang==1:
         if lang!='zh' or multiLang==1:
             if lang!='zh':
             if lang!='zh':
                 tts = gTTS(txt)
                 tts = gTTS(txt)
@@ -373,11 +378,18 @@ def parse_script(file_path,gt_list):
     with open(file_path, 'r',encoding="utf-8") as f:
     with open(file_path, 'r',encoding="utf-8") as f:
         raw_lines = [line.strip() for line in f]
         raw_lines = [line.strip() for line in f]
     lines = adjustSub_by_text_similarity(gt_list,raw_lines)
     lines = adjustSub_by_text_similarity(gt_list,raw_lines)
+    text_parser = parser()
     #make dict
     #make dict
     dict_list = []
     dict_list = []
     for idx in range(len(lines)):
     for idx in range(len(lines)):
         script={}
         script={}
-        script['content'] = lines[idx]
+        rep_ls = text_parser.replace_list(lines[idx])
+        line_content = lines[idx]
+        for reptxt in rep_ls:
+            line_content = line_content.replace(reptxt,'')
+        if len(rep_ls)!=0:
+            script['image_idx'] = int(rep_ls[0].replace('{','').replace('}',''))
+        script['content'] = line_content
         time_raw = raw_lines[idx * 4 +1 ].split(' --> ')
         time_raw = raw_lines[idx * 4 +1 ].split(' --> ')
         start = time_raw[0].split(':')
         start = time_raw[0].split(':')
         stop = time_raw[1].split(':')
         stop = time_raw[1].split(':')
@@ -410,10 +422,11 @@ def parse_script(file_path,gt_list):
         dic_idx = 0
         dic_idx = 0
         accumulated_duration = 0
         accumulated_duration = 0
         duration = dic['stop']-dic['start']
         duration = dic['stop']-dic['start']
-        print(duration)
         for sub_dic in split_sentence(dic['content'],13):
         for sub_dic in split_sentence(dic['content'],13):
             new_dic = {}
             new_dic = {}
             new_dic['index'] = new_idx
             new_dic['index'] = new_idx
+            if 'image_idx' in dic:
+                new_dic['image_obj'] = {'start':dic['start'],'idx':dic['image_idx']}
             new_idx+=1
             new_idx+=1
             ind_duration = duration * sub_dic['time_ratio']
             ind_duration = duration * sub_dic['time_ratio']
             new_dic['start'] = dic['start'] + accumulated_duration
             new_dic['start'] = dic['start'] + accumulated_duration
@@ -427,7 +440,15 @@ def parse_script(file_path,gt_list):
 
 
 
 
 
 
-def adjustSub_by_text_similarity(gts,gens_raw):
+def adjustSub_by_text_similarity(gts_in,gens_raw):
+    #call by value only
+    gts = gts_in[:]
+    text_parser = parser()
+    for i in range(len(gts)):
+        rep_ls = text_parser.replace_list(gts[i])
+        for reptxt in rep_ls:
+            gts[i] = gts[i].replace(reptxt)
+    print(gts)
     gens = []
     gens = []
     for idx in range(int((len(gens_raw)+1)/4)):
     for idx in range(int((len(gens_raw)+1)/4)):
         gens.append(gens_raw[idx*4+2])
         gens.append(gens_raw[idx*4+2])
@@ -448,6 +469,12 @@ def adjustSub_by_text_similarity(gts,gens_raw):
         else:
         else:
             adjusted[idx] = match_text[0]
             adjusted[idx] = match_text[0]
             duplicated_list.append(match_text[0])
             duplicated_list.append(match_text[0])
+    combine2_tag = [''.join([i,j]) for i,j in zip(gts_in, gts_in[1:])]
+    combine3_tag = [''.join([i,j,k]) for i,j,k in zip(gts_in, gts_in[1:], gts_in[2:])]
+    alls_tag = gts_in + combine2_tag + combine3_tag
+    for idx in range(len(adjusted)):
+        match_text = difflib.get_close_matches(adjusted[idx], alls_tag, cutoff=0.1)
+        adjusted[idx] = match_text
     return adjusted
     return adjusted
 
 
 def trim_punctuation(s):
 def trim_punctuation(s):

+ 7 - 0
OpenshotService/test.py

@@ -0,0 +1,7 @@
+from util.parser import parser
+
+k = '我在這{1}我在這{2}'
+
+parser1 = parser()
+ls = parser1.replace_list(k)
+print(ls)

BIN
OpenshotService/util/__pycache__/parser.cpython-39.pyc


+ 16 - 6
OpenshotService/util/parser.py

@@ -1,8 +1,11 @@
+import re
 
 
+class parser:
 
 
-class parser():
+    def __init__(self):
+        print("Address of self = ",id(self))
 
 
-    def check_image_count(self, image_list, text):
+    def check_image_count(self,image_list, text):
         pair_obj = findTag(text)
         pair_obj = findTag(text)
         if pair_obj['code'] == 1:
         if pair_obj['code'] == 1:
             if len(image_list)!=len(pair_obj['pair']):
             if len(image_list)!=len(pair_obj['pair']):
@@ -13,15 +16,22 @@ class parser():
             return pair_obj
             return pair_obj
             
             
 
 
-    def findTag(self, text):
-        left_tag = [m.start() for m in re.finditer('{(', in_str)]
-        right_tag = [m.start() for m in re.finditer(')}', in_str)]
+    def findTag(self,text):
+        left_tag = [m.start() for m in re.finditer('{', text)]
+        right_tag = [m.start() for m in re.finditer('}', text)]
         if len(left_tag)!=len(right_tag):
         if len(left_tag)!=len(right_tag):
             return {'msg':'圖片標籤錯誤,左右數量不符','code':-1}
             return {'msg':'圖片標籤錯誤,左右數量不符','code':-1}
         pair = []
         pair = []
         for idx in range(len(left_tag)):
         for idx in range(len(left_tag)):
-            pair.append({'left':left_tag[idx],'right':right_tag[idx]])
+            pair.append({'left':left_tag[idx],'right':right_tag[idx]})
         obj = {'code':1,'pair':pair}
         obj = {'code':1,'pair':pair}
         return obj
         return obj
 
 
+    def replace_list(self,text):
+        rep_ls = []
+        pair_obj = self.findTag(text)
+        for p in pair_obj['pair']:
+            rep_ls.append(text[p['left']:p['right']+1])
+        return rep_ls
+        
     
     

+ 2 - 1
api/main.py

@@ -282,7 +282,8 @@ async def make_anchor_video_gSlide(req:models.gSlide_req,token: str = Depends(oa
 
 
 @app.post("/make_anchor_video_long")
 @app.post("/make_anchor_video_long")
 async def make_anchor_video_long(req:models.request,token: str = Depends(oauth2_scheme)):
 async def make_anchor_video_long(req:models.request,token: str = Depends(oauth2_scheme)):
-    if len(req.image_urls) != len(req.text_content):
+    left_tag = [m.start() for m in re.finditer('{', req.text_content[0])]
+    if len(req.image_urls) != len(left_tag):
         return {'msg':'副標題數量、圖片(影片)數量以及台詞數量必須一致'}
         return {'msg':'副標題數量、圖片(影片)數量以及台詞數量必須一致'}
     for idx in range(len(req.image_urls)):
     for idx in range(len(req.image_urls)):
         if 'http' not in req.image_urls[idx]:
         if 'http' not in req.image_urls[idx]: