ming před 3 lety
rodič
revize
12c05b3845
Změněn 1 soubor: 45 přidání a 2 odebrání
  1. 45 2
      OpenshotService/openshot_video_generator.py

+ 45 - 2
OpenshotService/openshot_video_generator.py

@@ -238,12 +238,13 @@ def call_anchor(fileName,avatar):
 def parse_script(file_path):
     with open(file_path, 'r') as f:
         lines = [line.strip() for line in f]
+    lines = adjustSub_by_text_similarity(gt_list,lines)
     dict_list = []
     
     for idx in range(int((len(lines)+1)/4)):
         script={}
         script['index'] = idx
-        time_raw = lines[idx * 4 + 1]
+        time_raw = lines[idx * 4]
         script['content'] = lines[idx * 4 + 2]
         start = time_raw.split(' --> ')[0].split(':')
         stop = time_raw.split(' --> ')[1].split(':')
@@ -256,7 +257,49 @@ def parse_script(file_path):
         script['stop'] = stop_sec
         script['duration'] = abs(duration)
         dict_list.append(script)
-    return dict_list
+    new_idx = 0
+    splitted_dict = []
+    for dic in dict_list:
+        #螢幕寬度只能容納13個字
+        if dic['content']>13:
+            times = math.ceil(len(dic['content'])/13)
+            time_ratio = []
+            for t in range(times):
+                time_ratio[t] = len(dic['content'][t*13:t*13+13])/13 * dic['duration']
+            for t in range(times):
+                if time_ratio[t]>0.22:#about less than 3 words
+                    new_dic = {}
+                    new_dic['index'] = new_idx
+                    new_dic['content'] = dic['content'][t*13:t*13+13]
+                    start_plus = 0
+                    for t2 in range(t):
+                        start_plus += time_ratio[t2]
+                    new_dic['start'] =  dic['start'] + start_plus
+                    new_dic['stop'] = new_dic['start']+time_ratio[t]
+                    new_dic['duration'] = time_ratio[t]
+                    splitted_dict.append(new_dic)
+                    new_idx+=1
+                else:
+                    splitted_dict[new_idx-1]['content']+=dic['content']
+        else:
+            dic['index'] = new_idx
+            new_idx+=1
+            splitted_dict.append(dic)
+    return splitted_dict
+
+def adjustSub_by_text_similarity(gts,gens,cutoff=0.1):
+    """Snap generated subtitle lines to the closest ground-truth text.
+
+    Candidates are every entry of ``gts`` plus every concatenation of two
+    and of three consecutive entries, so a generated line that merged
+    neighbouring ground-truth sentences can still be matched.
+
+    Args:
+        gts: Sequence of ground-truth subtitle strings.
+        gens: List of generated lines; it is corrected in place.
+        cutoff: Minimum ``difflib`` similarity ratio (0..1) a candidate must
+            reach to be accepted. The default of 0.1 keeps the original,
+            very permissive behaviour.
+
+    Returns:
+        The same ``gens`` list, with every matched line replaced by its best
+        candidate. Lines containing ' --> ' are never replaced.
+    """
+    import difflib  # local import: the file-level imports are not guaranteed
+
+    gts = list(gts)  # accept any sequence, not only lists
+    pairs = [''.join(group) for group in zip(gts, gts[1:])]
+    triples = [''.join(group) for group in zip(gts, gts[1:], gts[2:])]
+    candidates = gts + pairs + triples
+
+    for idx, line in enumerate(gens):
+        # parse_script splits timing lines on ' --> '. With such a low cutoff
+        # they could be overwritten by subtitle text that merely shares a few
+        # digits, so they are left untouched.
+        if ' --> ' in line:
+            print('無校正:'+line)
+            continue
+        # Only the best candidate is used, so ask difflib for just one.
+        best = difflib.get_close_matches(line, candidates, n=1, cutoff=cutoff)
+        if best:
+            print('{ '+line+' }校正後: '+best[0])
+            gens[idx] = best[0]
+        else:
+            print('無校正:'+line)
+    return gens
 
 def trim_punctuation(s):
     pat_block = u'[^\u4e00-\u9fff0-9a-zA-Z]+';