ming 3 anos atrás
pai
commit
d047be1803
3 arquivos alterados com 18 adições e 56 exclusões
  1. BIN
      .DS_Store
  2. 18 17
      OpenshotService/openshot_video_generator.py
  3. 0 39
      subGenerator/out.txt

BIN
.DS_Store


+ 18 - 17
OpenshotService/openshot_video_generator.py

@@ -234,18 +234,17 @@ def call_anchor(fileName,avatar):
 
     fr.close()
     fw.close()
-
 def parse_script(file_path,gt_list):
     with open(file_path, 'r') as f:
-        lines = [line.strip() for line in f]
-    lines = adjustSub_by_text_similarity(gt_list,lines)
+        raw_lines = [line.strip() for line in f]
+    lines = adjustSub_by_text_similarity(gt_list,raw_lines)
     dict_list = []
-    
+
     for idx in range(int((len(lines)+1)/4)):
         script={}
         script['index'] = idx
-        time_raw = lines[idx * 4 +1 ]
-        script['content'] = lines[idx * 4 + 2]
+        time_raw = raw_lines[idx * 4 +1 ]
+        script['content'] = lines[idx*4+2]
         start = time_raw.split(' --> ')[0].split(':')
         stop = time_raw.split(' --> ')[1].split(':')
         start[2] = start[2].replace(',','.')
@@ -259,11 +258,12 @@ def parse_script(file_path,gt_list):
         dict_list.append(script)
     new_idx = 0
     splitted_dict = []
+ 
     for dic in dict_list:
         #螢幕寬度只能容納13個字
         if len(dic['content'])>13:
             times = math.ceil(len(dic['content'])/13)
-            time_ratio = []
+            time_ratio = [None] *times
             left_words = len(dic['content'])%13
             for t in range(times):
                 if t != (times-1):
@@ -271,7 +271,8 @@ def parse_script(file_path,gt_list):
                 else:
                     time_ratio[t] = left_words/13 * dic['duration'] / times
             for t in range(times):
-                if time_ratio[t]>0.22:#about less than 3 words
+                if True:
+                #if time_ratio[t]>0.22:#about less than 3 words
                     new_dic = {}
                     new_dic['index'] = new_idx
                     new_dic['content'] = dic['content'][t*13:t*13+13]
@@ -283,27 +284,27 @@ def parse_script(file_path,gt_list):
                     new_dic['duration'] = time_ratio[t]
                     splitted_dict.append(new_dic)
                     new_idx+=1
-                else:
-                    splitted_dict[new_idx-1]['content']+=dic['content']
+                #else:
+                 #   splitted_dict[new_idx-1]['content']+=dic['content']
         else:
             dic['index'] = new_idx
             new_idx+=1
             splitted_dict.append(dic)
     return splitted_dict
-
 def adjustSub_by_text_similarity(gts,gens):
+    adjusted = [None]*len(gens)
     combine2 = [''.join([i,j]) for i,j in zip(gts, gts[1:])]
     combine3 = [''.join([i,j,k]) for i,j,k in zip(gts, gts[1:], gts[2:])]
     alls = gts+combine2+combine3
  
     for idx in range(len(gens)):
         match_text = difflib.get_close_matches(gens[idx], alls, cutoff=0.1)
-        if len(match_text) != 0:
-            print('{ '+gens[idx]+' }校正後: '+match_text[0])
-            gens[idx] = match_text[0]
-        else:
-           print('無校正:'+gens[idx])
-    return gens
+        if len(match_text) != 0 and idx:
+            #print(gens[idx]+'校正後: '+match_text[0])
+            adjusted[idx] = match_text[0]
+  
+    
+    return adjusted
 
 def trim_punctuation(s):
     pat_block = u'[^\u4e00-\u9fff0-9a-zA-Z]+';

+ 0 - 39
subGenerator/out.txt

@@ -1,39 +0,0 @@
-1
-00:00:04,608 --> 00:00:10,240
-Hello Kitty瑜2018年加入YouTube開始活動
-
-2
-00:00:11,008 --> 00:00:13,568
-再次我介紹的影片裡
-
-3
-00:00:14,080 --> 00:00:19,712
-Kitty表示一直憧憬著長在YouTube跟大家見面
-
-4
-00:00:20,480 --> 00:00:26,624
-一開頻道就吸引許多粉絲訂閱付錢有28萬訂閱者
-
-5
-00:00:27,904 --> 00:00:34,048
-接下來這位花生君瑜2017年加入YouTube開始湖
-
-6
-00:00:34,304 --> 00:00:34,816
-活動
-
-7
-00:00:35,584 --> 00:00:41,472
-他的外型太過特別花生桃紅色圍巾魚紙尿布
-
-8
-00:00:42,240 --> 00:00:48,384
-被觀眾評價為第一眼看上很噁心但看著看著還挺
-
-9
-00:00:48,640 --> 00:00:49,408
-秦可愛
-
-10
-00:00:49,920 --> 00:00:52,736
-目前有12萬訂閱者