| 
					
				 | 
			
			
				@@ -234,18 +234,17 @@ def call_anchor(fileName,avatar): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     fr.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     fw.close() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def parse_script(file_path,gt_list): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     with open(file_path, 'r') as f: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        lines = [line.strip() for line in f] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    lines = adjustSub_by_text_similarity(gt_list,lines) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        raw_lines = [line.strip() for line in f] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    lines = adjustSub_by_text_similarity(gt_list,raw_lines) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     dict_list = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for idx in range(int((len(lines)+1)/4)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         script={} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         script['index'] = idx 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        time_raw = lines[idx * 4 +1 ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        script['content'] = lines[idx * 4 + 2] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time_raw = raw_lines[idx * 4 +1 ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        script['content'] = lines[idx*4+2] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         start = time_raw.split(' --> ')[0].split(':') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         stop = time_raw.split(' --> ')[1].split(':') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         start[2] = start[2].replace(',','.') 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -259,11 +258,12 @@ def parse_script(file_path,gt_list): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         dict_list.append(script) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     new_idx = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     splitted_dict = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for dic in dict_list: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         #螢幕寬度只能容納13個字 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if len(dic['content'])>13: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             times = math.ceil(len(dic['content'])/13) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            time_ratio = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            time_ratio = [None] *times 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             left_words = len(dic['content'])%13 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             for t in range(times): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 if t != (times-1): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -271,7 +271,8 @@ def parse_script(file_path,gt_list): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     time_ratio[t] = left_words/13 * dic['duration'] / times 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             for t in range(times): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if time_ratio[t]>0.22:#about less than 3 words 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if True: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                #if time_ratio[t]>0.22:#about less than 3 words 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     new_dic = {} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     new_dic['index'] = new_idx 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     new_dic['content'] = dic['content'][t*13:t*13+13] 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -283,27 +284,27 @@ def parse_script(file_path,gt_list): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     new_dic['duration'] = time_ratio[t] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     splitted_dict.append(new_dic) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     new_idx+=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    splitted_dict[new_idx-1]['content']+=dic['content'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                #else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                 #   splitted_dict[new_idx-1]['content']+=dic['content'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             dic['index'] = new_idx 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             new_idx+=1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             splitted_dict.append(dic) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return splitted_dict 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def adjustSub_by_text_similarity(gts,gens): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    adjusted = [None]*len(gens) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     combine2 = [''.join([i,j]) for i,j in zip(gts, gts[1:])] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     combine3 = [''.join([i,j,k]) for i,j,k in zip(gts, gts[1:], gts[2:])] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     alls = gts+combine2+combine3 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for idx in range(len(gens)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         match_text = difflib.get_close_matches(gens[idx], alls, cutoff=0.1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if len(match_text) != 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print('{ '+gens[idx]+' }校正後: '+match_text[0]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            gens[idx] = match_text[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-           print('無校正:'+gens[idx]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return gens 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if len(match_text) != 0 and idx: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            #print(gens[idx]+'校正後: '+match_text[0]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            adjusted[idx] = match_text[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+     
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return adjusted 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def trim_punctuation(s): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     pat_block = u'[^\u4e00-\u9fff0-9a-zA-Z]+'; 
			 |