4 years ago · d1a9796ac4
--- a/OpenshotService/openshot_video_generator.py
+++ b/OpenshotService/openshot_video_generator.py
@@ -383,9 +383,7 @@ def parse_script(file_path,gt_list):
 
				     dict_list = []
			
 
				     for idx in range(len(lines)):
			
 
				         script={}
			
 
				-        print(lines[idx])
			
 
				         rep_ls = text_parser.replace_list(lines[idx])
			
 
				-        print(rep_ls)
			
 
				         line_content = lines[idx]
			
 
				         for reptxt in rep_ls:
			
 
				             line_content = line_content.replace(reptxt,'')
			
@@ -398,32 +396,41 @@ def parse_script(file_path,gt_list):
 
				         script['start'] = float(start[0])*3600 + float(start[1])*60 + float(start[2].replace(',','.'))
			
 
				         script['stop'] = float(stop[0])*3600 + float(stop[1])*60 + float(stop[2].replace(',','.'))
			
 
				         dict_list.append(script)
			
 
				+ 
			
 
				     #merge duplicated sentences
			
 
				+    skip_list = []
			
 
				     script_not_dup_list = []
			
 
				     for idx in range(len(dict_list)):
			
 
				-        dup_list = []
			
 
				-        for idx_inner in range(len(dict_list)):
			
 
				-            if dict_list[idx_inner]['content']==dict_list[idx]['content']:
			
 
				-                dup_list.append(idx_inner)
			
 
				-        for dup_idx in dup_list:
			
 
				-            if dup_idx == min(dup_list):
			
 
				-                dict_list[dup_idx]['type'] = 'lead_sentence'
			
 
				-            else:
			
 
				-                dict_list[dup_idx]['type'] = 'duplicated'
			
 
				-        dict_list[dup_list[0]]['stop'] = dict_list[dup_list[-1]]['stop']
			
 
				-        if dict_list[idx]['type'] == 'lead_sentence':
			
 
				-            script_not_dup_list.append(dict_list[idx])
			
 
				-                
			
 
				-    #avoid subtitle overlapping ?   Timeline overlapping not found currently
			
 
				-    #cut by max length---->  eng seperated problem   {eng_idx}
			
 
				-    #ENG counts, zh counts, space counts
			
 
				-
			
 
				+        if idx not in skip_list:
			
 
				+            dup_list = []
			
 
				+            found = 0
			
 
				+            for idx_inner in range(len(dict_list)):
			
 
				+                if dict_list[idx_inner]['content'] == dict_list[idx]['content'] and idx <= idx_inner:
			
 
				+                    dup_list.append(idx_inner)
			
 
				+                    skip_list.append(idx_inner)
			
 
				+                    found += 1
			
 
				+                if found != 0 and dict_list[idx_inner]['content']!=dict_list[idx]['content'] and idx <= idx_inner:
			
 
				+                    found = 0
			
 
				+                    break
			
 
				+        
			
 
				+            for dup_idx in dup_list:
			
 
				+                if dup_idx == min(dup_list):
			
 
				+                    dict_list[dup_idx]['type'] = 'lead_sentence'
			
 
				+                else:
			
 
				+                    dict_list[dup_idx]['type'] = 'duplicated'
			
 
				+            dict_list[dup_list[0]]['stop'] = dict_list[dup_list[-1]]['stop']
			
 
				+          
			
 
				+            if dict_list[idx]['type'] == 'lead_sentence':
			
 
				+                script_not_dup_list.append(dict_list[idx])
			
 
				+            
			
 
				+    
			
 
				     new_idx = 0
			
 
				     splitted_dict = []
			
 
				     for dic in script_not_dup_list:
			
 
				         dic_idx = 0
			
 
				         accumulated_duration = 0
			
 
				         duration = dic['stop']-dic['start']
			
 
				+
			
 
				         for sub_dic in split_sentence(dic['content'],13):
			
 
				             new_dic = {}
			
 
				             new_dic['index'] = new_idx
			
@@ -434,14 +441,8 @@ def parse_script(file_path,gt_list):
 
				             new_dic['start'] = dic['start'] + accumulated_duration
			
 
				             accumulated_duration += ind_duration
			
 
				             new_dic['content'] = sub_dic['content']
			
 
				-            new_dic['duration'] = ind_duration*0.9
			
 
				-            if new_dic['duration'] > 3:
			
 
				-                print('-----------------------------')
			
 
				-                print('origin duration : ', duration)
			
 
				-                print(dic)
			
 
				-                print('-----------------------------')
			
 
				+            new_dic['duration'] = ind_duration*0.7
			
 
				             splitted_dict.append(new_dic)
			
 
				-    
			
 
				     return splitted_dict
			
 
				 
			
 
				 
			
@@ -462,21 +463,28 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
 
				     combine2 = [''.join([i,j]) for i,j in zip(gts, gts[1:])]
			
 
				     combine3 = [''.join([i,j,k]) for i,j,k in zip(gts, gts[1:], gts[2:])]
			
 
				     alls = gts + combine2 + combine3
			
 
				-
			
 
				     adjusted = [None]*len(gens)
			
 
				     duplicated_list = []
			
 
				     for idx in range(len(gens)):
			
 
				         match_text = difflib.get_close_matches(gens[idx], alls, cutoff=0.1)
			
 
				-        if match_text[0] in duplicated_list:
			
 
				-            for mt in match_text:
			
 
				-                if mt == adjusted[idx-1] or mt not in duplicated_list:
			
 
				-                    adjusted[idx] = mt
			
 
				-                    break
			
 
				-        else:
			
 
				-            adjusted[idx] = match_text[0]
			
 
				-            duplicated_list.append(match_text[0])
			
 
				-        if None == adjusted[idx]:
			
 
				-            adjusted[idx] = gens[idx]
			
 
				+        if len(match_text) != 0:
			
 
				+            if match_text[0] not in duplicated_list:
			
 
				+                adjusted[idx] = match_text[0]
			
 
				+                duplicated_list.append(match_text[0])
			
 
				+            else:
			
 
				+                if match_text[0] == adjusted[idx-1]:
			
 
				+                    adjusted[idx] = match_text[0]
			
 
				+                else:
			
 
				+                    found = 0
			
 
				+                    for mt in match_text:
			
 
				+                        if mt not in duplicated_list:
			
 
				+                            adjusted[idx] = mt
			
 
				+                            found += 1
			
 
				+                            break
			
 
				+                    if found ==0:
			
 
				+                        adjusted[idx] = ' '
			
 
				+        else :
			
 
				+            adjusted[idx] = ' '
			
 
				 
			
 
				     combine2_tag = [''.join([i,j]) for i,j in zip(gts_in, gts_in[1:])]
			
 
				     combine3_tag = [''.join([i,j,k]) for i,j,k in zip(gts_in, gts_in[1:], gts_in[2:])]
			
@@ -485,7 +493,6 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
 
				     for idx in range(len(adjusted)):
			
 
				         match_text = difflib.get_close_matches(adjusted[idx], alls_tag, cutoff=0.1)
			
 
				         adjusted[idx] = match_text[0]
			
 
				-
			
 
				     return adjusted
			
 
				 
			
 
				 def trim_punctuation(s):
			
@@ -666,8 +673,7 @@ def video_gen(name_hash,name,text_content, image_urls,multiLang,avatar):
 
				     img_list = [None]*len(img_clip_list)
			
 
				 
			
 
				     img_file_ls = listdir(dir_photo+name_hash)
			
 
				-    print(img_file_ls)
			
 
				-    print(img_dict_ls)
			
 
				+
			
 
				     for img_idx in range(len(img_file_ls)):
			
 
				         img_list[img_idx] = openshot.FFmpegReader(dir_photo+name_hash+'/'+img_file_ls[img_idx])
			
 
				         img_list[img_idx].Open()
			
@@ -692,20 +698,10 @@ def video_gen(name_hash,name,text_content, image_urls,multiLang,avatar):
 
				     t.Close()
			
 
				     w.Close()
			
 
				 
			
 
				-    path = tmp_video_dir+name_hash+"script.txt"
			
 
				-    f = open(path, 'r')
			
 
				-    print(f.read())
			
 
				-    f.close()
			
 
				-    #os.remove(tmp_video_dir+name_hash+"raw.mp4")
			
 
				-    #os.remove(tmp_video_dir+name_hash+"script.txt")
			
 
				-    print(name+"ALL DONE : www.choozmo.com:8168/"+video_sub_folder+name_hash+"raw.mp4")
			
 
				 
			
 
				-    Ctr_Autosub.init()
			
 
				-    Ctr_Autosub.generate_subtitles(tmp_video_dir+name_hash+".mp4",'zh',listener_progress,output=tmp_video_dir+name_hash+"script.txt",concurrency=DEFAULT_CONCURRENCY,subtitle_file_format=DEFAULT_SUBTITLE_FORMAT)
			
 
				-    path = tmp_video_dir+name_hash+"script.txt"
			
 
				-    f = open(path, 'r')
			
 
				-    print(f.read())
			
 
				-    f.close()
			
 
				+    os.remove(tmp_video_dir+name_hash+"raw.mp4")
			
 
				+    os.remove(tmp_video_dir+name_hash+"script.txt")
			
 
				+    print(name+"ALL DONE : www.choozmo.com:8168/"+video_sub_folder+name_hash+"raw.mp4")
			
 
				 
			
 
				 
			
 
				 def anchor_video_v2(name_hash,name,text_content, image_urls,multiLang,avatar):