Browse Source

Merge branch 'master' of http://git.choozmo.com:3000/choozmo/AI_Anchor_2 into master

jeter20131220 3 years ago
parent
commit
d1a9796ac4
1 changed file with 49 additions and 53 deletions
  1. 49 53
      OpenshotService/openshot_video_generator.py

+ 49 - 53
OpenshotService/openshot_video_generator.py

@@ -383,9 +383,7 @@ def parse_script(file_path,gt_list):
     dict_list = []
     for idx in range(len(lines)):
         script={}
-        print(lines[idx])
         rep_ls = text_parser.replace_list(lines[idx])
-        print(rep_ls)
         line_content = lines[idx]
         for reptxt in rep_ls:
             line_content = line_content.replace(reptxt,'')
@@ -398,32 +396,41 @@ def parse_script(file_path,gt_list):
         script['start'] = float(start[0])*3600 + float(start[1])*60 + float(start[2].replace(',','.'))
         script['stop'] = float(stop[0])*3600 + float(stop[1])*60 + float(stop[2].replace(',','.'))
         dict_list.append(script)
+ 
     #merge duplicated sentences
+    skip_list = []
     script_not_dup_list = []
     for idx in range(len(dict_list)):
-        dup_list = []
-        for idx_inner in range(len(dict_list)):
-            if dict_list[idx_inner]['content']==dict_list[idx]['content']:
-                dup_list.append(idx_inner)
-        for dup_idx in dup_list:
-            if dup_idx == min(dup_list):
-                dict_list[dup_idx]['type'] = 'lead_sentence'
-            else:
-                dict_list[dup_idx]['type'] = 'duplicated'
-        dict_list[dup_list[0]]['stop'] = dict_list[dup_list[-1]]['stop']
-        if dict_list[idx]['type'] == 'lead_sentence':
-            script_not_dup_list.append(dict_list[idx])
-                
-    #avoid subtitle overlapping ?   Timeline overlapping not found currently
-    #cut by max length---->  eng seperated problem   {eng_idx}
-    #ENG counts, zh counts, space counts
-
+        if idx not in skip_list:
+            dup_list = []
+            found = 0
+            for idx_inner in range(len(dict_list)):
+                if dict_list[idx_inner]['content'] == dict_list[idx]['content'] and idx <= idx_inner:
+                    dup_list.append(idx_inner)
+                    skip_list.append(idx_inner)
+                    found += 1
+                if found != 0 and dict_list[idx_inner]['content']!=dict_list[idx]['content'] and idx <= idx_inner:
+                    found = 0
+                    break
+        
+            for dup_idx in dup_list:
+                if dup_idx == min(dup_list):
+                    dict_list[dup_idx]['type'] = 'lead_sentence'
+                else:
+                    dict_list[dup_idx]['type'] = 'duplicated'
+            dict_list[dup_list[0]]['stop'] = dict_list[dup_list[-1]]['stop']
+          
+            if dict_list[idx]['type'] == 'lead_sentence':
+                script_not_dup_list.append(dict_list[idx])
+            
+    
     new_idx = 0
     splitted_dict = []
     for dic in script_not_dup_list:
         dic_idx = 0
         accumulated_duration = 0
         duration = dic['stop']-dic['start']
+
         for sub_dic in split_sentence(dic['content'],13):
             new_dic = {}
             new_dic['index'] = new_idx
@@ -434,14 +441,8 @@ def parse_script(file_path,gt_list):
             new_dic['start'] = dic['start'] + accumulated_duration
             accumulated_duration += ind_duration
             new_dic['content'] = sub_dic['content']
-            new_dic['duration'] = ind_duration*0.9
-            if new_dic['duration'] > 3:
-                print('-----------------------------')
-                print('origin duration : ', duration)
-                print(dic)
-                print('-----------------------------')
+            new_dic['duration'] = ind_duration*0.7
             splitted_dict.append(new_dic)
-    
     return splitted_dict
 
 
@@ -462,21 +463,28 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
     combine2 = [''.join([i,j]) for i,j in zip(gts, gts[1:])]
     combine3 = [''.join([i,j,k]) for i,j,k in zip(gts, gts[1:], gts[2:])]
     alls = gts + combine2 + combine3
-
     adjusted = [None]*len(gens)
     duplicated_list = []
     for idx in range(len(gens)):
         match_text = difflib.get_close_matches(gens[idx], alls, cutoff=0.1)
-        if match_text[0] in duplicated_list:
-            for mt in match_text:
-                if mt == adjusted[idx-1] or mt not in duplicated_list:
-                    adjusted[idx] = mt
-                    break
-        else:
-            adjusted[idx] = match_text[0]
-            duplicated_list.append(match_text[0])
-        if None == adjusted[idx]:
-            adjusted[idx] = gens[idx]
+        if len(match_text) != 0:
+            if match_text[0] not in duplicated_list:
+                adjusted[idx] = match_text[0]
+                duplicated_list.append(match_text[0])
+            else:
+                if match_text[0] == adjusted[idx-1]:
+                    adjusted[idx] = match_text[0]
+                else:
+                    found = 0
+                    for mt in match_text:
+                        if mt not in duplicated_list:
+                            adjusted[idx] = mt
+                            found += 1
+                            break
+                    if found ==0:
+                        adjusted[idx] = ' '
+        else :
+            adjusted[idx] = ' '
 
     combine2_tag = [''.join([i,j]) for i,j in zip(gts_in, gts_in[1:])]
     combine3_tag = [''.join([i,j,k]) for i,j,k in zip(gts_in, gts_in[1:], gts_in[2:])]
@@ -485,7 +493,6 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
     for idx in range(len(adjusted)):
         match_text = difflib.get_close_matches(adjusted[idx], alls_tag, cutoff=0.1)
         adjusted[idx] = match_text[0]
-
     return adjusted
 
 def trim_punctuation(s):
@@ -666,8 +673,7 @@ def video_gen(name_hash,name,text_content, image_urls,multiLang,avatar):
     img_list = [None]*len(img_clip_list)
 
     img_file_ls = listdir(dir_photo+name_hash)
-    print(img_file_ls)
-    print(img_dict_ls)
+
     for img_idx in range(len(img_file_ls)):
         img_list[img_idx] = openshot.FFmpegReader(dir_photo+name_hash+'/'+img_file_ls[img_idx])
         img_list[img_idx].Open()
@@ -692,20 +698,10 @@ def video_gen(name_hash,name,text_content, image_urls,multiLang,avatar):
     t.Close()
     w.Close()
 
-    path = tmp_video_dir+name_hash+"script.txt"
-    f = open(path, 'r')
-    print(f.read())
-    f.close()
-    #os.remove(tmp_video_dir+name_hash+"raw.mp4")
-    #os.remove(tmp_video_dir+name_hash+"script.txt")
-    print(name+"ALL DONE : www.choozmo.com:8168/"+video_sub_folder+name_hash+"raw.mp4")
 
-    Ctr_Autosub.init()
-    Ctr_Autosub.generate_subtitles(tmp_video_dir+name_hash+".mp4",'zh',listener_progress,output=tmp_video_dir+name_hash+"script.txt",concurrency=DEFAULT_CONCURRENCY,subtitle_file_format=DEFAULT_SUBTITLE_FORMAT)
-    path = tmp_video_dir+name_hash+"script.txt"
-    f = open(path, 'r')
-    print(f.read())
-    f.close()
+    os.remove(tmp_video_dir+name_hash+"raw.mp4")
+    os.remove(tmp_video_dir+name_hash+"script.txt")
+    print(name+"ALL DONE : www.choozmo.com:8168/"+video_sub_folder+name_hash+"raw.mp4")
 
 
 def anchor_video_v2(name_hash,name,text_content, image_urls,multiLang,avatar):