|
@@ -383,9 +383,7 @@ def parse_script(file_path,gt_list):
|
|
|
dict_list = []
|
|
|
for idx in range(len(lines)):
|
|
|
script={}
|
|
|
- print(lines[idx])
|
|
|
rep_ls = text_parser.replace_list(lines[idx])
|
|
|
- print(rep_ls)
|
|
|
line_content = lines[idx]
|
|
|
for reptxt in rep_ls:
|
|
|
line_content = line_content.replace(reptxt,'')
|
|
@@ -398,32 +396,41 @@ def parse_script(file_path,gt_list):
|
|
|
script['start'] = float(start[0])*3600 + float(start[1])*60 + float(start[2].replace(',','.'))
|
|
|
script['stop'] = float(stop[0])*3600 + float(stop[1])*60 + float(stop[2].replace(',','.'))
|
|
|
dict_list.append(script)
|
|
|
+
|
|
|
#merge duplicated sentences
|
|
|
+ skip_list = []
|
|
|
script_not_dup_list = []
|
|
|
for idx in range(len(dict_list)):
|
|
|
- dup_list = []
|
|
|
- for idx_inner in range(len(dict_list)):
|
|
|
- if dict_list[idx_inner]['content']==dict_list[idx]['content']:
|
|
|
- dup_list.append(idx_inner)
|
|
|
- for dup_idx in dup_list:
|
|
|
- if dup_idx == min(dup_list):
|
|
|
- dict_list[dup_idx]['type'] = 'lead_sentence'
|
|
|
- else:
|
|
|
- dict_list[dup_idx]['type'] = 'duplicated'
|
|
|
- dict_list[dup_list[0]]['stop'] = dict_list[dup_list[-1]]['stop']
|
|
|
- if dict_list[idx]['type'] == 'lead_sentence':
|
|
|
- script_not_dup_list.append(dict_list[idx])
|
|
|
-
|
|
|
- #avoid subtitle overlapping ? Timeline overlapping not found currently
|
|
|
- #cut by max length----> eng seperated problem {eng_idx}
|
|
|
- #ENG counts, zh counts, space counts
|
|
|
-
|
|
|
+ if idx not in skip_list:
|
|
|
+ dup_list = []
|
|
|
+ found = 0
|
|
|
+ for idx_inner in range(len(dict_list)):
|
|
|
+ if dict_list[idx_inner]['content'] == dict_list[idx]['content'] and idx <= idx_inner:
|
|
|
+ dup_list.append(idx_inner)
|
|
|
+ skip_list.append(idx_inner)
|
|
|
+ found += 1
|
|
|
+ if found != 0 and dict_list[idx_inner]['content']!=dict_list[idx]['content'] and idx <= idx_inner:
|
|
|
+ found = 0
|
|
|
+ break
|
|
|
+
|
|
|
+ for dup_idx in dup_list:
|
|
|
+ if dup_idx == min(dup_list):
|
|
|
+ dict_list[dup_idx]['type'] = 'lead_sentence'
|
|
|
+ else:
|
|
|
+ dict_list[dup_idx]['type'] = 'duplicated'
|
|
|
+ dict_list[dup_list[0]]['stop'] = dict_list[dup_list[-1]]['stop']
|
|
|
+
|
|
|
+ if dict_list[idx]['type'] == 'lead_sentence':
|
|
|
+ script_not_dup_list.append(dict_list[idx])
|
|
|
+
|
|
|
+
|
|
|
new_idx = 0
|
|
|
splitted_dict = []
|
|
|
for dic in script_not_dup_list:
|
|
|
dic_idx = 0
|
|
|
accumulated_duration = 0
|
|
|
duration = dic['stop']-dic['start']
|
|
|
+
|
|
|
for sub_dic in split_sentence(dic['content'],13):
|
|
|
new_dic = {}
|
|
|
new_dic['index'] = new_idx
|
|
@@ -434,14 +441,8 @@ def parse_script(file_path,gt_list):
|
|
|
new_dic['start'] = dic['start'] + accumulated_duration
|
|
|
accumulated_duration += ind_duration
|
|
|
new_dic['content'] = sub_dic['content']
|
|
|
- new_dic['duration'] = ind_duration*0.9
|
|
|
- if new_dic['duration'] > 3:
|
|
|
- print('-----------------------------')
|
|
|
- print('origin duration : ', duration)
|
|
|
- print(dic)
|
|
|
- print('-----------------------------')
|
|
|
+ new_dic['duration'] = ind_duration*0.7
|
|
|
splitted_dict.append(new_dic)
|
|
|
-
|
|
|
return splitted_dict
|
|
|
|
|
|
|
|
@@ -462,21 +463,28 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
|
|
|
combine2 = [''.join([i,j]) for i,j in zip(gts, gts[1:])]
|
|
|
combine3 = [''.join([i,j,k]) for i,j,k in zip(gts, gts[1:], gts[2:])]
|
|
|
alls = gts + combine2 + combine3
|
|
|
-
|
|
|
adjusted = [None]*len(gens)
|
|
|
duplicated_list = []
|
|
|
for idx in range(len(gens)):
|
|
|
match_text = difflib.get_close_matches(gens[idx], alls, cutoff=0.1)
|
|
|
- if match_text[0] in duplicated_list:
|
|
|
- for mt in match_text:
|
|
|
- if mt == adjusted[idx-1] or mt not in duplicated_list:
|
|
|
- adjusted[idx] = mt
|
|
|
- break
|
|
|
- else:
|
|
|
- adjusted[idx] = match_text[0]
|
|
|
- duplicated_list.append(match_text[0])
|
|
|
- if None == adjusted[idx]:
|
|
|
- adjusted[idx] = gens[idx]
|
|
|
+ if len(match_text) != 0:
|
|
|
+ if match_text[0] not in duplicated_list:
|
|
|
+ adjusted[idx] = match_text[0]
|
|
|
+ duplicated_list.append(match_text[0])
|
|
|
+ else:
|
|
|
+ if match_text[0] == adjusted[idx-1]:
|
|
|
+ adjusted[idx] = match_text[0]
|
|
|
+ else:
|
|
|
+ found = 0
|
|
|
+ for mt in match_text:
|
|
|
+ if mt not in duplicated_list:
|
|
|
+ adjusted[idx] = mt
|
|
|
+ found += 1
|
|
|
+ break
|
|
|
+ if found ==0:
|
|
|
+ adjusted[idx] = ' '
|
|
|
+ else :
|
|
|
+ adjusted[idx] = ' '
|
|
|
|
|
|
combine2_tag = [''.join([i,j]) for i,j in zip(gts_in, gts_in[1:])]
|
|
|
combine3_tag = [''.join([i,j,k]) for i,j,k in zip(gts_in, gts_in[1:], gts_in[2:])]
|
|
@@ -485,7 +493,6 @@ def adjustSub_by_text_similarity(gts_in,gens_raw):
|
|
|
for idx in range(len(adjusted)):
|
|
|
match_text = difflib.get_close_matches(adjusted[idx], alls_tag, cutoff=0.1)
|
|
|
adjusted[idx] = match_text[0]
|
|
|
-
|
|
|
return adjusted
|
|
|
|
|
|
def trim_punctuation(s):
|
|
@@ -666,8 +673,7 @@ def video_gen(name_hash,name,text_content, image_urls,multiLang,avatar):
|
|
|
img_list = [None]*len(img_clip_list)
|
|
|
|
|
|
img_file_ls = listdir(dir_photo+name_hash)
|
|
|
- print(img_file_ls)
|
|
|
- print(img_dict_ls)
|
|
|
+
|
|
|
for img_idx in range(len(img_file_ls)):
|
|
|
img_list[img_idx] = openshot.FFmpegReader(dir_photo+name_hash+'/'+img_file_ls[img_idx])
|
|
|
img_list[img_idx].Open()
|
|
@@ -692,20 +698,10 @@ def video_gen(name_hash,name,text_content, image_urls,multiLang,avatar):
|
|
|
t.Close()
|
|
|
w.Close()
|
|
|
|
|
|
- path = tmp_video_dir+name_hash+"script.txt"
|
|
|
- f = open(path, 'r')
|
|
|
- print(f.read())
|
|
|
- f.close()
|
|
|
- #os.remove(tmp_video_dir+name_hash+"raw.mp4")
|
|
|
- #os.remove(tmp_video_dir+name_hash+"script.txt")
|
|
|
- print(name+"ALL DONE : www.choozmo.com:8168/"+video_sub_folder+name_hash+"raw.mp4")
|
|
|
|
|
|
- Ctr_Autosub.init()
|
|
|
- Ctr_Autosub.generate_subtitles(tmp_video_dir+name_hash+".mp4",'zh',listener_progress,output=tmp_video_dir+name_hash+"script.txt",concurrency=DEFAULT_CONCURRENCY,subtitle_file_format=DEFAULT_SUBTITLE_FORMAT)
|
|
|
- path = tmp_video_dir+name_hash+"script.txt"
|
|
|
- f = open(path, 'r')
|
|
|
- print(f.read())
|
|
|
- f.close()
|
|
|
+ os.remove(tmp_video_dir+name_hash+"raw.mp4")
|
|
|
+ os.remove(tmp_video_dir+name_hash+"script.txt")
|
|
|
+ print(name+"ALL DONE : www.choozmo.com:8168/"+video_sub_folder+name_hash+"raw.mp4")
|
|
|
|
|
|
|
|
|
def anchor_video_v2(name_hash,name,text_content, image_urls,multiLang,avatar):
|