|
@@ -528,7 +528,36 @@ def generate_subtitle_image(name_hash,text_content):
|
|
|
sv_path = dir_subtitle + name_hash +'/'+str(idx)+ str(inner_idx) +'.png'
|
|
|
sub = senList[inner_idx]
|
|
|
txt2image(sub,sv_path)
|
|
|
- img_list[idx]+=[{"count":len(sub),"path":sv_path}]
|
|
|
+ clean_content = trim_punctuation(sub)
|
|
|
+
|
|
|
+
|
|
|
+ re.findall(r'[\u4e00-\u9fff]+', clean_content)
|
|
|
+
|
|
|
+ zh_idx = []
|
|
|
+ eng_idx= []
|
|
|
+ for i in range(len(clean_content)):
|
|
|
+ if clean_content[i] > u'\u4e00' and in_str[i] < u'\u9fff':
|
|
|
+ zh_idx.append(i)
|
|
|
+ else:
|
|
|
+ eng_idx.append(i)
|
|
|
+
|
|
|
+ space_index = [m.start() for m in re.finditer(' ', clean_content)]
|
|
|
+ for idx in space_index:
|
|
|
+ eng_idx.remove(idx)
|
|
|
+
|
|
|
+ eng_range_list = []
|
|
|
+ for k, g in groupby(enumerate(eng_idx), lambda ix : ix[0] - ix[1]):
|
|
|
+ eng_range = list(map(itemgetter(1), g))
|
|
|
+ eng_range_list.append(eng_range)
|
|
|
+
|
|
|
+ total_syllable = 0
|
|
|
+ for i in range(len(eng_range_list)):
|
|
|
+ total_syllable += (syllable_count(clean_content[eng_range_list[i][0]:eng_range_list[i][-1]+1])+0.5)
|
|
|
+ for i in range(len(zh_idx)):
|
|
|
+ total_syllable+=1
|
|
|
+
|
|
|
+
|
|
|
+ img_list[idx]+=[{"count":total_syllable,"path":sv_path}]
|
|
|
return img_list
|
|
|
|
|
|
def generate_subtitle_image_ENG(name_hash,text_content):
|