util.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. from autosub import DEFAULT_CONCURRENCY
  2. from autosub import DEFAULT_SUBTITLE_FORMAT
  3. from pytranscriber.control.ctr_main import Ctr_Main
  4. from pytranscriber.control.ctr_autosub import Ctr_Autosub
  5. import re,random, time
  6. from itertools import groupby
  7. from operator import itemgetter
  8. import requests,rpyc
  9. from difflib import SequenceMatcher
  10. from PIL import Image,ImageDraw,ImageFont
  11. def trim_punctuation(s):
  12. pat_block = u'[^\u4e00-\u9fff0-9a-zA-Z]+';
  13. pattern = u'([0-9]+{0}[0-9]+)|{0}'.format(pat_block)
  14. res = re.sub(pattern, lambda x: x.group(1) if x.group(1) else u" " ,s)
  15. return res
  16. def txt2image(content, save_target,lang='zh'):
  17. unicode_text = trim_punctuation(content)
  18. content = content.replace(' ','')
  19. font = ''
  20. if lang=='zh':
  21. font = ImageFont.truetype(font="font/DFT_B7.ttc", size=38)
  22. else :
  23. font = ImageFont.truetype(font="font/arial.ttf", size=38)
  24. text_width, text_height = font.getsize(unicode_text)
  25. canvas = Image.new('RGBA', (700, 500), (255, 0, 0, 0) )
  26. draw = ImageDraw.Draw(canvas)
  27. text= unicode_text
  28. draw.text((5,5), text, (255, 255, 0), font)
  29. canvas.save(save_target, "PNG")
  30. def generate_subtitle_image_from_dict(cpath, sub_dict):
  31. for script in sub_dict:
  32. sv_path = cpath + '/' + str(script['index'])+'.png'
  33. sub = script['content']
  34. txt2image(sub,sv_path)
  35. def listener_progress(string, percent):
  36. True
  37. def transScript(cpath):
  38. Ctr_Autosub.init()
  39. Ctr_Autosub.generate_subtitles(cpath+"speech.mp3",'zh'
  40. ,listener_progress
  41. ,output=cpath+"script.txt"
  42. ,concurrency=DEFAULT_CONCURRENCY,subtitle_file_format=DEFAULT_SUBTITLE_FORMAT)
  43. def syllable_count(word):
  44. word = word.lower()
  45. count = 0
  46. vowels = "aeiouy"
  47. if word[0] in vowels:
  48. count += 1
  49. for index in range(1, len(word)):
  50. if word[index] in vowels and word[index - 1] not in vowels:
  51. count += 1
  52. if word.endswith("e"):
  53. count -= 1
  54. if count == 0:
  55. count += 1
  56. return count
  57. def getKeyword(inStr):
  # Build a list of (keyword, weight) tuples for inStr and return it.
  # NOTE(review): this listing has lost its indentation, so the exact nesting
  # of the statements below cannot be confirmed from here; the comments only
  # describe what each statement does.
  # The result of this findall is discarded.
  58. re.findall(r'[\u4e00-\u9fff]+', inStr)
  59. zh_idx = []
  60. eng_idx= []
  # Split character positions into "Chinese" and "other". Both comparisons are
  # strict, so U+4E00 and U+9FFF themselves land in eng_idx.
  61. for i in range(len(inStr)):
  62. if inStr[i] > u'\u4e00' and inStr[i] < u'\u9fff':
  63. zh_idx.append(i)
  64. else:
  65. eng_idx.append(i)
  # zh_idx is filled above but not read again in this function.
  # `obj` is not defined anywhere in the visible source; presumably a
  # module-level keyword extractor -- TODO confirm where it comes from.
  66. kws = obj.extractKeywordFromString(inStr)
  67. engStr =''
  # Walk the non-Chinese positions, accumulating characters into engStr while
  # the positions are adjacent; an accumulated string longer than 2 characters
  # is appended to kws with weight 10.
  68. for idx in range(len(eng_idx)):
  69. if idx != len(eng_idx)-1:
  70. if eng_idx[idx]+1 == eng_idx[idx+1]:
  71. engStr+=inStr[eng_idx[idx]]
  72. else:
  73. engStr+=inStr[eng_idx[idx]]
  74. if len(engStr)>2:
  75. kws.append((engStr,10))
  # NOTE(review): whether this reset runs only after an append or after every
  # run is not recoverable from the flattened listing -- confirm.
  76. engStr=''
  # Last non-Chinese position: there is no successor to compare against.
  77. else:
  78. engStr+=inStr[eng_idx[idx]]
  79. if len(engStr)>2:
  80. kws.append((engStr,10))
  81. engStr=''
  82. return kws
  83. def getImgDict():
  84. return [{'kw':'podcast','id':'17j41rqsoWVzc-HD8jvdxb651pbHJhvH8'}
  85. ,{'kw':'podcast','id':'1wT3uIaoe3xD-wrAo-J9eZweHEuEfI22_'}
  86. ,{'kw':'podcast','id':'1uaP8_xtqMn_Zbx3DX78ALdCtFjUPYgKQ'}
  87. ,{'kw':'podcast','id':'1L1NMByTorcDBN8EpbwwUakTexdRAiF4w'}
  88. ,{'kw':'youtuber','id':'17vUM8xrMgI9y1aEbAoprOxKiK9OOkipE'}
  89. ,{'kw':'支持者','id':'1sb-DmU5X9YE7HZLva_UueEvzcXqrKoGk'}
  90. ,{'kw':'app','id':'1jxoZuFlUHyl1L7-WB2ejPKBEW38bsbR6'}]
  91. def savekeywordImage(kw,imgDict):
  92. highest_val = 0
  93. highest_d = None
  94. for d in imgDict:
  95. sim = SequenceMatcher(None, d['kw'], kw).ratio()
  96. #print(sim,d['kw'],kw)
  97. if sim>highest_val :
  98. highest_val = sim
  99. highest_d = d
  100. return highest_d
  101. def get_script(cPath):
  102. fpath = cPath+'script.txt'
  103. with open(fpath, 'r',encoding="utf-8") as f:
  104. raw_lines = [line.strip() for line in f]
  105. lines =[]
  106. for idx in range(int(len(raw_lines)/4+1)):
  107. line_content = raw_lines[idx*4+2]
  108. lines.append(line_content)
  109. return lines
  110. def rewriteScript(cPath,newlines):
  111. fpath = cPath+'script.txt'
  112. with open(fpath, 'r',encoding="utf-8") as f:
  113. raw_lines = [line.strip() for line in f]
  114. for idx in range(int(len(raw_lines)/4+1)):
  115. raw_lines[idx*4+2] = newlines[idx]
  116. f = open(fpath, 'w',encoding="utf-8")
  117. for l in raw_lines:
  118. f.write(l+'\n')
  119. f.close()
  120. def parse_script(file_path):
  # Parse the subtitle file at file_path and return a pair
  # (sScript_dicts, img_dicts): short subtitle pieces with timing, and image
  # display intervals.
  # NOTE(review): this listing has lost its indentation, so the exact nesting
  # of the statements below cannot be confirmed from here; the comments only
  # describe what each statement does.
  121. imgDict = getImgDict()
  122. with open(file_path, 'r',encoding="utf-8") as f:
  123. raw_lines = [line.strip() for line in f]
  124. dict_list = []
  125. sScript_dicts = []
  126. scriptIdx = 0
  # The file is read in groups of four lines: offset +1 holds the
  # "start --> stop" time range, offset +2 the text.
  # NOTE(review): the computed count reaches past the end of raw_lines when
  # len(raw_lines) % 4 is 0, 1 or 2 (IndexError) -- confirm the file layout.
  127. for idx in range(int(len(raw_lines)/4+1)):
  128. script={}
  129. line_content = raw_lines[idx*4+2]
  130. script['content'] = line_content
  131. time_raw = raw_lines[idx * 4 +1 ].split(' --> ')
  132. start = time_raw[0].split(':')
  133. stop = time_raw[1].split(':')
  # Convert "H:M:S,ms" fields to seconds; the decimal comma becomes a dot.
  134. script['start'] = float(start[0])*3600 + float(start[1])*60 + float(start[2].replace(',','.'))
  135. script['stop'] = float(stop[0])*3600 + float(stop[1])*60 + float(stop[2].replace(',','.'))
  136. script['duration'] = script['stop']-script['start']
  137. duration = script['duration']
  138. start = script['start']
  # Pick the highest-weighted keyword, but only if its weight exceeds 2.
  # Any exception during extraction results in an empty keyword.
  139. try:
  140. kw= getKeyword(script['content'])
  141. kw.sort(key=lambda tup: tup[1], reverse=True)
  142. if kw[0][1]>2:
  143. script['kw'] = kw[0][0]
  144. else:
  145. script['kw'] = ''
  146. except Exception as e:
  147. script['kw']=''
  # Look up the most similar image record; a matched record is removed from
  # imgDict, so it is not matched again later in this call.
  148. kwd = savekeywordImage(script['kw'],imgDict)
  149. if kwd is not None:
  150. script['imgid'] = kwd['id']
  151. imgDict.remove(kwd)
  152. else:
  153. script['imgid'] = None
  154. dict_list.append(script)
  # Split the text with shorten(..., 15); each piece receives a running index
  # and a share of the entry duration given by its 'time_ratio'.
  155. accumulated_duration = 0
  156. for sen in shorten(script['content'],15):
  157. #print(sen)
  158. sScript = {}
  159. sScript['content'] = sen['content']
  160. sScript['index'] = scriptIdx
  161. scriptIdx+=1
  162. sScript['start'] = accumulated_duration+script['start']
  163. sScript['duration'] = duration*sen['time_ratio']
  164. accumulated_duration+=duration*sen['time_ratio']
  165. sScript_dicts.append(sScript)
  # Build the image intervals from the entries that carry an image id.
  166. img_dicts=[]
  167. found = False
  168. start = 0
  169. stop = 0
  170. imgid=''
  171. imgidx = 0
  172. for d in dict_list:
  173. if d['imgid'] is not None:
  # A later image closes the interval of the previously remembered one.
  174. if found :
  175. img_dicts.append({'index':imgidx,'imgid':imgid,'start':start,'duration':d['start']-start})
  176. imgidx+=1
  177. start = d['start']
  178. imgid = d['imgid']
  179. else:
  180. found=True
  181. start = d['start']
  182. imgid = d['imgid']
  # The last entry is detected by comparing start times with dict_list[-1].
  183. if d['start']==dict_list[-1]['start']:
  184. if d['imgid'] is not None:
  185. img_dicts.append({'index':imgidx,'imgid':d['imgid'],'start':d['start'],'duration':d['duration']})
  186. else:
  187. img_dicts.append({'index':imgidx,'imgid':imgid,'start':start,'duration':d['stop']-start})
  # NOTE(review): the block this increment belongs to is not recoverable from
  # the flattened listing -- confirm.
  188. imgidx+=1
  189. return sScript_dicts, img_dicts
  190. def shorten(in_str, maxLen):
  # Split in_str into pieces and return a list of dicts with the keys
  # 'content' (piece text) and 'time_ratio' (share of the total duration).
  # NOTE(review): this listing has lost its indentation, so the exact nesting
  # of the statements below cannot be confirmed from here; the comments only
  # describe what each statement does.
  # The result of this findall is discarded.
  191. re.findall(r'[\u4e00-\u9fff]+', in_str)
  192. zh_idx = []
  193. eng_idx= []
  # Split character positions into "Chinese" and "other". Both comparisons are
  # strict, so U+4E00 and U+9FFF themselves land in eng_idx.
  194. for i in range(len(in_str)):
  195. if in_str[i] > u'\u4e00' and in_str[i] < u'\u9fff':
  196. zh_idx.append(i)
  197. else:
  198. eng_idx.append(i)
  # Space positions are dropped from eng_idx, so spaces separate runs.
  199. space_index = [m.start() for m in re.finditer(' ', in_str)]
  200. for idx in space_index:
  201. eng_idx.remove(idx)
  # Group consecutive positions into runs: (enumeration index - position) is
  # constant within a run of adjacent positions.
  202. eng_range_list = []
  203. for k, g in groupby(enumerate(eng_idx), lambda ix : ix[0] - ix[1]):
  204. eng_range = list(map(itemgetter(1), g))
  205. eng_range_list.append(eng_range)
  # Total weight: syllable_count(run) + 0.5 per run, plus 1 per Chinese char.
  206. total_syllable = 0
  207. for i in range(len(eng_range_list)):
  208. total_syllable += (syllable_count(in_str[eng_range_list[i][0]:eng_range_list[i][-1]+1])+0.5)
  209. for i in range(len(zh_idx)):
  210. total_syllable+=1
  211. #final chchchchchc[en][en][en]
  212. #[en] is a vocabulary dict with occurence of image
  # Build an ordered token list: an int for a Chinese character position, a
  # list of positions for a non-Chinese run.
  213. zh_eng_idx_list = []
  214. i = 0
  215. while i < len(in_str):
  216. if in_str[i]==' ':
  217. i+=1
  218. if i in zh_idx:
  219. zh_eng_idx_list.append(i)
  220. i+=1
  221. if i in eng_idx:
  222. for ls in eng_range_list:
  223. if i in ls:
  224. zh_eng_idx_list.append(ls)
  225. i = ls[-1]+1
  226. break
  # Pack the tokens into pieces, starting with a single empty piece.
  227. zh_eng_dict_list = [{'content':'','time_ratio':0}]
  228. idx = 0
  229. current_len = 0
  230. sen_idx = 0
  231. while idx < len(zh_eng_idx_list):
  232. str_from_idx = ''
  233. sylla_cnt = 1
  234. if type(zh_eng_idx_list[idx])==type([]):
  # A run token is rendered with a trailing space, and that padded string is
  # what gets passed to syllable_count here.
  235. str_from_idx = in_str[zh_eng_idx_list[idx][0]:zh_eng_idx_list[idx][-1]+1]+' '
  236. sylla_cnt = syllable_count(str_from_idx)
  237. else:
  238. str_from_idx = in_str[zh_eng_idx_list[idx]]
  # The limit check adds the piece's character length to the token's syllable
  # count. When it trips, the current piece is closed and a new one is opened.
  239. if len(zh_eng_dict_list[sen_idx]['content'])+sylla_cnt>=maxLen:
  240. zh_eng_dict_list[sen_idx]['time_ratio'] = current_len/total_syllable
  241. zh_eng_dict_list.append({'content':'','time_ratio':0})
  242. sen_idx+=1
  243. current_len = 0
  244. else:
  245. current_len += sylla_cnt
  246. zh_eng_dict_list[sen_idx]['content'] += str_from_idx
  # NOTE(review): whether this increment runs on every iteration or only when
  # the token was appended is not recoverable from the flattened listing; it
  # decides whether an overflowing token is retried or skipped -- confirm.
  247. idx+=1
  # The last piece receives whatever ratio is left so the ratios sum to 1.
  248. total_ratio = 0
  249. for obj in zh_eng_dict_list:
  250. total_ratio+=obj['time_ratio']
  251. zh_eng_dict_list[-1]['time_ratio'] = 1-total_ratio
  252. return zh_eng_dict_list
  253. def video_writer_init(path):
  254. w = openshot.FFmpegWriter(path)
  255. w.SetAudioOptions(True, "aac", 44100, 2, openshot.LAYOUT_STEREO, 3000000)
  256. w.SetVideoOptions(True, "libx264", openshot.Fraction(30000, 1000), 1280, 720,
  257. openshot.Fraction(1, 1), False, False, 3000000)
  258. return w
  259. def cKey(r,g,b,fuzz):
  260. col=openshot.Color()
  261. col.red=openshot.Keyframe(r)
  262. col.green=openshot.Keyframe(g)
  263. col.blue=openshot.Keyframe(b)
  264. return openshot.ChromaKey(col, openshot.Keyframe(fuzz))
  265. def video_photo_clip(vid=None,layer=None, position=None, end=None
  266. ,scale_x=1,scale_y=1,location_x=0,location_y=0,ck=None,audio=True):
  267. clip = openshot.Clip(vid)
  268. clip.Layer(layer)
  269. clip.Position(position)
  270. clip.End(end)
  271. clip.scale_x=openshot.Keyframe(scale_x)
  272. clip.scale_y=openshot.Keyframe(scale_y)
  273. clip.location_x=openshot.Keyframe(location_x)
  274. clip.location_y=openshot.Keyframe(location_y)
  275. if ck!=None:
  276. clip.AddEffect(ck)
  277. if audio==True:
  278. clip.has_audio=openshot.Keyframe(1)
  279. else:
  280. clip.has_audio=openshot.Keyframe(0)
  281. return clip
  282. def downloadFromDrive(cPath,fid,idx):
  283. download_file_from_google_drive(fid, cPath+str(idx)+'img.jpg')
  284. def download_file_from_google_drive(id, destination):
  285. URL = "https://docs.google.com/uc?export=download"
  286. session = requests.Session()
  287. response = session.get(URL, params = { 'id' : id }, stream = True)
  288. token = get_confirm_token(response)
  289. if token:
  290. params = { 'id' : id, 'confirm' : token }
  291. response = session.get(URL, params = params, stream = True)
  292. save_response_content(response, destination)
  293. def get_confirm_token(response):
  294. for key, value in response.cookies.items():
  295. if key.startswith('download_warning'):
  296. return value
  297. return None
  298. def save_response_content(response, destination):
  299. CHUNK_SIZE = 32768
  300. with open(destination, "wb") as f:
  301. for chunk in response.iter_content(CHUNK_SIZE):
  302. if chunk: # filter out keep-alive new chunks
  303. f.write(chunk)
  304. def call_anchor(fileName,avatar):
  305. conn = rpyc.classic.connect("192.168.1.111",18812)
  306. ros = conn.modules.os
  307. rsys = conn.modules.sys
  308. fr=open(fileName,'rb')# local svoice
  309. #warning!!! file my be replaced by other process
  310. fw=conn.builtins.open('/tmp/output.mp3','wb')# remote
  311. while True:
  312. b=fr.read(1024)
  313. if b:
  314. fw.write(b)
  315. else:
  316. break
  317. fr.close()
  318. fw.close()
  319. val=random.randint(1000000,9999999)
  320. ros.chdir('/home/jared/to_video')
  321. ros.system('./p'+str(avatar)+'.sh '+str(val)+' &')
  322. while True:
  323. print('waiting...')
  324. if ros.path.exists('/tmp/results/'+str(val)):
  325. break
  326. time.sleep(5)
  327. print('waiting...')
  328. fr=conn.builtins.open('/tmp/results/'+str(val)+'.mp4','rb')
  329. newfileName = fileName.replace('speech.mp3','speaker.mp4')
  330. fw=open(newfileName,'wb')#local anchor
  331. while True:
  332. b=fr.read(1024)
  333. if b:
  334. fw.write(b)
  335. else:
  336. break
  337. fr.close()
  338. fw.close()