util.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404
  1. from autosub import DEFAULT_CONCURRENCY
  2. from autosub import DEFAULT_SUBTITLE_FORMAT
  3. from pytranscriber.control.ctr_main import Ctr_Main
  4. from pytranscriber.control.ctr_autosub import Ctr_Autosub
  5. import re,random, time
  6. from itertools import groupby
  7. from operator import itemgetter
  8. import requests,rpyc
  9. from difflib import SequenceMatcher
  10. from PIL import Image,ImageDraw,ImageFont
  11. import openshot
  12. def trim_punctuation(s):
  13. pat_block = u'[^\u4e00-\u9fff0-9a-zA-Z]+';
  14. pattern = u'([0-9]+{0}[0-9]+)|{0}'.format(pat_block)
  15. res = re.sub(pattern, lambda x: x.group(1) if x.group(1) else u" " ,s)
  16. return res
  17. def txt2image(content, save_target,lang='zh'):
  18. unicode_text = trim_punctuation(content)
  19. content = content.replace(' ','')
  20. font = ''
  21. if lang=='zh':
  22. font = ImageFont.truetype(font="font/DFT_B7.ttc", size=38)
  23. else :
  24. font = ImageFont.truetype(font="font/arial.ttf", size=38)
  25. text_width, text_height = font.getsize(unicode_text)
  26. canvas = Image.new('RGBA', (700, 500), (255, 0, 0, 0) )
  27. draw = ImageDraw.Draw(canvas)
  28. text= unicode_text
  29. draw.text((5,5), text, (255, 255, 0), font)
  30. canvas.save(save_target, "PNG")
  31. def generate_subtitle_image_from_dict(cpath, sub_dict):
  32. for script in sub_dict:
  33. sv_path = cpath + '/' + str(script['index'])+'.png'
  34. sub = script['content']
  35. txt2image(sub,sv_path)
  36. def listener_progress(string, percent):
  37. True
  38. def transScript(cpath):
  39. Ctr_Autosub.init()
  40. Ctr_Autosub.generate_subtitles(cpath+"speech.mp3",'zh'
  41. ,listener_progress
  42. ,output=cpath+"script.txt"
  43. ,concurrency=DEFAULT_CONCURRENCY,subtitle_file_format=DEFAULT_SUBTITLE_FORMAT)
  44. def syllable_count(word):
  45. word = word.lower()
  46. count = 0
  47. vowels = "aeiouy"
  48. if word[0] in vowels:
  49. count += 1
  50. for index in range(1, len(word)):
  51. if word[index] in vowels and word[index - 1] not in vowels:
  52. count += 1
  53. if word.endswith("e"):
  54. count -= 1
  55. if count == 0:
  56. count += 1
  57. return count
  58. def getKeyword(inStr):
  59. re.findall(r'[\u4e00-\u9fff]+', inStr)
  60. zh_idx = []
  61. eng_idx= []
  62. for i in range(len(inStr)):
  63. if inStr[i] > u'\u4e00' and inStr[i] < u'\u9fff':
  64. zh_idx.append(i)
  65. else:
  66. eng_idx.append(i)
  67. kws = obj.extractKeywordFromString(inStr)
  68. engStr =''
  69. for idx in range(len(eng_idx)):
  70. if idx != len(eng_idx)-1:
  71. if eng_idx[idx]+1 == eng_idx[idx+1]:
  72. engStr+=inStr[eng_idx[idx]]
  73. else:
  74. engStr+=inStr[eng_idx[idx]]
  75. if len(engStr)>2:
  76. kws.append((engStr,10))
  77. engStr=''
  78. else:
  79. engStr+=inStr[eng_idx[idx]]
  80. if len(engStr)>2:
  81. kws.append((engStr,10))
  82. engStr=''
  83. return kws
  84. def getImgDict():
  85. return [{'kw':'podcast','id':'17j41rqsoWVzc-HD8jvdxb651pbHJhvH8'}
  86. ,{'kw':'podcast','id':'1wT3uIaoe3xD-wrAo-J9eZweHEuEfI22_'}
  87. ,{'kw':'podcast','id':'1uaP8_xtqMn_Zbx3DX78ALdCtFjUPYgKQ'}
  88. ,{'kw':'podcast','id':'1L1NMByTorcDBN8EpbwwUakTexdRAiF4w'}
  89. ,{'kw':'youtuber','id':'17vUM8xrMgI9y1aEbAoprOxKiK9OOkipE'}
  90. ,{'kw':'支持者','id':'1sb-DmU5X9YE7HZLva_UueEvzcXqrKoGk'}
  91. ,{'kw':'app','id':'1jxoZuFlUHyl1L7-WB2ejPKBEW38bsbR6'}]
  92. def savekeywordImage(kw,imgDict):
  93. highest_val = 0
  94. highest_d = None
  95. for d in imgDict:
  96. sim = SequenceMatcher(None, d['kw'], kw).ratio()
  97. #print(sim,d['kw'],kw)
  98. if sim>highest_val :
  99. highest_val = sim
  100. highest_d = d
  101. return highest_d
  102. def get_script(cPath):
  103. fpath = cPath+'script.txt'
  104. with open(fpath, 'r',encoding="utf-8") as f:
  105. raw_lines = [line.strip() for line in f]
  106. lines =[]
  107. for idx in range(int(len(raw_lines)/4+1)):
  108. line_content = raw_lines[idx*4+2]
  109. lines.append(line_content)
  110. return lines
  111. def rewriteScript(cPath,newlines):
  112. fpath = cPath+'script.txt'
  113. with open(fpath, 'r',encoding="utf-8") as f:
  114. raw_lines = [line.strip() for line in f]
  115. for idx in range(int(len(raw_lines)/4+1)):
  116. raw_lines[idx*4+2] = newlines[idx]
  117. f = open(fpath, 'w',encoding="utf-8")
  118. for l in raw_lines:
  119. f.write(l+'\n')
  120. f.close()
  121. def parse_script(file_path):
  122. imgDict = getImgDict()
  123. with open(file_path, 'r',encoding="utf-8") as f:
  124. raw_lines = [line.strip() for line in f]
  125. dict_list = []
  126. sScript_dicts = []
  127. scriptIdx = 0
  128. for idx in range(int(len(raw_lines)/4+1)):
  129. script={}
  130. line_content = raw_lines[idx*4+2]
  131. script['content'] = line_content
  132. time_raw = raw_lines[idx * 4 +1 ].split(' --> ')
  133. start = time_raw[0].split(':')
  134. stop = time_raw[1].split(':')
  135. script['start'] = float(start[0])*3600 + float(start[1])*60 + float(start[2].replace(',','.'))
  136. script['stop'] = float(stop[0])*3600 + float(stop[1])*60 + float(stop[2].replace(',','.'))
  137. script['duration'] = script['stop']-script['start']
  138. duration = script['duration']
  139. start = script['start']
  140. try:
  141. kw= getKeyword(script['content'])
  142. kw.sort(key=lambda tup: tup[1], reverse=True)
  143. if kw[0][1]>2:
  144. script['kw'] = kw[0][0]
  145. else:
  146. script['kw'] = ''
  147. except Exception as e:
  148. script['kw']=''
  149. kwd = savekeywordImage(script['kw'],imgDict)
  150. if kwd is not None:
  151. script['imgid'] = kwd['id']
  152. imgDict.remove(kwd)
  153. else:
  154. script['imgid'] = None
  155. dict_list.append(script)
  156. accumulated_duration = 0
  157. for sen in shorten(script['content'],15):
  158. #print(sen)
  159. sScript = {}
  160. sScript['content'] = sen['content']
  161. sScript['index'] = scriptIdx
  162. scriptIdx+=1
  163. sScript['start'] = accumulated_duration+script['start']
  164. sScript['duration'] = duration*sen['time_ratio']
  165. accumulated_duration+=duration*sen['time_ratio']
  166. sScript_dicts.append(sScript)
  167. img_dicts=[]
  168. found = False
  169. start = 0
  170. stop = 0
  171. imgid=''
  172. imgidx = 0
  173. for d in dict_list:
  174. if d['imgid'] is not None:
  175. if found :
  176. img_dicts.append({'index':imgidx,'imgid':imgid,'start':start,'duration':d['start']-start})
  177. imgidx+=1
  178. start = d['start']
  179. imgid = d['imgid']
  180. else:
  181. found=True
  182. start = d['start']
  183. imgid = d['imgid']
  184. if d['start']==dict_list[-1]['start']:
  185. if d['imgid'] is not None:
  186. img_dicts.append({'index':imgidx,'imgid':d['imgid'],'start':d['start'],'duration':d['duration']})
  187. else:
  188. img_dicts.append({'index':imgidx,'imgid':imgid,'start':start,'duration':d['stop']-start})
  189. imgidx+=1
  190. return sScript_dicts, img_dicts
  191. def shorten(in_str, maxLen):
  192. re.findall(r'[\u4e00-\u9fff]+', in_str)
  193. zh_idx = []
  194. eng_idx= []
  195. for i in range(len(in_str)):
  196. if in_str[i] > u'\u4e00' and in_str[i] < u'\u9fff':
  197. zh_idx.append(i)
  198. else:
  199. eng_idx.append(i)
  200. space_index = [m.start() for m in re.finditer(' ', in_str)]
  201. for idx in space_index:
  202. eng_idx.remove(idx)
  203. eng_range_list = []
  204. for k, g in groupby(enumerate(eng_idx), lambda ix : ix[0] - ix[1]):
  205. eng_range = list(map(itemgetter(1), g))
  206. eng_range_list.append(eng_range)
  207. total_syllable = 0
  208. for i in range(len(eng_range_list)):
  209. total_syllable += (syllable_count(in_str[eng_range_list[i][0]:eng_range_list[i][-1]+1])+0.5)
  210. for i in range(len(zh_idx)):
  211. total_syllable+=1
  212. #final chchchchchc[en][en][en]
  213. #[en] is a vocabulary dict with occurence of image
  214. zh_eng_idx_list = []
  215. i = 0
  216. while i < len(in_str):
  217. if in_str[i]==' ':
  218. i+=1
  219. if i in zh_idx:
  220. zh_eng_idx_list.append(i)
  221. i+=1
  222. if i in eng_idx:
  223. for ls in eng_range_list:
  224. if i in ls:
  225. zh_eng_idx_list.append(ls)
  226. i = ls[-1]+1
  227. break
  228. zh_eng_dict_list = [{'content':'','time_ratio':0}]
  229. idx = 0
  230. current_len = 0
  231. sen_idx = 0
  232. while idx < len(zh_eng_idx_list):
  233. str_from_idx = ''
  234. sylla_cnt = 1
  235. if type(zh_eng_idx_list[idx])==type([]):
  236. str_from_idx = in_str[zh_eng_idx_list[idx][0]:zh_eng_idx_list[idx][-1]+1]+' '
  237. sylla_cnt = syllable_count(str_from_idx)
  238. else:
  239. str_from_idx = in_str[zh_eng_idx_list[idx]]
  240. if len(zh_eng_dict_list[sen_idx]['content'])+sylla_cnt>=maxLen:
  241. zh_eng_dict_list[sen_idx]['time_ratio'] = current_len/total_syllable
  242. zh_eng_dict_list.append({'content':'','time_ratio':0})
  243. sen_idx+=1
  244. current_len = 0
  245. else:
  246. current_len += sylla_cnt
  247. zh_eng_dict_list[sen_idx]['content'] += str_from_idx
  248. idx+=1
  249. total_ratio = 0
  250. for obj in zh_eng_dict_list:
  251. total_ratio+=obj['time_ratio']
  252. zh_eng_dict_list[-1]['time_ratio'] = 1-total_ratio
  253. return zh_eng_dict_list
  254. def video_writer_init(path):
  255. w = openshot.FFmpegWriter(path)
  256. w.SetAudioOptions(True, "aac", 44100, 2, openshot.LAYOUT_STEREO, 3000000)
  257. w.SetVideoOptions(True, "libx264", openshot.Fraction(30000, 1000), 1280, 720,
  258. openshot.Fraction(1, 1), False, False, 3000000)
  259. return w
  260. def cKey(r,g,b,fuzz):
  261. col=openshot.Color()
  262. col.red=openshot.Keyframe(r)
  263. col.green=openshot.Keyframe(g)
  264. col.blue=openshot.Keyframe(b)
  265. return openshot.ChromaKey(col, openshot.Keyframe(fuzz))
  266. def video_photo_clip(vid=None,layer=None, position=None, end=None
  267. ,scale_x=1,scale_y=1,location_x=0,location_y=0,ck=None,audio=True):
  268. clip = openshot.Clip(vid)
  269. clip.Layer(layer)
  270. clip.Position(position)
  271. clip.End(end)
  272. clip.scale_x=openshot.Keyframe(scale_x)
  273. clip.scale_y=openshot.Keyframe(scale_y)
  274. clip.location_x=openshot.Keyframe(location_x)
  275. clip.location_y=openshot.Keyframe(location_y)
  276. if ck!=None:
  277. clip.AddEffect(ck)
  278. if audio==True:
  279. clip.has_audio=openshot.Keyframe(1)
  280. else:
  281. clip.has_audio=openshot.Keyframe(0)
  282. return clip
  283. def downloadFromDrive(cPath,fid,idx):
  284. download_file_from_google_drive(fid, cPath+str(idx)+'img.jpg')
  285. def download_file_from_google_drive(id, destination):
  286. URL = "https://docs.google.com/uc?export=download"
  287. session = requests.Session()
  288. response = session.get(URL, params = { 'id' : id }, stream = True)
  289. token = get_confirm_token(response)
  290. if token:
  291. params = { 'id' : id, 'confirm' : token }
  292. response = session.get(URL, params = params, stream = True)
  293. save_response_content(response, destination)
  294. def get_confirm_token(response):
  295. for key, value in response.cookies.items():
  296. if key.startswith('download_warning'):
  297. return value
  298. return None
  299. def save_response_content(response, destination):
  300. CHUNK_SIZE = 32768
  301. with open(destination, "wb") as f:
  302. for chunk in response.iter_content(CHUNK_SIZE):
  303. if chunk: # filter out keep-alive new chunks
  304. f.write(chunk)
  305. def call_anchor(fileName,avatar):
  306. conn = rpyc.classic.connect("192.168.1.111",18812)
  307. ros = conn.modules.os
  308. rsys = conn.modules.sys
  309. fr=open(fileName,'rb')# local svoice
  310. #warning!!! file my be replaced by other process
  311. fw=conn.builtins.open('/tmp/output.mp3','wb')# remote
  312. while True:
  313. b=fr.read(1024)
  314. if b:
  315. fw.write(b)
  316. else:
  317. break
  318. fr.close()
  319. fw.close()
  320. val=random.randint(1000000,9999999)
  321. ros.chdir('/home/jared/to_video')
  322. ros.system('./p'+str(avatar)+'.sh '+str(val)+' &')
  323. while True:
  324. print('waiting...')
  325. if ros.path.exists('/tmp/results/'+str(val)):
  326. break
  327. time.sleep(5)
  328. print('waiting...')
  329. fr=conn.builtins.open('/tmp/results/'+str(val)+'.mp4','rb')
  330. newfileName = fileName.replace('speech.mp3','speaker.mp4')
  331. fw=open(newfileName,'wb')#local anchor
  332. while True:
  333. b=fr.read(1024)
  334. if b:
  335. fw.write(b)
  336. else:
  337. break
  338. fr.close()
  339. fw.close()