Kaynağa Gözat

自動上字幕04/17

Mia 2 yıl önce
ebeveyn
işleme
2455580b3b
1 değiştirilmiş dosya ile 52 ekleme ve 21 silme
  1. 52 21
      openshot_word.py

+ 52 - 21
openshot_word.py

@@ -1,6 +1,9 @@
 import openshot
 import os
 import re
+import time
+import pysrt
+import shutil
 from PIL import Image,ImageDraw,ImageFont
 
 def cKey(r,g,b,fuzz):
@@ -42,6 +45,7 @@ def trim_punctuation(s):
     res = re.sub(pattern, lambda x: x.group(1) if x.group(1) else u" " ,s)
     return res
 
+#文字轉圖片
 def txt2image(content, save_target,lang='zh',size=26,fon="font/DFT_B7.ttc"):
     unicode_text = trim_punctuation(content)
     font = ''
@@ -49,16 +53,23 @@ def txt2image(content, save_target,lang='zh',size=26,fon="font/DFT_B7.ttc"):
         font = ImageFont.truetype(font=fon, size=size)
     else :
         font = ImageFont.truetype(font="font/arial.ttf", size=size)
-    text_width, text_height = font.getsize(unicode_text)
+    
     W, H = (1280,500)
     canvas = Image.new('RGBA', (W, H), (255, 255, 255, 0) )
     draw = ImageDraw.Draw(canvas)
-    w, h = draw.textsize(content,font = font)
+    
     text= unicode_text
-    draw.text(((W-w)/2,0), text,'black', font)
+    if len(text)>10 :
+        w, h = draw.textsize(text[0:18],font = font)
+        draw.text(((W-w)/2,0), text[0:18],'black', font)
+        w, h = draw.textsize(text[18:],font = font)
+        draw.text(((W-w)/2,h+2), text[18:],'black', font)
+    else:
+        w, h = draw.textsize(content,font = font)
+        draw.text(((W-w)/2,0), text,'black', font)
     canvas.save(save_target, "PNG")
 
-def text_to_short_vedio(bg,text_form,vedio_time):
+def text_to_short_vedio(mp4_file = "input/example/test3.mp4",sound_file = "input/example/test3.mp4",vedio_time = 30,output_filename="output/demo.mp4",text_font = "font/DFT_B7.ttc"):
     t = openshot.Timeline(1280, 720, openshot.Fraction(30000, 1000), 44100, 2, openshot.LAYOUT_STEREO)
     t.Open()
     
@@ -66,30 +77,42 @@ def text_to_short_vedio(bg,text_form,vedio_time):
     ck = cKey(0, 254, 0, 270)
     ck_anchor = cKey(0, 255, 0, 320)
 
-    anchor = openshot.FFmpegReader(bg)
+    anchor = openshot.FFmpegReader(mp4_file)
     anchor.Open()
     anchor_clip = video_photo_clip(video=anchor,layer=2,scale_x=1,scale_y=1,
             location_x=0,location_y=0,position=0, end=vedio_time,ck=ck_anchor,audio=True)
     t.AddClip(anchor_clip)
     anchor.Close()
+    number = 0
+
+    #音檔自動產生srt(逐字稿)
+    cmd = "autosub -S zh-TW -D zh-TW " + sound_file
+    os.system(cmd)
+
+    #開啟srt檔
+    sound_srt_file = sound_file.split('.')[0] + ".srt"
+    subs = pysrt.open(sound_srt_file)
+    text_form = []
+    for context in subs:
+        #print(context.start.minutes*60+context.start.seconds+ 0.001*context.start.milliseconds)
+        end = context.end-context.start
+        end_timeStamp=(end.minutes*60+end.seconds+ 0.001*end.milliseconds)
+        start_timeStamp=(context.start.minutes*60+context.start.seconds+ 0.001*context.start.milliseconds)
+        text_form.append({'text':context.text,'start':start_timeStamp,'end':end_timeStamp,'size':30,'font':text_font})
 
     for text_tmp in text_form:
-        print(text_tmp['text'])
-        time = float(text_tmp['end']) - float(text_tmp['start'])
-        file_name = "tmp/save_target_" + text_tmp['text'] + ".png"
-        txt2image(text_tmp['text'], file_name,lang='zh',size = text_tmp['size'])
-        exec('text_anchor_{} = openshot.QtImageReader("tmp/save_target_{}.png")'.format(text_tmp['text'],text_tmp['text']))
-        exec('text_anchor_{}.Open()'.format(text_tmp['text']))
-        exec('text_anchor_{}.Open()'.format(text_tmp['text']))
+        file_name = "tmp/save_target_" + str(number) + ".png"
+        txt2image(text_tmp['text'], file_name,lang='zh',size = text_tmp['size'],fon = text_tmp['font'])
+        exec('text_anchor_{} = openshot.QtImageReader("tmp/save_target_{}.png")'.format(number,number))
+        exec('text_anchor_{}.Open()'.format(number))
+        exec('text_anchor_{}.Open()'.format(number))
         exec('text_anchor_clip_{} = video_photo_clip(video=text_anchor_{},layer=4,scale_x=1,scale_y=1,\
-                location_x=0,location_y=0.7,position=text_tmp["start"], end=time,ck=ck_anchor,audio=True)'.format(text_tmp['text'],text_tmp['text']))
-        exec('t.AddClip(text_anchor_clip_{})'.format(text_tmp['text']))
-        exec('text_anchor_{}.Close()'.format(text_tmp['text']))
-        exec('os.remove("tmp/save_target_{}.png")'.format(text_tmp['text']))
-        
+                location_x=0,location_y=0.7,position=text_tmp["start"], end=text_tmp["end"],ck=ck_anchor,audio=True)'.format(number,number))
+        exec('t.AddClip(text_anchor_clip_{})'.format(number))
+        exec('text_anchor_{}.Close()'.format(number))
+        number = number+1
 
-
-    w = video_writer_init("output/test2.mp4")
+    w = video_writer_init(output_filename)
     w.Open()
 
     frames = int(t.info.fps)*int(vedio_time)
@@ -102,5 +125,13 @@ def text_to_short_vedio(bg,text_form,vedio_time):
 
 
 
 
 if __name__ == '__main__':
-    text_form = [{'text':"texttexttext",'start':0,'end':3,'size':26},{'text':"test22222222",'start':4,'end':6,'size':26}]
-    text_to_short_vedio("input/bg/dog.mp4",text_form,6)
+    
+        
+    #text_form = [{'text':"texttexttext",'start':0,'end':3,'size':26},{'text':"test22222222",'start':4,'end':6,'size':26}]
+    #print(text_form)
+    text_to_short_vedio(mp4_file = "input/example/test3.mp4",sound_file ='input/example/test3.mp4',vedio_time =284,text_font ="font/DFT_HNT7.ttc")
+
+    #刪除暫存檔案
+    shutil.rmtree('tmp')
+    os.mkdir('tmp')
+