# ProcessSub.py
from difflib import SequenceMatcher

from gtts import gTTS
from mutagen.mp3 import MP3
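'''
Plan:
1. Take two arrays: (1) the ground-truth subtitles and (2) the subtitles
   generated by pyTranscriber.
2. Take the first entry from each array; if the ground-truth entry is too
   short, take two, three or even more ground-truth entries.
3. (not written yet)
Open question: compare the entries by MP3 length?
'''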
  8. def similar(a, b):
  9. return SequenceMatcher(None, a, b).ratio()
  10. def gen_mp3_from_text(gt,gen):
  11. tts=gTTS(text=gen, lang='zh')
  12. tts.save("gen_tmp.mp3")
  13. tts=gTTS(text=gt, lang='zh')
  14. tts.save("gt_tmp.mp3")
  15. audio = MP3("gen_tmp.mp3")
  16. gen_len = audio.info.length
  17. audio = MP3("gt_tmp.mp3")
  18. gt_len = audio.info.length
  19. return gt_len, gen_len
  20. def adjustSub(gt_array, generated_array):
  21. for gen_idx in
  22. return adjusted_array
  23. from gtts import gTTS
  24. tts=gTTS(text='玉米燕麥豆是一位居住在英國倫敦的日益女性', lang='zh')
  25. tts.save("1.mp3")
  26. tts=gTTS(text='yumi yamato是一位居住在英國倫敦的日益女性', lang='zh')
  27. tts.save("2.mp3")
  28. from mutagen.mp3 import MP3
  29. audio = MP3("1.mp3")
  30. print(audio.info.length)
  31. audio = MP3("2.mp3")
  32. print(audio.info.length)