ctr_autosub.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145
  1. from autosub import FLACConverter
  2. from autosub import SpeechRecognizer
  3. from autosub import extract_audio
  4. from autosub import find_speech_regions
  5. from autosub import DEFAULT_CONCURRENCY
  6. from autosub import DEFAULT_SUBTITLE_FORMAT
  7. from autosub import GOOGLE_SPEECH_API_KEY
  8. from autosub.formatters import FORMATTERS
  9. import multiprocessing
  10. import time
  11. import os
  12. from pytranscriber.util.util import MyUtil
  13. class Ctr_Autosub():
  14. cancel = False
  15. @staticmethod
  16. def init():
  17. Ctr_Autosub.cancel = False
  18. @staticmethod
  19. def is_operation_canceled():
  20. return Ctr_Autosub.cancel
  21. @staticmethod
  22. def output_progress(listener_progress, str_task, progress_percent):
  23. # only update progress if not requested to cancel
  24. if not Ctr_Autosub.cancel:
  25. listener_progress(str_task, progress_percent)
  26. @staticmethod
  27. def cancel_operation():
  28. Ctr_Autosub.cancel = True
  29. while Ctr_Autosub.step == 0:
  30. time.sleep(0.1)
  31. # the first step involves ffmpeg and cannot be stopped safely
  32. if Ctr_Autosub.step == 1:
  33. # close wait for threads to finish their work first
  34. Ctr_Autosub.pool.close()
  35. Ctr_Autosub.pool.join()
  36. else:
  37. # terminates the threads immediately
  38. Ctr_Autosub.pool.terminate()
  39. Ctr_Autosub.pool.join()
  40. @staticmethod
  41. def generate_subtitles(# pylint: disable=too-many-locals,too-many-arguments
  42. source_path,
  43. src_language,
  44. listener_progress,
  45. output=None,
  46. concurrency=DEFAULT_CONCURRENCY,
  47. subtitle_file_format=DEFAULT_SUBTITLE_FORMAT
  48. ):
  49. # windows not support forkserver... only spawn
  50. if os.name != "nt" and "Darwin" in os.uname():
  51. # necessary for running on MacOS
  52. # method can be set only once, otherwise crash
  53. #from python 3.8 above the default for macos is spawn and not fork
  54. if 'spawn' != multiprocessing.get_start_method(allow_none=True):
  55. multiprocessing.set_start_method('spawn')
  56. Ctr_Autosub.cancel = False
  57. Ctr_Autosub.step = 0
  58. """
  59. Given an input audio/video file, generate subtitles in the specified language and format.
  60. """
  61. audio_filename, audio_rate = extract_audio(source_path)
  62. regions = find_speech_regions(audio_filename)
  63. converter = FLACConverter(source_path=audio_filename)
  64. recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
  65. api_key=GOOGLE_SPEECH_API_KEY)
  66. transcripts = []
  67. if regions:
  68. try:
  69. if Ctr_Autosub.cancel:
  70. return -1
  71. str_task_1 = "Step 1 of 2: Converting speech regions to FLAC files "
  72. len_regions = len(regions)
  73. extracted_regions = []
  74. Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
  75. for i, extracted_region in enumerate(Ctr_Autosub.pool.imap(converter, regions)):
  76. Ctr_Autosub.step = 1
  77. extracted_regions.append(extracted_region)
  78. progress_percent = MyUtil.percentage(i, len_regions)
  79. Ctr_Autosub.output_progress(listener_progress, str_task_1, progress_percent)
  80. if Ctr_Autosub.cancel:
  81. return -1
  82. else:
  83. Ctr_Autosub.pool.close()
  84. Ctr_Autosub.pool.join()
  85. str_task_2 = "Step 2 of 2: Performing speech recognition "
  86. Ctr_Autosub.pool = multiprocessing.Pool(concurrency)
  87. for i, transcript in enumerate(Ctr_Autosub.pool.imap(recognizer, extracted_regions)):
  88. Ctr_Autosub.step = 2
  89. transcripts.append(transcript)
  90. progress_percent = MyUtil.percentage(i, len_regions)
  91. Ctr_Autosub.output_progress(listener_progress, str_task_2, progress_percent)
  92. if Ctr_Autosub.cancel:
  93. return -1
  94. else:
  95. Ctr_Autosub.pool.close()
  96. Ctr_Autosub.pool.join()
  97. except KeyboardInterrupt:
  98. Ctr_Autosub.pbar.finish()
  99. Ctr_Autosub.pool.terminate()
  100. Ctr_Autosub.pool.join()
  101. raise
  102. timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
  103. formatter = FORMATTERS.get(subtitle_file_format)
  104. formatted_subtitles = formatter(timed_subtitles)
  105. dest = output
  106. if not dest:
  107. base = os.path.splitext(source_path)[0]
  108. dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
  109. with open(dest, 'wb') as output_file:
  110. output_file.write(formatted_subtitles.encode("utf-8"))
  111. os.remove(audio_filename)
  112. if Ctr_Autosub.cancel:
  113. return -1
  114. else:
  115. Ctr_Autosub.pool.close()
  116. Ctr_Autosub.pool.join()
  117. return dest