ming 3 lat temu
rodzic
commit
aedb2ffba1

+ 405 - 405
autosub/__init__-0.4.0.py → composer/autosub/__init__-0.4.0.py

@@ -1,405 +1,405 @@
-"""
-Defines autosub's main functionality.
-"""
-
-#!/usr/bin/env python
-
-from __future__ import absolute_import, print_function, unicode_literals
-
-import argparse
-import audioop
-import json
-import math
-import multiprocessing
-import os
-import subprocess
-import sys
-import tempfile
-import wave
-
-import requests
-from googleapiclient.discovery import build
-from progressbar import ProgressBar, Percentage, Bar, ETA
-
-from autosub.constants import (
-    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
-)
-from autosub.formatters import FORMATTERS
-
-DEFAULT_SUBTITLE_FORMAT = 'srt'
-DEFAULT_CONCURRENCY = 10
-DEFAULT_SRC_LANGUAGE = 'en'
-DEFAULT_DST_LANGUAGE = 'en'
-
-
-def percentile(arr, percent):
-    """
-    Calculate the given percentile of arr.
-    """
-    arr = sorted(arr)
-    index = (len(arr) - 1) * percent
-    floor = math.floor(index)
-    ceil = math.ceil(index)
-    if floor == ceil:
-        return arr[int(index)]
-    low_value = arr[int(floor)] * (ceil - index)
-    high_value = arr[int(ceil)] * (index - floor)
-    return low_value + high_value
-
-
-class FLACConverter(object): # pylint: disable=too-few-public-methods
-    """
-    Class for converting a region of an input audio or video file into a FLAC audio file
-    """
-    def __init__(self, source_path, include_before=0.25, include_after=0.25):
-        self.source_path = source_path
-        self.include_before = include_before
-        self.include_after = include_after
-
-    def __call__(self, region):
-        try:
-            start, end = region
-            start = max(0, start - self.include_before)
-            end += self.include_after
-            temp = tempfile.NamedTemporaryFile(suffix='.flac')
-            command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),
-                       "-y", "-i", self.source_path,
-                       "-loglevel", "error", temp.name]
-            use_shell = True if os.name == "nt" else False
-            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
-            return temp.read()
-
-        except KeyboardInterrupt:
-            return None
-
-
-class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
-    """
-    Class for performing speech-to-text for an input FLAC file.
-    """
-    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
-        self.language = language
-        self.rate = rate
-        self.api_key = api_key
-        self.retries = retries
-
-    def __call__(self, data):
-        try:
-            for _ in range(self.retries):
-                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
-                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
-
-                try:
-                    resp = requests.post(url, data=data, headers=headers)
-                except requests.exceptions.ConnectionError:
-                    continue
-
-                for line in resp.content.decode('utf-8').split("\n"):
-                    try:
-                        line = json.loads(line)
-                        line = line['result'][0]['alternative'][0]['transcript']
-                        return line[:1].upper() + line[1:]
-                    except IndexError:
-                        # no result
-                        continue
-
-        except KeyboardInterrupt:
-            return None
-
-
-class Translator(object): # pylint: disable=too-few-public-methods
-    """
-    Class for translating a sentence from a one language to another.
-    """
-    def __init__(self, language, api_key, src, dst):
-        self.language = language
-        self.api_key = api_key
-        self.service = build('translate', 'v2',
-                             developerKey=self.api_key)
-        self.src = src
-        self.dst = dst
-
-    def __call__(self, sentence):
-        try:
-            if not sentence:
-                return None
-
-            result = self.service.translations().list( # pylint: disable=no-member
-                source=self.src,
-                target=self.dst,
-                q=[sentence]
-            ).execute()
-
-            if 'translations' in result and result['translations'] and \
-                'translatedText' in result['translations'][0]:
-                return result['translations'][0]['translatedText']
-
-            return None
-
-        except KeyboardInterrupt:
-            return None
-
-
-def which(program):
-    """
-    Return the path for a given executable.
-    """
-    def is_exe(file_path):
-        """
-        Checks whether a file is executable.
-        """
-        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
-
-    fpath, _ = os.path.split(program)
-    if fpath:
-        if is_exe(program):
-            return program
-    else:
-        for path in os.environ["PATH"].split(os.pathsep):
-            path = path.strip('"')
-            exe_file = os.path.join(path, program)
-            if is_exe(exe_file):
-                return exe_file
-    return None
-
-
-def extract_audio(filename, channels=1, rate=16000):
-    """
-    Extract audio from an input file to a temporary WAV file.
-    """
-    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
-    if not os.path.isfile(filename):
-        print("The given file does not exist: {}".format(filename))
-        raise Exception("Invalid filepath: {}".format(filename))
-    if not which("ffmpeg"):
-        print("ffmpeg: Executable not found on machine.")
-        raise Exception("Dependency not found: ffmpeg")
-    command = ["ffmpeg", "-y", "-i", filename,
-               "-ac", str(channels), "-ar", str(rate),
-               "-loglevel", "error", temp.name]
-    use_shell = True if os.name == "nt" else False
-    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
-    return temp.name, rate
-
-
-def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
-    """
-    Perform voice activity detection on a given audio file.
-    """
-    reader = wave.open(filename)
-    sample_width = reader.getsampwidth()
-    rate = reader.getframerate()
-    n_channels = reader.getnchannels()
-    chunk_duration = float(frame_width) / rate
-
-    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
-    energies = []
-
-    for _ in range(n_chunks):
-        chunk = reader.readframes(frame_width)
-        energies.append(audioop.rms(chunk, sample_width * n_channels))
-
-    threshold = percentile(energies, 0.2)
-
-    elapsed_time = 0
-
-    regions = []
-    region_start = None
-
-    for energy in energies:
-        is_silence = energy <= threshold
-        max_exceeded = region_start and elapsed_time - region_start >= max_region_size
-
-        if (max_exceeded or is_silence) and region_start:
-            if elapsed_time - region_start >= min_region_size:
-                regions.append((region_start, elapsed_time))
-                region_start = None
-
-        elif (not region_start) and (not is_silence):
-            region_start = elapsed_time
-        elapsed_time += chunk_duration
-    return regions
-
-
-def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
-        source_path,
-        output=None,
-        concurrency=DEFAULT_CONCURRENCY,
-        src_language=DEFAULT_SRC_LANGUAGE,
-        dst_language=DEFAULT_DST_LANGUAGE,
-        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
-        api_key=None,
-    ):
-    """
-    Given an input audio/video file, generate subtitles in the specified language and format.
-    """
-    audio_filename, audio_rate = extract_audio(source_path)
-
-    regions = find_speech_regions(audio_filename)
-
-    pool = multiprocessing.Pool(concurrency)
-    converter = FLACConverter(source_path=audio_filename)
-    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
-                                  api_key=GOOGLE_SPEECH_API_KEY)
-
-    transcripts = []
-    if regions:
-        try:
-            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
-                       ETA()]
-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
-            extracted_regions = []
-            for i, extracted_region in enumerate(pool.imap(converter, regions)):
-                extracted_regions.append(extracted_region)
-                pbar.update(i)
-            pbar.finish()
-
-            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
-
-            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
-                transcripts.append(transcript)
-                pbar.update(i)
-            pbar.finish()
-
-            if src_language.split("-")[0] != dst_language.split("-")[0]:
-                if api_key:
-                    google_translate_api_key = api_key
-                    translator = Translator(dst_language, google_translate_api_key,
-                                            dst=dst_language,
-                                            src=src_language)
-                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
-                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
-                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
-                    translated_transcripts = []
-                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
-                        translated_transcripts.append(transcript)
-                        pbar.update(i)
-                    pbar.finish()
-                    transcripts = translated_transcripts
-                else:
-                    print(
-                        "Error: Subtitle translation requires specified Google Translate API key. "
-                        "See --help for further information."
-                    )
-                    return 1
-
-        except KeyboardInterrupt:
-            pbar.finish()
-            pool.terminate()
-            pool.join()
-            print("Cancelling transcription")
-            raise
-
-    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
-    formatter = FORMATTERS.get(subtitle_file_format)
-    formatted_subtitles = formatter(timed_subtitles)
-
-    dest = output
-
-    if not dest:
-        base = os.path.splitext(source_path)[0]
-        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
-
-    with open(dest, 'wb') as output_file:
-        output_file.write(formatted_subtitles.encode("utf-8"))
-
-    os.remove(audio_filename)
-
-    return dest
-
-
-def validate(args):
-    """
-    Check that the CLI arguments passed to autosub are valid.
-    """
-    if args.format not in FORMATTERS:
-        print(
-            "Subtitle format not supported. "
-            "Run with --list-formats to see all supported formats."
-        )
-        return False
-
-    if args.src_language not in LANGUAGE_CODES.keys():
-        print(
-            "Source language not supported. "
-            "Run with --list-languages to see all supported languages."
-        )
-        return False
-
-    if args.dst_language not in LANGUAGE_CODES.keys():
-        print(
-            "Destination language not supported. "
-            "Run with --list-languages to see all supported languages."
-        )
-        return False
-
-    if not args.source_path:
-        print("Error: You need to specify a source path.")
-        return False
-
-    return True
-
-
-def main():
-    """
-    Run autosub as a command-line program.
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
-                        nargs='?')
-    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
-                        type=int, default=DEFAULT_CONCURRENCY)
-    parser.add_argument('-o', '--output',
-                        help="Output path for subtitles (by default, subtitles are saved in \
-                        the same directory and name as the source path)")
-    parser.add_argument('-F', '--format', help="Destination subtitle format",
-                        default=DEFAULT_SUBTITLE_FORMAT)
-    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
-                        default=DEFAULT_SRC_LANGUAGE)
-    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
-                        default=DEFAULT_DST_LANGUAGE)
-    parser.add_argument('-K', '--api-key',
-                        help="The Google Translate API key to be used. \
-                        (Required for subtitle translation)")
-    parser.add_argument('--list-formats', help="List all available subtitle formats",
-                        action='store_true')
-    parser.add_argument('--list-languages', help="List all available source/destination languages",
-                        action='store_true')
-
-    args = parser.parse_args()
-
-    if args.list_formats:
-        print("List of formats:")
-        for subtitle_format in FORMATTERS:
-            print("{format}".format(format=subtitle_format))
-        return 0
-
-    if args.list_languages:
-        print("List of all languages:")
-        for code, language in sorted(LANGUAGE_CODES.items()):
-            print("{code}\t{language}".format(code=code, language=language))
-        return 0
-
-    if not validate(args):
-        return 1
-
-    try:
-        subtitle_file_path = generate_subtitles(
-            source_path=args.source_path,
-            concurrency=args.concurrency,
-            src_language=args.src_language,
-            dst_language=args.dst_language,
-            api_key=args.api_key,
-            subtitle_file_format=args.format,
-            output=args.output,
-        )
-        print("Subtitles file created at {}".format(subtitle_file_path))
-    except KeyboardInterrupt:
-        return 1
-
-    return 0
-
-
-if __name__ == '__main__':
-    sys.exit(main())
+"""
+Defines autosub's main functionality.
+"""
+
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import argparse
+import audioop
+import json
+import math
+import multiprocessing
+import os
+import subprocess
+import sys
+import tempfile
+import wave
+
+import requests
+from googleapiclient.discovery import build
+from progressbar import ProgressBar, Percentage, Bar, ETA
+
+from autosub.constants import (
+    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
+)
+from autosub.formatters import FORMATTERS
+
+# Subtitle format used when neither the caller nor the CLI names one.
+DEFAULT_SUBTITLE_FORMAT = 'srt'
+# Default size of the multiprocessing pool created by generate_subtitles.
+DEFAULT_CONCURRENCY = 10
+# Default language codes for the spoken audio and for the written subtitles.
+DEFAULT_SRC_LANGUAGE = 'en'
+DEFAULT_DST_LANGUAGE = 'en'
+
+
+def percentile(arr, percent):
+    """
+    Return the value found at fraction `percent` (0.0 to 1.0) of the sorted `arr`.
+
+    When the position falls between two elements, the two neighbours are
+    blended linearly by their distance from that position.
+    """
+    ordered = sorted(arr)
+    position = (len(ordered) - 1) * percent
+    lower = math.floor(position)
+    upper = math.ceil(position)
+    if lower != upper:
+        # Each neighbour is weighted by how close the position is to it.
+        weighted_low = ordered[int(lower)] * (upper - position)
+        weighted_high = ordered[int(upper)] * (position - lower)
+        return weighted_low + weighted_high
+    # The position is a whole index, so no interpolation is needed.
+    return ordered[int(position)]
+
+
+class FLACConverter(object): # pylint: disable=too-few-public-methods
+    """
+    Callable that cuts one region out of a media file and returns it as FLAC bytes.
+
+    `source_path` is passed to ffmpeg as the input file. `include_before` and
+    `include_after` are the seconds of padding added around every region.
+    """
+    def __init__(self, source_path, include_before=0.25, include_after=0.25):
+        self.source_path = source_path
+        self.include_before = include_before
+        self.include_after = include_after
+
+    def __call__(self, region):
+        """
+        Encode `region`, a (start, end) pair in seconds, with ffmpeg.
+
+        Returns the FLAC file contents as bytes, or None when interrupted by
+        KeyboardInterrupt. Raises subprocess.CalledProcessError when ffmpeg
+        exits with a non-zero status.
+        """
+        try:
+            start, end = region
+            start = max(0, start - self.include_before)
+            end += self.include_after
+            # Reserve a path and release our handle straight away: ffmpeg
+            # reopens the file by name, which Windows refuses while the
+            # creating handle is still open.
+            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
+            temp.close()
+            try:
+                command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),
+                           "-y", "-i", self.source_path,
+                           "-loglevel", "error", temp.name]
+                use_shell = os.name == "nt"
+                # Close the devnull handle after each call instead of leaking it.
+                with open(os.devnull) as devnull:
+                    subprocess.check_output(command, stdin=devnull, shell=use_shell)
+                with open(temp.name, 'rb') as flac_file:
+                    return flac_file.read()
+            finally:
+                # delete=False means nobody else cleans this file up.
+                os.remove(temp.name)
+
+        except KeyboardInterrupt:
+            return None
+
+
+class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
+    """
+    Callable that posts FLAC audio to the speech API URL and returns a transcript.
+
+    `language` and `api_key` are substituted into GOOGLE_SPEECH_API_URL, `rate`
+    is sent as the sample rate in the Content-Type header, and `retries` is
+    the number of request attempts made per call.
+    """
+    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
+        self.language = language
+        self.rate = rate
+        self.api_key = api_key
+        self.retries = retries
+
+    def __call__(self, data):
+        """
+        Send `data` (FLAC bytes) for recognition.
+
+        Returns the first transcript found with its first letter upper-cased,
+        or None when no attempt yields one or the call is interrupted by
+        KeyboardInterrupt.
+        """
+        try:
+            # The request is identical on every attempt, so build it once.
+            url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
+            headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
+
+            for _ in range(self.retries):
+                try:
+                    resp = requests.post(url, data=data, headers=headers)
+                except requests.exceptions.ConnectionError:
+                    continue
+
+                # The body is parsed as one JSON document per line.
+                for line in resp.content.decode('utf-8').split("\n"):
+                    try:
+                        parsed = json.loads(line)
+                        transcript = parsed['result'][0]['alternative'][0]['transcript']
+                    except (IndexError, KeyError, TypeError, ValueError):
+                        # Blank or malformed line (ValueError), empty result
+                        # list (IndexError) or unexpected shape: try the next
+                        # line instead of crashing the worker.
+                        continue
+                    return transcript[:1].upper() + transcript[1:]
+
+        except KeyboardInterrupt:
+            return None
+
+        return None
+
+
+class Translator(object): # pylint: disable=too-few-public-methods
+    """
+    Callable that translates a sentence from one language to another.
+
+    The constructor builds a 'translate' v2 service client with `api_key`;
+    `src` and `dst` are passed as the source and target of every request.
+    """
+    def __init__(self, language, api_key, src, dst):
+        self.language = language
+        self.api_key = api_key
+        self.service = build('translate', 'v2', developerKey=self.api_key)
+        self.src = src
+        self.dst = dst
+
+    def __call__(self, sentence):
+        """
+        Translate `sentence`.
+
+        Returns the translated text, or None when the sentence is empty, the
+        response carries no translation, or the call is interrupted by
+        KeyboardInterrupt.
+        """
+        try:
+            if not sentence:
+                return None
+
+            request = self.service.translations().list( # pylint: disable=no-member
+                source=self.src,
+                target=self.dst,
+                q=[sentence]
+            )
+            response = request.execute()
+
+            if 'translations' not in response:
+                return None
+            candidates = response['translations']
+            if not candidates:
+                return None
+            first = candidates[0]
+            if 'translatedText' not in first:
+                return None
+            return first['translatedText']
+
+        except KeyboardInterrupt:
+            return None
+
+
+def which(program):
+    """
+    Locate an executable and return its path, or None when it is not found.
+
+    A `program` that contains a directory component is checked as given;
+    a bare name is looked up in each directory listed in PATH.
+    """
+    def is_exe(file_path):
+        """
+        Tell whether file_path is an existing file with execute permission.
+        """
+        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
+
+    directory = os.path.split(program)[0]
+    if directory:
+        return program if is_exe(program) else None
+
+    for entry in os.environ["PATH"].split(os.pathsep):
+        # PATH entries may be wrapped in double quotes; drop them before joining.
+        candidate = os.path.join(entry.strip('"'), program)
+        if is_exe(candidate):
+            return candidate
+    return None
+
+
+def extract_audio(filename, channels=1, rate=16000):
+    """
+    Extract audio from an input file to a temporary WAV file.
+
+    ffmpeg converts `filename` to a WAV with the given channel count and
+    sample rate. Returns a (wav_path, rate) tuple; the caller is responsible
+    for deleting wav_path.
+
+    Raises Exception when `filename` is not a file or ffmpeg is not found,
+    and subprocess.CalledProcessError when ffmpeg exits with a non-zero status.
+    """
+    # Validate before creating anything so a bad call leaves no stray file.
+    if not os.path.isfile(filename):
+        print("The given file does not exist: {}".format(filename))
+        raise Exception("Invalid filepath: {}".format(filename))
+    if not which("ffmpeg"):
+        print("ffmpeg: Executable not found on machine.")
+        raise Exception("Dependency not found: ffmpeg")
+
+    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+    # Only the path is needed; ffmpeg reopens and overwrites the file by name.
+    temp.close()
+    command = ["ffmpeg", "-y", "-i", filename,
+               "-ac", str(channels), "-ar", str(rate),
+               "-loglevel", "error", temp.name]
+    use_shell = os.name == "nt"
+    try:
+        with open(os.devnull) as devnull:
+            subprocess.check_output(command, stdin=devnull, shell=use_shell)
+    except BaseException:
+        # The caller never learns the path when this raises, so remove it here.
+        os.remove(temp.name)
+        raise
+    return temp.name, rate
+
+
+def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
+    """
+    Perform voice activity detection on a given WAV file.
+
+    The audio is read in chunks of `frame_width` frames and the RMS energy of
+    each chunk is compared with the 20th percentile of all chunk energies;
+    chunks at or below that threshold count as silence.
+
+    Returns a list of (start, end) tuples in seconds. A region is closed at a
+    silent chunk, or once it has lasted `max_region_size` seconds, provided it
+    is at least `min_region_size` seconds long. A region still open when the
+    audio ends is not emitted. An audio file without frames yields [].
+    """
+    reader = wave.open(filename)
+    try:
+        sample_width = reader.getsampwidth()
+        rate = reader.getframerate()
+        n_channels = reader.getnchannels()
+        n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
+        energies = [
+            audioop.rms(reader.readframes(frame_width), sample_width * n_channels)
+            for _ in range(n_chunks)
+        ]
+    finally:
+        # Release the file handle even when reading fails.
+        reader.close()
+
+    if not energies:
+        # percentile() cannot index into an empty list.
+        return []
+
+    chunk_duration = float(frame_width) / rate
+    threshold = percentile(energies, 0.2)
+
+    elapsed_time = 0
+
+    regions = []
+    region_start = None
+
+    for energy in energies:
+        is_silence = energy <= threshold
+        # Compare against None: a region that starts at 0 seconds is falsy
+        # but still an open region.
+        in_region = region_start is not None
+        max_exceeded = in_region and elapsed_time - region_start >= max_region_size
+
+        if in_region and (max_exceeded or is_silence):
+            # Regions shorter than min_region_size stay open and keep growing.
+            if elapsed_time - region_start >= min_region_size:
+                regions.append((region_start, elapsed_time))
+                region_start = None
+
+        elif (not in_region) and (not is_silence):
+            region_start = elapsed_time
+        elapsed_time += chunk_duration
+    return regions
+
+
+def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
+        source_path,
+        output=None,
+        concurrency=DEFAULT_CONCURRENCY,
+        src_language=DEFAULT_SRC_LANGUAGE,
+        dst_language=DEFAULT_DST_LANGUAGE,
+        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
+        api_key=None,
+    ):
+    """
+    Given an input audio/video file, generate subtitles in the specified language and format.
+
+    The audio is extracted to a temporary WAV file, split into speech regions,
+    converted to FLAC and recognised in a pool of `concurrency` processes.
+    When the base language codes of `src_language` and `dst_language` differ,
+    the transcripts are translated using `api_key`.
+
+    Returns the path of the subtitle file written (`output`, or `source_path`
+    with its extension replaced by the format name). Returns 1 when a
+    translation is needed but no `api_key` was given. KeyboardInterrupt is
+    re-raised after the pool has been terminated.
+    """
+    audio_filename, audio_rate = extract_audio(source_path)
+    pool = None
+
+    try:
+        regions = find_speech_regions(audio_filename)
+
+        pool = multiprocessing.Pool(concurrency)
+        converter = FLACConverter(source_path=audio_filename)
+        recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
+                                      api_key=GOOGLE_SPEECH_API_KEY)
+
+        transcripts = []
+        if regions:
+            try:
+                widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(),
+                           ' ', ETA()]
+                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+                extracted_regions = []
+                for i, extracted_region in enumerate(pool.imap(converter, regions)):
+                    extracted_regions.append(extracted_region)
+                    pbar.update(i)
+                pbar.finish()
+
+                widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ',
+                           ETA()]
+                pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+
+                for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
+                    transcripts.append(transcript)
+                    pbar.update(i)
+                pbar.finish()
+
+                # Only the base language matters: "en-US" to "en" needs no translation.
+                if src_language.split("-")[0] != dst_language.split("-")[0]:
+                    if api_key:
+                        google_translate_api_key = api_key
+                        translator = Translator(dst_language, google_translate_api_key,
+                                                dst=dst_language,
+                                                src=src_language)
+                        prompt = "Translating from {0} to {1}: ".format(src_language,
+                                                                         dst_language)
+                        widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
+                        pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+                        translated_transcripts = []
+                        for i, transcript in enumerate(pool.imap(translator, transcripts)):
+                            translated_transcripts.append(transcript)
+                            pbar.update(i)
+                        pbar.finish()
+                        transcripts = translated_transcripts
+                    else:
+                        print(
+                            "Error: Subtitle translation requires specified Google Translate API key. "
+                            "See --help for further information."
+                        )
+                        return 1
+
+            except KeyboardInterrupt:
+                pbar.finish()
+                pool.terminate()
+                pool.join()
+                print("Cancelling transcription")
+                raise
+
+        # Regions whose transcript is empty or None produce no subtitle entry.
+        timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
+        formatter = FORMATTERS.get(subtitle_file_format)
+        formatted_subtitles = formatter(timed_subtitles)
+
+        dest = output
+
+        if not dest:
+            base = os.path.splitext(source_path)[0]
+            dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
+
+        with open(dest, 'wb') as output_file:
+            output_file.write(formatted_subtitles.encode("utf-8"))
+
+        return dest
+
+    finally:
+        # Runs on success, on the early "return 1" and on any exception, so
+        # neither the worker processes nor the temporary WAV are left behind.
+        if pool is not None:
+            pool.terminate()
+            pool.join()
+        if os.path.exists(audio_filename):
+            os.remove(audio_filename)
+
+
+def validate(args):
+    """
+    Check the parsed CLI arguments and report the first problem found.
+
+    The checks run in this order: subtitle format, source language,
+    destination language, presence of a source path. Prints a message and
+    returns False on the first failure; returns True when every check passes.
+    """
+    problem = None
+
+    if args.format not in FORMATTERS:
+        problem = (
+            "Subtitle format not supported. "
+            "Run with --list-formats to see all supported formats."
+        )
+    elif args.src_language not in LANGUAGE_CODES:
+        problem = (
+            "Source language not supported. "
+            "Run with --list-languages to see all supported languages."
+        )
+    elif args.dst_language not in LANGUAGE_CODES:
+        problem = (
+            "Destination language not supported. "
+            "Run with --list-languages to see all supported languages."
+        )
+    elif not args.source_path:
+        problem = "Error: You need to specify a source path."
+
+    if problem is not None:
+        print(problem)
+        return False
+
+    return True
+
+
+def main():
+    """
+    Run autosub as a command-line program.
+
+    Parses the command line, handles the --list-formats and --list-languages
+    queries, validates the remaining arguments and generates the subtitles.
+
+    Returns 0 on success and 1 when validation fails, subtitle generation
+    reports an error, or the run is interrupted by KeyboardInterrupt.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
+                        nargs='?')
+    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
+                        type=int, default=DEFAULT_CONCURRENCY)
+    parser.add_argument('-o', '--output',
+                        help="Output path for subtitles (by default, subtitles are saved in \
+                        the same directory and name as the source path)")
+    parser.add_argument('-F', '--format', help="Destination subtitle format",
+                        default=DEFAULT_SUBTITLE_FORMAT)
+    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
+                        default=DEFAULT_SRC_LANGUAGE)
+    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
+                        default=DEFAULT_DST_LANGUAGE)
+    parser.add_argument('-K', '--api-key',
+                        help="The Google Translate API key to be used. \
+                        (Required for subtitle translation)")
+    parser.add_argument('--list-formats', help="List all available subtitle formats",
+                        action='store_true')
+    parser.add_argument('--list-languages', help="List all available source/destination languages",
+                        action='store_true')
+
+    args = parser.parse_args()
+
+    # The two listing options answer a query and exit without needing a source path.
+    if args.list_formats:
+        print("List of formats:")
+        for subtitle_format in FORMATTERS:
+            print("{format}".format(format=subtitle_format))
+        return 0
+
+    if args.list_languages:
+        print("List of all languages:")
+        for code, language in sorted(LANGUAGE_CODES.items()):
+            print("{code}\t{language}".format(code=code, language=language))
+        return 0
+
+    if not validate(args):
+        return 1
+
+    try:
+        subtitle_file_path = generate_subtitles(
+            source_path=args.source_path,
+            concurrency=args.concurrency,
+            src_language=args.src_language,
+            dst_language=args.dst_language,
+            api_key=args.api_key,
+            subtitle_file_format=args.format,
+            output=args.output,
+        )
+    except KeyboardInterrupt:
+        return 1
+
+    if subtitle_file_path == 1:
+        # generate_subtitles returns 1, not a path, when translation was
+        # requested without an API key; it has already printed the reason.
+        return 1
+
+    print("Subtitles file created at {}".format(subtitle_file_path))
+    return 0
+
+
+# When run as a script, execute the CLI and use main()'s return value as the exit status.
+if __name__ == '__main__':
+    sys.exit(main())

+ 434 - 434
autosub/__init__.py → composer/autosub/__init__.py

@@ -1,434 +1,434 @@
-"""
-Defines autosub's main functionality.
-"""
-
-#!/usr/bin/env python
-
-from __future__ import absolute_import, print_function, unicode_literals
-
-import argparse
-import audioop
-import math
-import multiprocessing
-import os
-from json import JSONDecodeError
-import subprocess
-import sys
-import tempfile
-import wave
-
-import json
-import requests
-try:
-    from json.decoder import JSONDecodeError
-except ImportError:
-    JSONDecodeError = ValueError
-
-from googleapiclient.discovery import build
-from progressbar import ProgressBar, Percentage, Bar, ETA
-
-from autosub.constants import (
-    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
-)
-from autosub.formatters import FORMATTERS
-
-DEFAULT_SUBTITLE_FORMAT = 'srt'
-DEFAULT_CONCURRENCY = 10
-DEFAULT_SRC_LANGUAGE = 'en'
-DEFAULT_DST_LANGUAGE = 'en'
-
-
-def percentile(arr, percent):
-    """
-    Calculate the given percentile of arr.
-    """
-    arr = sorted(arr)
-    index = (len(arr) - 1) * percent
-    floor = math.floor(index)
-    ceil = math.ceil(index)
-    if floor == ceil:
-        return arr[int(index)]
-    low_value = arr[int(floor)] * (ceil - index)
-    high_value = arr[int(ceil)] * (index - floor)
-    return low_value + high_value
-
-
-class FLACConverter(object): # pylint: disable=too-few-public-methods
-    """
-    Class for converting a region of an input audio or video file into a FLAC audio file
-    """
-    def __init__(self, source_path, include_before=0.25, include_after=0.25):
-        self.source_path = source_path
-        self.include_before = include_before
-        self.include_after = include_after
-
-    def __call__(self, region):
-        try:
-            start, end = region
-            start = max(0, start - self.include_before)
-            end += self.include_after
-            #delete=False necessary for running on Windows
-            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
-            program_ffmpeg = which("ffmpeg")
-            command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),
-                       "-y", "-i", self.source_path,
-                       "-loglevel", "error", temp.name]
-            use_shell = True if os.name == "nt" else False
-            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
-            read_data = temp.read()
-            temp.close()
-            os.unlink(temp.name)
-            return read_data
-
-        except KeyboardInterrupt:
-            return None
-
-
-class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
-    """
-    Class for performing speech-to-text for an input FLAC file.
-    """
-    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
-        self.language = language
-        self.rate = rate
-        self.api_key = api_key
-        self.retries = retries
-
-    def __call__(self, data):
-        try:
-            for _ in range(self.retries):
-                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
-                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
-
-                try:
-                    resp = requests.post(url, data=data, headers=headers)
-                except requests.exceptions.ConnectionError:
-                    continue
-
-                for line in resp.content.decode('utf-8').split("\n"):
-                    try:
-                        line = json.loads(line)
-                        line = line['result'][0]['alternative'][0]['transcript']
-                        return line[:1].upper() + line[1:]
-                    except IndexError:
-                        # no result
-                        continue
-                    except JSONDecodeError:
-                        continue
-
-        except KeyboardInterrupt:
-            return None
-
-
-class Translator(object): # pylint: disable=too-few-public-methods
-    """
-    Class for translating a sentence from a one language to another.
-    """
-    def __init__(self, language, api_key, src, dst):
-        self.language = language
-        self.api_key = api_key
-        self.service = build('translate', 'v2',
-                             developerKey=self.api_key)
-        self.src = src
-        self.dst = dst
-
-    def __call__(self, sentence):
-        try:
-            if not sentence:
-                return None
-
-            result = self.service.translations().list( # pylint: disable=no-member
-                source=self.src,
-                target=self.dst,
-                q=[sentence]
-            ).execute()
-
-            if 'translations' in result and result['translations'] and \
-                'translatedText' in result['translations'][0]:
-                return result['translations'][0]['translatedText']
-
-            return None
-
-        except KeyboardInterrupt:
-            return None
-
-
-def which(program):
-    """
-    Return the path for a given executable.
-    """
-    def is_exe(file_path):
-        """
-        Checks whether a file is executable.
-        """
-        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
-    #necessary to run on Windows
-    if os.name == "nt":
-        program += ".exe"
-    fpath, _ = os.path.split(program)
-    if fpath:
-        if is_exe(program):
-            return program
-    else:
-        #looks for file in the script execution folder before checking on system path
-        current_dir = os.getcwd()
-        local_program = os.path.join(current_dir, program)
-        if is_exe(local_program):
-            return local_program
-        else:
-            for path in os.environ["PATH"].split(os.pathsep):
-                path = path.strip('"')
-                exe_file = os.path.join(path, program)
-                if is_exe(exe_file):
-                    return exe_file
-    return None
-
-
-def extract_audio(filename, channels=1, rate=16000):
-    """
-    Extract audio from an input file to a temporary WAV file.
-    """
-    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
-    if not os.path.isfile(filename):
-        print("The given file does not exist: {}".format(filename))
-        raise Exception("Invalid filepath: {}".format(filename))
-    program_ffmpeg = which("ffmpeg")
-    if not program_ffmpeg:
-        print("ffmpeg: Executable not found on machine.")
-        raise Exception("Dependency not found: ffmpeg")
-    command = [str(program_ffmpeg), "-y", "-i", filename,
-               "-ac", str(channels), "-ar", str(rate),
-               "-loglevel", "error", temp.name]
-    use_shell = True if os.name == "nt" else False
-    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
-    return temp.name, rate
-
-
-def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
-    """
-    Perform voice activity detection on a given audio file.
-    """
-    reader = wave.open(filename)
-    sample_width = reader.getsampwidth()
-    rate = reader.getframerate()
-    n_channels = reader.getnchannels()
-    chunk_duration = float(frame_width) / rate
-
-    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
-    energies = []
-
-    for _ in range(n_chunks):
-        chunk = reader.readframes(frame_width)
-        energies.append(audioop.rms(chunk, sample_width * n_channels))
-
-    threshold = percentile(energies, 0.2)
-
-    elapsed_time = 0
-
-    regions = []
-    region_start = None
-
-    for energy in energies:
-        is_silence = energy <= threshold
-        max_exceeded = region_start and elapsed_time - region_start >= max_region_size
-
-        if (max_exceeded or is_silence) and region_start:
-            if elapsed_time - region_start >= min_region_size:
-                regions.append((region_start, elapsed_time))
-                region_start = None
-
-        elif (not region_start) and (not is_silence):
-            region_start = elapsed_time
-        elapsed_time += chunk_duration
-    return regions
-
-
-def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
-        source_path,
-        output=None,
-        concurrency=DEFAULT_CONCURRENCY,
-        src_language=DEFAULT_SRC_LANGUAGE,
-        dst_language=DEFAULT_DST_LANGUAGE,
-        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
-        api_key=None,
-    ):
-    """
-    Given an input audio/video file, generate subtitles in the specified language and format.
-    """
-
-    if os.name != "nt" and "Darwin" in os.uname():
-        #the default unix fork method does not work on Mac OS
-        #need to use forkserver
-        if 'forkserver' != multiprocessing.get_start_method(allow_none=True):
-            multiprocessing.set_start_method('forkserver')
-
-    audio_filename, audio_rate = extract_audio(source_path)
-
-    regions = find_speech_regions(audio_filename)
-
-    pool = multiprocessing.Pool(concurrency)
-    converter = FLACConverter(source_path=audio_filename)
-    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
-                                  api_key=GOOGLE_SPEECH_API_KEY)
-
-    transcripts = []
-    if regions:
-        try:
-            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
-                       ETA()]
-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
-            extracted_regions = []
-            for i, extracted_region in enumerate(pool.imap(converter, regions)):
-                extracted_regions.append(extracted_region)
-                pbar.update(i)
-            pbar.finish()
-
-            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
-
-            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
-                transcripts.append(transcript)
-                pbar.update(i)
-            pbar.finish()
-
-            if src_language.split("-")[0] != dst_language.split("-")[0]:
-                if api_key:
-                    google_translate_api_key = api_key
-                    translator = Translator(dst_language, google_translate_api_key,
-                                            dst=dst_language,
-                                            src=src_language)
-                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
-                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
-                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
-                    translated_transcripts = []
-                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
-                        translated_transcripts.append(transcript)
-                        pbar.update(i)
-                    pbar.finish()
-                    transcripts = translated_transcripts
-                else:
-                    print(
-                        "Error: Subtitle translation requires specified Google Translate API key. "
-                        "See --help for further information."
-                    )
-                    return 1
-
-        except KeyboardInterrupt:
-            pbar.finish()
-            pool.terminate()
-            pool.join()
-            print("Cancelling transcription")
-            raise
-
-    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
-    formatter = FORMATTERS.get(subtitle_file_format)
-    formatted_subtitles = formatter(timed_subtitles)
-
-    dest = output
-
-    if not dest:
-        base = os.path.splitext(source_path)[0]
-        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
-
-    with open(dest, 'wb') as output_file:
-        output_file.write(formatted_subtitles.encode("utf-8"))
-
-    os.remove(audio_filename)
-
-    return dest
-
-
-def validate(args):
-    """
-    Check that the CLI arguments passed to autosub are valid.
-    """
-    if args.format not in FORMATTERS:
-        print(
-            "Subtitle format not supported. "
-            "Run with --list-formats to see all supported formats."
-        )
-        return False
-
-    if args.src_language not in LANGUAGE_CODES.keys():
-        print(
-            "Source language not supported. "
-            "Run with --list-languages to see all supported languages."
-        )
-        return False
-
-    if args.dst_language not in LANGUAGE_CODES.keys():
-        print(
-            "Destination language not supported. "
-            "Run with --list-languages to see all supported languages."
-        )
-        return False
-
-    if not args.source_path:
-        print("Error: You need to specify a source path.")
-        return False
-
-    return True
-
-
-def main():
-    """
-    Run autosub as a command-line program.
-    """
-    parser = argparse.ArgumentParser()
-    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
-                        nargs='?')
-    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
-                        type=int, default=DEFAULT_CONCURRENCY)
-    parser.add_argument('-o', '--output',
-                        help="Output path for subtitles (by default, subtitles are saved in \
-                        the same directory and name as the source path)")
-    parser.add_argument('-F', '--format', help="Destination subtitle format",
-                        default=DEFAULT_SUBTITLE_FORMAT)
-    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
-                        default=DEFAULT_SRC_LANGUAGE)
-    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
-                        default=DEFAULT_DST_LANGUAGE)
-    parser.add_argument('-K', '--api-key',
-                        help="The Google Translate API key to be used. \
-                        (Required for subtitle translation)")
-    parser.add_argument('--list-formats', help="List all available subtitle formats",
-                        action='store_true')
-    parser.add_argument('--list-languages', help="List all available source/destination languages",
-                        action='store_true')
-
-    args = parser.parse_args()
-
-    if args.list_formats:
-        print("List of formats:")
-        for subtitle_format in FORMATTERS:
-            print("{format}".format(format=subtitle_format))
-        return 0
-
-    if args.list_languages:
-        print("List of all languages:")
-        for code, language in sorted(LANGUAGE_CODES.items()):
-            print("{code}\t{language}".format(code=code, language=language))
-        return 0
-
-    if not validate(args):
-        return 1
-
-    try:
-        subtitle_file_path = generate_subtitles(
-            source_path=args.source_path,
-            concurrency=args.concurrency,
-            src_language=args.src_language,
-            dst_language=args.dst_language,
-            api_key=args.api_key,
-            subtitle_file_format=args.format,
-            output=args.output,
-        )
-        print("Subtitles file created at {}".format(subtitle_file_path))
-    except KeyboardInterrupt:
-        return 1
-
-    return 0
-
-
-if __name__ == '__main__':
-    sys.exit(main())
+"""
+Defines autosub's main functionality.
+"""
+
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import argparse
+import audioop
+import math
+import multiprocessing
+import os
+from json import JSONDecodeError
+import subprocess
+import sys
+import tempfile
+import wave
+
+import json
+import requests
+try:
+    from json.decoder import JSONDecodeError
+except ImportError:
+    JSONDecodeError = ValueError
+
+from googleapiclient.discovery import build
+from progressbar import ProgressBar, Percentage, Bar, ETA
+
+from autosub.constants import (
+    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
+)
+from autosub.formatters import FORMATTERS
+
+# Subtitle format used when the caller does not request one.
+DEFAULT_SUBTITLE_FORMAT = 'srt'
+# Default worker-pool size, i.e. the number of concurrent API requests.
+DEFAULT_CONCURRENCY = 10
+# Default language codes for the spoken source and for the generated subtitles.
+DEFAULT_SRC_LANGUAGE = 'en'
+DEFAULT_DST_LANGUAGE = 'en'
+
+
+def percentile(arr, percent):
+    """
+    Calculate the given percentile of arr.
+    """
+    arr = sorted(arr)
+    index = (len(arr) - 1) * percent
+    floor = math.floor(index)
+    ceil = math.ceil(index)
+    if floor == ceil:
+        return arr[int(index)]
+    low_value = arr[int(floor)] * (ceil - index)
+    high_value = arr[int(ceil)] * (index - floor)
+    return low_value + high_value
+
+
+class FLACConverter(object): # pylint: disable=too-few-public-methods
+    """
+    Class for converting a region of an input audio or video file into a FLAC audio file
+    """
+    def __init__(self, source_path, include_before=0.25, include_after=0.25):
+        self.source_path = source_path
+        self.include_before = include_before
+        self.include_after = include_after
+
+    def __call__(self, region):
+        try:
+            start, end = region
+            start = max(0, start - self.include_before)
+            end += self.include_after
+            #delete=False necessary for running on Windows
+            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
+            program_ffmpeg = which("ffmpeg")
+            command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),
+                       "-y", "-i", self.source_path,
+                       "-loglevel", "error", temp.name]
+            use_shell = True if os.name == "nt" else False
+            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
+            read_data = temp.read()
+            temp.close()
+            os.unlink(temp.name)
+            return read_data
+
+        except KeyboardInterrupt:
+            return None
+
+
+class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
+    """
+    Class for performing speech-to-text for an input FLAC file.
+    """
+    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
+        self.language = language
+        self.rate = rate
+        self.api_key = api_key
+        self.retries = retries
+
+    def __call__(self, data):
+        try:
+            for _ in range(self.retries):
+                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
+                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
+
+                try:
+                    resp = requests.post(url, data=data, headers=headers)
+                except requests.exceptions.ConnectionError:
+                    continue
+
+                for line in resp.content.decode('utf-8').split("\n"):
+                    try:
+                        line = json.loads(line)
+                        line = line['result'][0]['alternative'][0]['transcript']
+                        return line[:1].upper() + line[1:]
+                    except IndexError:
+                        # no result
+                        continue
+                    except JSONDecodeError:
+                        continue
+
+        except KeyboardInterrupt:
+            return None
+
+
+class Translator(object): # pylint: disable=too-few-public-methods
+    """
+    Class for translating a sentence from a one language to another.
+    """
+    def __init__(self, language, api_key, src, dst):
+        self.language = language
+        self.api_key = api_key
+        self.service = build('translate', 'v2',
+                             developerKey=self.api_key)
+        self.src = src
+        self.dst = dst
+
+    def __call__(self, sentence):
+        try:
+            if not sentence:
+                return None
+
+            result = self.service.translations().list( # pylint: disable=no-member
+                source=self.src,
+                target=self.dst,
+                q=[sentence]
+            ).execute()
+
+            if 'translations' in result and result['translations'] and \
+                'translatedText' in result['translations'][0]:
+                return result['translations'][0]['translatedText']
+
+            return None
+
+        except KeyboardInterrupt:
+            return None
+
+
+def which(program):
+    """
+    Return the path for a given executable.
+    """
+    def is_exe(file_path):
+        """
+        Checks whether a file is executable.
+        """
+        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
+    #necessary to run on Windows
+    if os.name == "nt":
+        program += ".exe"
+    fpath, _ = os.path.split(program)
+    if fpath:
+        if is_exe(program):
+            return program
+    else:
+        #looks for file in the script execution folder before checking on system path
+        current_dir = os.getcwd()
+        local_program = os.path.join(current_dir, program)
+        if is_exe(local_program):
+            return local_program
+        else:
+            for path in os.environ["PATH"].split(os.pathsep):
+                path = path.strip('"')
+                exe_file = os.path.join(path, program)
+                if is_exe(exe_file):
+                    return exe_file
+    return None
+
+
+def extract_audio(filename, channels=1, rate=16000):
+    """
+    Extract audio from an input file to a temporary WAV file.
+    """
+    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
+    if not os.path.isfile(filename):
+        print("The given file does not exist: {}".format(filename))
+        raise Exception("Invalid filepath: {}".format(filename))
+    program_ffmpeg = which("ffmpeg")
+    if not program_ffmpeg:
+        print("ffmpeg: Executable not found on machine.")
+        raise Exception("Dependency not found: ffmpeg")
+    command = [str(program_ffmpeg), "-y", "-i", filename,
+               "-ac", str(channels), "-ar", str(rate),
+               "-loglevel", "error", temp.name]
+    use_shell = True if os.name == "nt" else False
+    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
+    return temp.name, rate
+
+
+def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
+    """
+    Perform voice activity detection on a given audio file.
+    """
+    reader = wave.open(filename)
+    sample_width = reader.getsampwidth()
+    rate = reader.getframerate()
+    n_channels = reader.getnchannels()
+    chunk_duration = float(frame_width) / rate
+
+    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
+    energies = []
+
+    for _ in range(n_chunks):
+        chunk = reader.readframes(frame_width)
+        energies.append(audioop.rms(chunk, sample_width * n_channels))
+
+    threshold = percentile(energies, 0.2)
+
+    elapsed_time = 0
+
+    regions = []
+    region_start = None
+
+    for energy in energies:
+        is_silence = energy <= threshold
+        max_exceeded = region_start and elapsed_time - region_start >= max_region_size
+
+        if (max_exceeded or is_silence) and region_start:
+            if elapsed_time - region_start >= min_region_size:
+                regions.append((region_start, elapsed_time))
+                region_start = None
+
+        elif (not region_start) and (not is_silence):
+            region_start = elapsed_time
+        elapsed_time += chunk_duration
+    return regions
+
+
+def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
+        source_path,
+        output=None,
+        concurrency=DEFAULT_CONCURRENCY,
+        src_language=DEFAULT_SRC_LANGUAGE,
+        dst_language=DEFAULT_DST_LANGUAGE,
+        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
+        api_key=None,
+    ):
+    """
+    Given an input audio/video file, generate subtitles in the specified language and format.
+    """
+
+    if os.name != "nt" and "Darwin" in os.uname():
+        #the default unix fork method does not work on Mac OS
+        #need to use forkserver
+        if 'forkserver' != multiprocessing.get_start_method(allow_none=True):
+            multiprocessing.set_start_method('forkserver')
+
+    audio_filename, audio_rate = extract_audio(source_path)
+
+    regions = find_speech_regions(audio_filename)
+
+    pool = multiprocessing.Pool(concurrency)
+    converter = FLACConverter(source_path=audio_filename)
+    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
+                                  api_key=GOOGLE_SPEECH_API_KEY)
+
+    transcripts = []
+    if regions:
+        try:
+            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
+                       ETA()]
+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+            extracted_regions = []
+            for i, extracted_region in enumerate(pool.imap(converter, regions)):
+                extracted_regions.append(extracted_region)
+                pbar.update(i)
+            pbar.finish()
+
+            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+
+            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
+                transcripts.append(transcript)
+                pbar.update(i)
+            pbar.finish()
+
+            if src_language.split("-")[0] != dst_language.split("-")[0]:
+                if api_key:
+                    google_translate_api_key = api_key
+                    translator = Translator(dst_language, google_translate_api_key,
+                                            dst=dst_language,
+                                            src=src_language)
+                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
+                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
+                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
+                    translated_transcripts = []
+                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
+                        translated_transcripts.append(transcript)
+                        pbar.update(i)
+                    pbar.finish()
+                    transcripts = translated_transcripts
+                else:
+                    print(
+                        "Error: Subtitle translation requires specified Google Translate API key. "
+                        "See --help for further information."
+                    )
+                    return 1
+
+        except KeyboardInterrupt:
+            pbar.finish()
+            pool.terminate()
+            pool.join()
+            print("Cancelling transcription")
+            raise
+
+    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
+    formatter = FORMATTERS.get(subtitle_file_format)
+    formatted_subtitles = formatter(timed_subtitles)
+
+    dest = output
+
+    if not dest:
+        base = os.path.splitext(source_path)[0]
+        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
+
+    with open(dest, 'wb') as output_file:
+        output_file.write(formatted_subtitles.encode("utf-8"))
+
+    os.remove(audio_filename)
+
+    return dest
+
+
+def validate(args):
+    """
+    Check that the CLI arguments passed to autosub are valid.
+    """
+    if args.format not in FORMATTERS:
+        print(
+            "Subtitle format not supported. "
+            "Run with --list-formats to see all supported formats."
+        )
+        return False
+
+    if args.src_language not in LANGUAGE_CODES.keys():
+        print(
+            "Source language not supported. "
+            "Run with --list-languages to see all supported languages."
+        )
+        return False
+
+    if args.dst_language not in LANGUAGE_CODES.keys():
+        print(
+            "Destination language not supported. "
+            "Run with --list-languages to see all supported languages."
+        )
+        return False
+
+    if not args.source_path:
+        print("Error: You need to specify a source path.")
+        return False
+
+    return True
+
+
+def main():
+    """
+    Run autosub as a command-line program.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
+                        nargs='?')
+    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
+                        type=int, default=DEFAULT_CONCURRENCY)
+    parser.add_argument('-o', '--output',
+                        help="Output path for subtitles (by default, subtitles are saved in \
+                        the same directory and name as the source path)")
+    parser.add_argument('-F', '--format', help="Destination subtitle format",
+                        default=DEFAULT_SUBTITLE_FORMAT)
+    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
+                        default=DEFAULT_SRC_LANGUAGE)
+    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
+                        default=DEFAULT_DST_LANGUAGE)
+    parser.add_argument('-K', '--api-key',
+                        help="The Google Translate API key to be used. \
+                        (Required for subtitle translation)")
+    parser.add_argument('--list-formats', help="List all available subtitle formats",
+                        action='store_true')
+    parser.add_argument('--list-languages', help="List all available source/destination languages",
+                        action='store_true')
+
+    args = parser.parse_args()
+
+    if args.list_formats:
+        print("List of formats:")
+        for subtitle_format in FORMATTERS:
+            print("{format}".format(format=subtitle_format))
+        return 0
+
+    if args.list_languages:
+        print("List of all languages:")
+        for code, language in sorted(LANGUAGE_CODES.items()):
+            print("{code}\t{language}".format(code=code, language=language))
+        return 0
+
+    if not validate(args):
+        return 1
+
+    try:
+        subtitle_file_path = generate_subtitles(
+            source_path=args.source_path,
+            concurrency=args.concurrency,
+            src_language=args.src_language,
+            dst_language=args.dst_language,
+            api_key=args.api_key,
+            subtitle_file_format=args.format,
+            output=args.output,
+        )
+        print("Subtitles file created at {}".format(subtitle_file_path))
+    except KeyboardInterrupt:
+        return 1
+
+    return 0
+
+
+if __name__ == '__main__':
+    sys.exit(main())

+ 0 - 0
autosub/__pycache__/__init__.cpython-37.pyc → composer/autosub/__pycache__/__init__.cpython-37.pyc


+ 0 - 0
autosub/__pycache__/__init__.cpython-38.pyc → composer/autosub/__pycache__/__init__.cpython-38.pyc


+ 0 - 0
autosub/__pycache__/__init__.cpython-39.pyc → composer/autosub/__pycache__/__init__.cpython-39.pyc


+ 0 - 0
autosub/__pycache__/constants.cpython-37.pyc → composer/autosub/__pycache__/constants.cpython-37.pyc


+ 0 - 0
autosub/__pycache__/constants.cpython-38.pyc → composer/autosub/__pycache__/constants.cpython-38.pyc


+ 0 - 0
autosub/__pycache__/constants.cpython-39.pyc → composer/autosub/__pycache__/constants.cpython-39.pyc


+ 0 - 0
autosub/__pycache__/formatters.cpython-37.pyc → composer/autosub/__pycache__/formatters.cpython-37.pyc


+ 0 - 0
autosub/__pycache__/formatters.cpython-38.pyc → composer/autosub/__pycache__/formatters.cpython-38.pyc


+ 0 - 0
autosub/__pycache__/formatters.cpython-39.pyc → composer/autosub/__pycache__/formatters.cpython-39.pyc


+ 118 - 118
autosub/constants.py → composer/autosub/constants.py

@@ -1,118 +1,118 @@
-"""
-Defines constants used by autosub.s
-"""
-
-from __future__ import unicode_literals
-
-GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
-GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
-
-LANGUAGE_CODES = {
-    'af': 'Afrikaans',
-    'ar': 'Arabic',
-    'az': 'Azerbaijani',
-    'be': 'Belarusian',
-    'bg': 'Bulgarian',
-    'bn': 'Bengali',
-    'bs': 'Bosnian',
-    'ca': 'Catalan',
-    'ceb': 'Cebuano',
-    'cs': 'Czech',
-    'cy': 'Welsh',
-    'da': 'Danish',
-    'de': 'German',
-    'el': 'Greek',
-    'en-AU': 'English (Australia)',
-    'en-CA': 'English (Canada)',
-    'en-GB': 'English (United Kingdom)',
-    'en-IN': 'English (India)',
-    'en-IE': 'English (Ireland)',
-    'en-NZ': 'English (New Zealand)',
-    'en-PH': 'English (Philippines)',
-    'en-SG': 'English (Singapore)',
-    'en-US': 'English (United States)',
-    'eo': 'Esperanto',
-    'es-AR': 'Spanish (Argentina)',
-    'es-CL': 'Spanish (Chile)',
-    'es-ES': 'Spanish (Spain)',
-    'es-US': 'Spanish (United States)',
-    'es-MX': 'Spanish (Mexico)',
-    'es': 'Spanish',
-    'et': 'Estonian',
-    'eu': 'Basque',
-    'fa': 'Persian',
-    'fi': 'Finnish',
-    'fr': 'French',
-    'ga': 'Irish',
-    'gl': 'Galician',
-    'gu': 'Gujarati',
-    'ha': 'Hausa',
-    'hi': 'Hindi',
-    'hmn': 'Hmong',
-    'hr': 'Croatian',
-    'ht': 'Haitian Creole',
-    'hu': 'Hungarian',
-    'hy': 'Armenian',
-    'id': 'Indonesian',
-    'ig': 'Igbo',
-    'is': 'Icelandic',
-    'it': 'Italian',
-    'iw': 'Hebrew',
-    'ja': 'Japanese',
-    'jw': 'Javanese',
-    'ka': 'Georgian',
-    'kk': 'Kazakh',
-    'km': 'Khmer',
-    'kn': 'Kannada',
-    'ko': 'Korean',
-    'la': 'Latin',
-    'lo': 'Lao',
-    'lt': 'Lithuanian',
-    'lv': 'Latvian',
-    'mg': 'Malagasy',
-    'mi': 'Maori',
-    'mk': 'Macedonian',
-    'ml': 'Malayalam',
-    'mn': 'Mongolian',
-    'mr': 'Marathi',
-    'ms': 'Malay',
-    'mt': 'Maltese',
-    'my': 'Myanmar (Burmese)',
-    'ne': 'Nepali',
-    'nl': 'Dutch',
-    'no': 'Norwegian',
-    'ny': 'Chichewa',
-    'pa': 'Punjabi',
-    'pl': 'Polish',
-    'pt-BR': 'Portuguese (Brazil)',
-    'pt-PT': 'Portuguese (Portugal)',
-    'ro': 'Romanian',
-    'ru': 'Russian',
-    'si': 'Sinhala',
-    'sk': 'Slovak',
-    'sl': 'Slovenian',
-    'so': 'Somali',
-    'sq': 'Albanian',
-    'sr': 'Serbian',
-    'st': 'Sesotho',
-    'su': 'Sudanese',
-    'sv': 'Swedish',
-    'sw': 'Swahili',
-    'ta': 'Tamil',
-    'te': 'Telugu',
-    'tg': 'Tajik',
-    'th': 'Thai',
-    'tl': 'Filipino',
-    'tr': 'Turkish',
-    'uk': 'Ukrainian',
-    'ur': 'Urdu',
-    'uz': 'Uzbek',
-    'vi': 'Vietnamese',
-    'yi': 'Yiddish',
-    'yo': 'Yoruba',
-    'yue-Hant-HK': 'Cantonese, (Traditional HK)',
-    'zh': 'Chinese (Simplified, China)',
-    'zh-HK': 'Chinese (Simplified, Hong Kong)',
-    'zh-TW': 'Chinese (Traditional, Taiwan)',
-    'zu': 'Zulu',
-}
+"""
+Defines constants used by autosub.
+"""
+
+from __future__ import unicode_literals
+
+# API key and URL template for the speech-api v2 "recognize" endpoint; the
+# template is completed through its {lang} and {key} placeholders.
+# NOTE(review): the key is committed in source and the URL uses plain http, so
+# the key would be sent unencrypted -- confirm whether https and an externally
+# supplied key can be used instead.
+GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
+GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
+
+# Maps each language code to the human-readable name printed next to it by the
+# command-line language listing. The code 'su' denotes Sundanese (ISO 639-1),
+# not "Sudanese".
+LANGUAGE_CODES = {
+    'af': 'Afrikaans',
+    'ar': 'Arabic',
+    'az': 'Azerbaijani',
+    'be': 'Belarusian',
+    'bg': 'Bulgarian',
+    'bn': 'Bengali',
+    'bs': 'Bosnian',
+    'ca': 'Catalan',
+    'ceb': 'Cebuano',
+    'cs': 'Czech',
+    'cy': 'Welsh',
+    'da': 'Danish',
+    'de': 'German',
+    'el': 'Greek',
+    'en-AU': 'English (Australia)',
+    'en-CA': 'English (Canada)',
+    'en-GB': 'English (United Kingdom)',
+    'en-IN': 'English (India)',
+    'en-IE': 'English (Ireland)',
+    'en-NZ': 'English (New Zealand)',
+    'en-PH': 'English (Philippines)',
+    'en-SG': 'English (Singapore)',
+    'en-US': 'English (United States)',
+    'eo': 'Esperanto',
+    'es-AR': 'Spanish (Argentina)',
+    'es-CL': 'Spanish (Chile)',
+    'es-ES': 'Spanish (Spain)',
+    'es-US': 'Spanish (United States)',
+    'es-MX': 'Spanish (Mexico)',
+    'es': 'Spanish',
+    'et': 'Estonian',
+    'eu': 'Basque',
+    'fa': 'Persian',
+    'fi': 'Finnish',
+    'fr': 'French',
+    'ga': 'Irish',
+    'gl': 'Galician',
+    'gu': 'Gujarati',
+    'ha': 'Hausa',
+    'hi': 'Hindi',
+    'hmn': 'Hmong',
+    'hr': 'Croatian',
+    'ht': 'Haitian Creole',
+    'hu': 'Hungarian',
+    'hy': 'Armenian',
+    'id': 'Indonesian',
+    'ig': 'Igbo',
+    'is': 'Icelandic',
+    'it': 'Italian',
+    'iw': 'Hebrew',
+    'ja': 'Japanese',
+    'jw': 'Javanese',
+    'ka': 'Georgian',
+    'kk': 'Kazakh',
+    'km': 'Khmer',
+    'kn': 'Kannada',
+    'ko': 'Korean',
+    'la': 'Latin',
+    'lo': 'Lao',
+    'lt': 'Lithuanian',
+    'lv': 'Latvian',
+    'mg': 'Malagasy',
+    'mi': 'Maori',
+    'mk': 'Macedonian',
+    'ml': 'Malayalam',
+    'mn': 'Mongolian',
+    'mr': 'Marathi',
+    'ms': 'Malay',
+    'mt': 'Maltese',
+    'my': 'Myanmar (Burmese)',
+    'ne': 'Nepali',
+    'nl': 'Dutch',
+    'no': 'Norwegian',
+    'ny': 'Chichewa',
+    'pa': 'Punjabi',
+    'pl': 'Polish',
+    'pt-BR': 'Portuguese (Brazil)',
+    'pt-PT': 'Portuguese (Portugal)',
+    'ro': 'Romanian',
+    'ru': 'Russian',
+    'si': 'Sinhala',
+    'sk': 'Slovak',
+    'sl': 'Slovenian',
+    'so': 'Somali',
+    'sq': 'Albanian',
+    'sr': 'Serbian',
+    'st': 'Sesotho',
+    'su': 'Sundanese',
+    'sv': 'Swedish',
+    'sw': 'Swahili',
+    'ta': 'Tamil',
+    'te': 'Telugu',
+    'tg': 'Tajik',
+    'th': 'Thai',
+    'tl': 'Filipino',
+    'tr': 'Turkish',
+    'uk': 'Ukrainian',
+    'ur': 'Urdu',
+    'uz': 'Uzbek',
+    'vi': 'Vietnamese',
+    'yi': 'Yiddish',
+    'yo': 'Yoruba',
+    'yue-Hant-HK': 'Cantonese, (Traditional HK)',
+    'zh': 'Chinese (Simplified, China)',
+    'zh-HK': 'Chinese (Simplified, Hong Kong)',
+    'zh-TW': 'Chinese (Traditional, Taiwan)',
+    'zu': 'Zulu',
+}

+ 66 - 66
autosub/formatters.py → composer/autosub/formatters.py

@@ -1,66 +1,66 @@
-"""
-Defines subtitle formatters used by autosub.s
-"""
-
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
-import json
-
-import pysrt
-import six
-
-
-def srt_formatter(subtitles, padding_before=0, padding_after=0):
-    """
-    Serialize a list of subtitles according to the SRT format, with optional time padding.
-    """
-    sub_rip_file = pysrt.SubRipFile()
-    for i, ((start, end), text) in enumerate(subtitles, start=1):
-        item = pysrt.SubRipItem()
-        item.index = i
-        item.text = six.text_type(text)
-        item.start.seconds = max(0, start - padding_before)
-        item.end.seconds = end + padding_after
-        sub_rip_file.append(item)
-    return '\n'.join(six.text_type(item) for item in sub_rip_file)
-
-
-def vtt_formatter(subtitles, padding_before=0, padding_after=0):
-    """
-    Serialize a list of subtitles according to the VTT format, with optional time padding.
-    """
-    text = srt_formatter(subtitles, padding_before, padding_after)
-    text = 'WEBVTT\n\n' + text.replace(',', '.')
-    return text
-
-
-def json_formatter(subtitles):
-    """
-    Serialize a list of subtitles as a JSON blob.
-    """
-    subtitle_dicts = [
-        {
-            'start': start,
-            'end': end,
-            'content': text,
-        }
-        for ((start, end), text)
-        in subtitles
-    ]
-    return json.dumps(subtitle_dicts)
-
-
-def raw_formatter(subtitles):
-    """
-    Serialize a list of subtitles as a newline-delimited string.
-    """
-    return ' '.join(text for (_rng, text) in subtitles)
-
-
-FORMATTERS = {
-    'srt': srt_formatter,
-    'vtt': vtt_formatter,
-    'json': json_formatter,
-    'raw': raw_formatter,
-}
+"""
+Defines subtitle formatters used by autosub.
+"""
+
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+import json
+
+import pysrt
+import six
+
+
+def srt_formatter(subtitles, padding_before=0, padding_after=0):
+    """
+    Render subtitles as SRT text.
+
+    Each entry of `subtitles` is unpacked as ((start, end), text). Entries are
+    numbered from 1; the start time is moved earlier by `padding_before`
+    (never below 0) and the end time is moved later by `padding_after`.
+    """
+    srt_entries = pysrt.SubRipFile()
+    position = 0
+    for (begin, finish), content in subtitles:
+        position += 1
+        entry = pysrt.SubRipItem()
+        entry.index = position
+        entry.text = six.text_type(content)
+        padded_begin = begin - padding_before
+        # Clamp so that padding cannot push the start before the beginning.
+        entry.start.seconds = padded_begin if padded_begin > 0 else 0
+        entry.end.seconds = finish + padding_after
+        srt_entries.append(entry)
+    rendered = [six.text_type(entry) for entry in srt_entries]
+    return '\n'.join(rendered)
+
+
+def vtt_formatter(subtitles, padding_before=0, padding_after=0):
+    """
+    Serialize a list of subtitles according to the VTT format, with optional time padding.
+
+    The cues are produced by srt_formatter and then adapted: a 'WEBVTT' header
+    is prepended and the decimal comma of the SRT timestamps becomes a period.
+    Only timing lines (those containing ' --> ') are rewritten, so commas that
+    belong to the subtitle text itself are preserved.
+    """
+    srt_text = srt_formatter(subtitles, padding_before, padding_after)
+    converted = [
+        # A blanket replace over the whole text would also corrupt punctuation
+        # inside the cue payload, so restrict it to the timing lines.
+        line.replace(',', '.') if ' --> ' in line else line
+        for line in srt_text.split('\n')
+    ]
+    return 'WEBVTT\n\n' + '\n'.join(converted)
+
+
+def json_formatter(subtitles):
+    """
+    Encode subtitles as a JSON array of objects.
+
+    Each entry of `subtitles` is unpacked as ((start, end), text) and becomes
+    an object with the keys 'start', 'end' and 'content'.
+    """
+    records = []
+    for (begin, finish), content in subtitles:
+        records.append({'start': begin, 'end': finish, 'content': content})
+    return json.dumps(records)
+
+
+def raw_formatter(subtitles):
+    """
+    Join the text of every subtitle into one space-separated string.
+
+    Each entry of `subtitles` is unpacked as (time_range, text); the time
+    range is ignored.
+    """
+    texts = []
+    for _region, content in subtitles:
+        texts.append(content)
+    return ' '.join(texts)
+
+
+# Maps each supported subtitle format name to the function that serializes
+# a list of subtitles into that format.
+FORMATTERS = {
+    'srt': srt_formatter,
+    'vtt': vtt_formatter,
+    'json': json_formatter,
+    'raw': raw_formatter,
+}