3 lat temu · aedb2ffba1
--- a/composer/autosub/__init__-0.4.0.py
+++ b/composer/autosub/__init__-0.4.0.py
@@ -1,405 +1,405 @@
 
				-"""

			
 
				-Defines autosub's main functionality.

			
 
				-"""

			
 
				-

			
 
				-#!/usr/bin/env python

			
 
				-

			
 
				-from __future__ import absolute_import, print_function, unicode_literals

			
 
				-

			
 
				-import argparse

			
 
				-import audioop

			
 
				-import json

			
 
				-import math

			
 
				-import multiprocessing

			
 
				-import os

			
 
				-import subprocess

			
 
				-import sys

			
 
				-import tempfile

			
 
				-import wave

			
 
				-

			
 
				-import requests

			
 
				-from googleapiclient.discovery import build

			
 
				-from progressbar import ProgressBar, Percentage, Bar, ETA

			
 
				-

			
 
				-from autosub.constants import (

			
 
				-    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,

			
 
				-)

			
 
				-from autosub.formatters import FORMATTERS

			
 
				-

			
 
				-DEFAULT_SUBTITLE_FORMAT = 'srt'

			
 
				-DEFAULT_CONCURRENCY = 10

			
 
				-DEFAULT_SRC_LANGUAGE = 'en'

			
 
				-DEFAULT_DST_LANGUAGE = 'en'

			
 
				-

			
 
				-

			
 
				-def percentile(arr, percent):

			
 
				-    """

			
 
				-    Calculate the given percentile of arr.

			
 
				-    """

			
 
				-    arr = sorted(arr)

			
 
				-    index = (len(arr) - 1) * percent

			
 
				-    floor = math.floor(index)

			
 
				-    ceil = math.ceil(index)

			
 
				-    if floor == ceil:

			
 
				-        return arr[int(index)]

			
 
				-    low_value = arr[int(floor)] * (ceil - index)

			
 
				-    high_value = arr[int(ceil)] * (index - floor)

			
 
				-    return low_value + high_value

			
 
				-

			
 
				-

			
 
				-class FLACConverter(object): # pylint: disable=too-few-public-methods

			
 
				-    """

			
 
				-    Class for converting a region of an input audio or video file into a FLAC audio file

			
 
				-    """

			
 
				-    def __init__(self, source_path, include_before=0.25, include_after=0.25):

			
 
				-        self.source_path = source_path

			
 
				-        self.include_before = include_before

			
 
				-        self.include_after = include_after

			
 
				-

			
 
				-    def __call__(self, region):

			
 
				-        try:

			
 
				-            start, end = region

			
 
				-            start = max(0, start - self.include_before)

			
 
				-            end += self.include_after

			
 
				-            temp = tempfile.NamedTemporaryFile(suffix='.flac')

			
 
				-            command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),

			
 
				-                       "-y", "-i", self.source_path,

			
 
				-                       "-loglevel", "error", temp.name]

			
 
				-            use_shell = True if os.name == "nt" else False

			
 
				-            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)

			
 
				-            return temp.read()

			
 
				-

			
 
				-        except KeyboardInterrupt:

			
 
				-            return None

			
 
				-

			
 
				-

			
 
				-class SpeechRecognizer(object): # pylint: disable=too-few-public-methods

			
 
				-    """

			
 
				-    Class for performing speech-to-text for an input FLAC file.

			
 
				-    """

			
 
				-    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):

			
 
				-        self.language = language

			
 
				-        self.rate = rate

			
 
				-        self.api_key = api_key

			
 
				-        self.retries = retries

			
 
				-

			
 
				-    def __call__(self, data):

			
 
				-        try:

			
 
				-            for _ in range(self.retries):

			
 
				-                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)

			
 
				-                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}

			
 
				-

			
 
				-                try:

			
 
				-                    resp = requests.post(url, data=data, headers=headers)

			
 
				-                except requests.exceptions.ConnectionError:

			
 
				-                    continue

			
 
				-

			
 
				-                for line in resp.content.decode('utf-8').split("\n"):

			
 
				-                    try:

			
 
				-                        line = json.loads(line)

			
 
				-                        line = line['result'][0]['alternative'][0]['transcript']

			
 
				-                        return line[:1].upper() + line[1:]

			
 
				-                    except IndexError:

			
 
				-                        # no result

			
 
				-                        continue

			
 
				-

			
 
				-        except KeyboardInterrupt:

			
 
				-            return None

			
 
				-

			
 
				-

			
 
				-class Translator(object): # pylint: disable=too-few-public-methods

			
 
				-    """

			
 
				-    Class for translating a sentence from a one language to another.

			
 
				-    """

			
 
				-    def __init__(self, language, api_key, src, dst):

			
 
				-        self.language = language

			
 
				-        self.api_key = api_key

			
 
				-        self.service = build('translate', 'v2',

			
 
				-                             developerKey=self.api_key)

			
 
				-        self.src = src

			
 
				-        self.dst = dst

			
 
				-

			
 
				-    def __call__(self, sentence):

			
 
				-        try:

			
 
				-            if not sentence:

			
 
				-                return None

			
 
				-

			
 
				-            result = self.service.translations().list( # pylint: disable=no-member

			
 
				-                source=self.src,

			
 
				-                target=self.dst,

			
 
				-                q=[sentence]

			
 
				-            ).execute()

			
 
				-

			
 
				-            if 'translations' in result and result['translations'] and \

			
 
				-                'translatedText' in result['translations'][0]:

			
 
				-                return result['translations'][0]['translatedText']

			
 
				-

			
 
				-            return None

			
 
				-

			
 
				-        except KeyboardInterrupt:

			
 
				-            return None

			
 
				-

			
 
				-

			
 
				-def which(program):

			
 
				-    """

			
 
				-    Return the path for a given executable.

			
 
				-    """

			
 
				-    def is_exe(file_path):

			
 
				-        """

			
 
				-        Checks whether a file is executable.

			
 
				-        """

			
 
				-        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)

			
 
				-

			
 
				-    fpath, _ = os.path.split(program)

			
 
				-    if fpath:

			
 
				-        if is_exe(program):

			
 
				-            return program

			
 
				-    else:

			
 
				-        for path in os.environ["PATH"].split(os.pathsep):

			
 
				-            path = path.strip('"')

			
 
				-            exe_file = os.path.join(path, program)

			
 
				-            if is_exe(exe_file):

			
 
				-                return exe_file

			
 
				-    return None

			
 
				-

			
 
				-

			
 
				-def extract_audio(filename, channels=1, rate=16000):

			
 
				-    """

			
 
				-    Extract audio from an input file to a temporary WAV file.

			
 
				-    """

			
 
				-    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)

			
 
				-    if not os.path.isfile(filename):

			
 
				-        print("The given file does not exist: {}".format(filename))

			
 
				-        raise Exception("Invalid filepath: {}".format(filename))

			
 
				-    if not which("ffmpeg"):

			
 
				-        print("ffmpeg: Executable not found on machine.")

			
 
				-        raise Exception("Dependency not found: ffmpeg")

			
 
				-    command = ["ffmpeg", "-y", "-i", filename,

			
 
				-               "-ac", str(channels), "-ar", str(rate),

			
 
				-               "-loglevel", "error", temp.name]

			
 
				-    use_shell = True if os.name == "nt" else False

			
 
				-    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)

			
 
				-    return temp.name, rate

			
 
				-

			
 
				-

			
 
				-def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals

			
 
				-    """

			
 
				-    Perform voice activity detection on a given audio file.

			
 
				-    """

			
 
				-    reader = wave.open(filename)

			
 
				-    sample_width = reader.getsampwidth()

			
 
				-    rate = reader.getframerate()

			
 
				-    n_channels = reader.getnchannels()

			
 
				-    chunk_duration = float(frame_width) / rate

			
 
				-

			
 
				-    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))

			
 
				-    energies = []

			
 
				-

			
 
				-    for _ in range(n_chunks):

			
 
				-        chunk = reader.readframes(frame_width)

			
 
				-        energies.append(audioop.rms(chunk, sample_width * n_channels))

			
 
				-

			
 
				-    threshold = percentile(energies, 0.2)

			
 
				-

			
 
				-    elapsed_time = 0

			
 
				-

			
 
				-    regions = []

			
 
				-    region_start = None

			
 
				-

			
 
				-    for energy in energies:

			
 
				-        is_silence = energy <= threshold

			
 
				-        max_exceeded = region_start and elapsed_time - region_start >= max_region_size

			
 
				-

			
 
				-        if (max_exceeded or is_silence) and region_start:

			
 
				-            if elapsed_time - region_start >= min_region_size:

			
 
				-                regions.append((region_start, elapsed_time))

			
 
				-                region_start = None

			
 
				-

			
 
				-        elif (not region_start) and (not is_silence):

			
 
				-            region_start = elapsed_time

			
 
				-        elapsed_time += chunk_duration

			
 
				-    return regions

			
 
				-

			
 
				-

			
 
				-def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments

			
 
				-        source_path,

			
 
				-        output=None,

			
 
				-        concurrency=DEFAULT_CONCURRENCY,

			
 
				-        src_language=DEFAULT_SRC_LANGUAGE,

			
 
				-        dst_language=DEFAULT_DST_LANGUAGE,

			
 
				-        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,

			
 
				-        api_key=None,

			
 
				-    ):

			
 
				-    """

			
 
				-    Given an input audio/video file, generate subtitles in the specified language and format.

			
 
				-    """

			
 
				-    audio_filename, audio_rate = extract_audio(source_path)

			
 
				-

			
 
				-    regions = find_speech_regions(audio_filename)

			
 
				-

			
 
				-    pool = multiprocessing.Pool(concurrency)

			
 
				-    converter = FLACConverter(source_path=audio_filename)

			
 
				-    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,

			
 
				-                                  api_key=GOOGLE_SPEECH_API_KEY)

			
 
				-

			
 
				-    transcripts = []

			
 
				-    if regions:

			
 
				-        try:

			
 
				-            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',

			
 
				-                       ETA()]

			
 
				-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

			
 
				-            extracted_regions = []

			
 
				-            for i, extracted_region in enumerate(pool.imap(converter, regions)):

			
 
				-                extracted_regions.append(extracted_region)

			
 
				-                pbar.update(i)

			
 
				-            pbar.finish()

			
 
				-

			
 
				-            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]

			
 
				-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

			
 
				-

			
 
				-            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):

			
 
				-                transcripts.append(transcript)

			
 
				-                pbar.update(i)

			
 
				-            pbar.finish()

			
 
				-

			
 
				-            if src_language.split("-")[0] != dst_language.split("-")[0]:

			
 
				-                if api_key:

			
 
				-                    google_translate_api_key = api_key

			
 
				-                    translator = Translator(dst_language, google_translate_api_key,

			
 
				-                                            dst=dst_language,

			
 
				-                                            src=src_language)

			
 
				-                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)

			
 
				-                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]

			
 
				-                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

			
 
				-                    translated_transcripts = []

			
 
				-                    for i, transcript in enumerate(pool.imap(translator, transcripts)):

			
 
				-                        translated_transcripts.append(transcript)

			
 
				-                        pbar.update(i)

			
 
				-                    pbar.finish()

			
 
				-                    transcripts = translated_transcripts

			
 
				-                else:

			
 
				-                    print(

			
 
				-                        "Error: Subtitle translation requires specified Google Translate API key. "

			
 
				-                        "See --help for further information."

			
 
				-                    )

			
 
				-                    return 1

			
 
				-

			
 
				-        except KeyboardInterrupt:

			
 
				-            pbar.finish()

			
 
				-            pool.terminate()

			
 
				-            pool.join()

			
 
				-            print("Cancelling transcription")

			
 
				-            raise

			
 
				-

			
 
				-    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]

			
 
				-    formatter = FORMATTERS.get(subtitle_file_format)

			
 
				-    formatted_subtitles = formatter(timed_subtitles)

			
 
				-

			
 
				-    dest = output

			
 
				-

			
 
				-    if not dest:

			
 
				-        base = os.path.splitext(source_path)[0]

			
 
				-        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

			
 
				-

			
 
				-    with open(dest, 'wb') as output_file:

			
 
				-        output_file.write(formatted_subtitles.encode("utf-8"))

			
 
				-

			
 
				-    os.remove(audio_filename)

			
 
				-

			
 
				-    return dest

			
 
				-

			
 
				-

			
 
				-def validate(args):

			
 
				-    """

			
 
				-    Check that the CLI arguments passed to autosub are valid.

			
 
				-    """

			
 
				-    if args.format not in FORMATTERS:

			
 
				-        print(

			
 
				-            "Subtitle format not supported. "

			
 
				-            "Run with --list-formats to see all supported formats."

			
 
				-        )

			
 
				-        return False

			
 
				-

			
 
				-    if args.src_language not in LANGUAGE_CODES.keys():

			
 
				-        print(

			
 
				-            "Source language not supported. "

			
 
				-            "Run with --list-languages to see all supported languages."

			
 
				-        )

			
 
				-        return False

			
 
				-

			
 
				-    if args.dst_language not in LANGUAGE_CODES.keys():

			
 
				-        print(

			
 
				-            "Destination language not supported. "

			
 
				-            "Run with --list-languages to see all supported languages."

			
 
				-        )

			
 
				-        return False

			
 
				-

			
 
				-    if not args.source_path:

			
 
				-        print("Error: You need to specify a source path.")

			
 
				-        return False

			
 
				-

			
 
				-    return True

			
 
				-

			
 
				-

			
 
				-def main():

			
 
				-    """

			
 
				-    Run autosub as a command-line program.

			
 
				-    """

			
 
				-    parser = argparse.ArgumentParser()

			
 
				-    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",

			
 
				-                        nargs='?')

			
 
				-    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",

			
 
				-                        type=int, default=DEFAULT_CONCURRENCY)

			
 
				-    parser.add_argument('-o', '--output',

			
 
				-                        help="Output path for subtitles (by default, subtitles are saved in \

			
 
				-                        the same directory and name as the source path)")

			
 
				-    parser.add_argument('-F', '--format', help="Destination subtitle format",

			
 
				-                        default=DEFAULT_SUBTITLE_FORMAT)

			
 
				-    parser.add_argument('-S', '--src-language', help="Language spoken in source file",

			
 
				-                        default=DEFAULT_SRC_LANGUAGE)

			
 
				-    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",

			
 
				-                        default=DEFAULT_DST_LANGUAGE)

			
 
				-    parser.add_argument('-K', '--api-key',

			
 
				-                        help="The Google Translate API key to be used. \

			
 
				-                        (Required for subtitle translation)")

			
 
				-    parser.add_argument('--list-formats', help="List all available subtitle formats",

			
 
				-                        action='store_true')

			
 
				-    parser.add_argument('--list-languages', help="List all available source/destination languages",

			
 
				-                        action='store_true')

			
 
				-

			
 
				-    args = parser.parse_args()

			
 
				-

			
 
				-    if args.list_formats:

			
 
				-        print("List of formats:")

			
 
				-        for subtitle_format in FORMATTERS:

			
 
				-            print("{format}".format(format=subtitle_format))

			
 
				-        return 0

			
 
				-

			
 
				-    if args.list_languages:

			
 
				-        print("List of all languages:")

			
 
				-        for code, language in sorted(LANGUAGE_CODES.items()):

			
 
				-            print("{code}\t{language}".format(code=code, language=language))

			
 
				-        return 0

			
 
				-

			
 
				-    if not validate(args):

			
 
				-        return 1

			
 
				-

			
 
				-    try:

			
 
				-        subtitle_file_path = generate_subtitles(

			
 
				-            source_path=args.source_path,

			
 
				-            concurrency=args.concurrency,

			
 
				-            src_language=args.src_language,

			
 
				-            dst_language=args.dst_language,

			
 
				-            api_key=args.api_key,

			
 
				-            subtitle_file_format=args.format,

			
 
				-            output=args.output,

			
 
				-        )

			
 
				-        print("Subtitles file created at {}".format(subtitle_file_path))

			
 
				-    except KeyboardInterrupt:

			
 
				-        return 1

			
 
				-

			
 
				-    return 0

			
 
				-

			
 
				-

			
 
				-if __name__ == '__main__':

			
 
				-    sys.exit(main())

			
 
				+"""
			
 
				+Defines autosub's main functionality.
			
 
				+"""
			
 
				+
			
 
				+#!/usr/bin/env python
			
 
				+
			
 
				+from __future__ import absolute_import, print_function, unicode_literals
			
 
				+
			
 
				+import argparse
			
 
				+import audioop
			
 
				+import json
			
 
				+import math
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+import subprocess
			
 
				+import sys
			
 
				+import tempfile
			
 
				+import wave
			
 
				+
			
 
				+import requests
			
 
				+from googleapiclient.discovery import build
			
 
				+from progressbar import ProgressBar, Percentage, Bar, ETA
			
 
				+
			
 
				+from autosub.constants import (
			
 
				+    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
			
 
				+)
			
 
				+from autosub.formatters import FORMATTERS
			
 
				+
			
 
				+DEFAULT_SUBTITLE_FORMAT = 'srt'
			
 
				+DEFAULT_CONCURRENCY = 10
			
 
				+DEFAULT_SRC_LANGUAGE = 'en'
			
 
				+DEFAULT_DST_LANGUAGE = 'en'
			
 
				+
			
 
				+
			
 
				+def percentile(arr, percent):
			
 
				+    """
			
 
				+    Calculate the given percentile of arr.
			
 
				+    """
			
 
				+    arr = sorted(arr)
			
 
				+    index = (len(arr) - 1) * percent
			
 
				+    floor = math.floor(index)
			
 
				+    ceil = math.ceil(index)
			
 
				+    if floor == ceil:
			
 
				+        return arr[int(index)]
			
 
				+    low_value = arr[int(floor)] * (ceil - index)
			
 
				+    high_value = arr[int(ceil)] * (index - floor)
			
 
				+    return low_value + high_value
			
 
				+
			
 
				+
			
 
				+class FLACConverter(object): # pylint: disable=too-few-public-methods
			
 
				+    """
			
 
				+    Class for converting a region of an input audio or video file into a FLAC audio file
			
 
				+    """
			
 
				+    def __init__(self, source_path, include_before=0.25, include_after=0.25):
			
 
				+        self.source_path = source_path
			
 
				+        self.include_before = include_before
			
 
				+        self.include_after = include_after
			
 
				+
			
 
				+    def __call__(self, region):
			
 
				+        try:
			
 
				+            start, end = region
			
 
				+            start = max(0, start - self.include_before)
			
 
				+            end += self.include_after
			
 
				+            temp = tempfile.NamedTemporaryFile(suffix='.flac')
			
 
				+            command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),
			
 
				+                       "-y", "-i", self.source_path,
			
 
				+                       "-loglevel", "error", temp.name]
			
 
				+            use_shell = True if os.name == "nt" else False
			
 
				+            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
			
 
				+            return temp.read()
			
 
				+
			
 
				+        except KeyboardInterrupt:
			
 
				+            return None
			
 
				+
			
 
				+
			
 
				+class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
			
 
				+    """
			
 
				+    Class for performing speech-to-text for an input FLAC file.
			
 
				+    """
			
 
				+    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
			
 
				+        self.language = language
			
 
				+        self.rate = rate
			
 
				+        self.api_key = api_key
			
 
				+        self.retries = retries
			
 
				+
			
 
				+    def __call__(self, data):
			
 
				+        try:
			
 
				+            for _ in range(self.retries):
			
 
				+                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
			
 
				+                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
			
 
				+
			
 
				+                try:
			
 
				+                    resp = requests.post(url, data=data, headers=headers)
			
 
				+                except requests.exceptions.ConnectionError:
			
 
				+                    continue
			
 
				+
			
 
				+                for line in resp.content.decode('utf-8').split("\n"):
			
 
				+                    try:
			
 
				+                        line = json.loads(line)
			
 
				+                        line = line['result'][0]['alternative'][0]['transcript']
			
 
				+                        return line[:1].upper() + line[1:]
			
 
				+                    except IndexError:
			
 
				+                        # no result
			
 
				+                        continue
			
 
				+
			
 
				+        except KeyboardInterrupt:
			
 
				+            return None
			
 
				+
			
 
				+
			
 
				+class Translator(object): # pylint: disable=too-few-public-methods
			
 
				+    """
			
 
				+    Class for translating a sentence from a one language to another.
			
 
				+    """
			
 
				+    def __init__(self, language, api_key, src, dst):
			
 
				+        self.language = language
			
 
				+        self.api_key = api_key
			
 
				+        self.service = build('translate', 'v2',
			
 
				+                             developerKey=self.api_key)
			
 
				+        self.src = src
			
 
				+        self.dst = dst
			
 
				+
			
 
				+    def __call__(self, sentence):
			
 
				+        try:
			
 
				+            if not sentence:
			
 
				+                return None
			
 
				+
			
 
				+            result = self.service.translations().list( # pylint: disable=no-member
			
 
				+                source=self.src,
			
 
				+                target=self.dst,
			
 
				+                q=[sentence]
			
 
				+            ).execute()
			
 
				+
			
 
				+            if 'translations' in result and result['translations'] and \
			
 
				+                'translatedText' in result['translations'][0]:
			
 
				+                return result['translations'][0]['translatedText']
			
 
				+
			
 
				+            return None
			
 
				+
			
 
				+        except KeyboardInterrupt:
			
 
				+            return None
			
 
				+
			
 
				+
			
 
				+def which(program):
			
 
				+    """
			
 
				+    Return the path for a given executable.
			
 
				+    """
			
 
				+    def is_exe(file_path):
			
 
				+        """
			
 
				+        Checks whether a file is executable.
			
 
				+        """
			
 
				+        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
			
 
				+
			
 
				+    fpath, _ = os.path.split(program)
			
 
				+    if fpath:
			
 
				+        if is_exe(program):
			
 
				+            return program
			
 
				+    else:
			
 
				+        for path in os.environ["PATH"].split(os.pathsep):
			
 
				+            path = path.strip('"')
			
 
				+            exe_file = os.path.join(path, program)
			
 
				+            if is_exe(exe_file):
			
 
				+                return exe_file
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+def extract_audio(filename, channels=1, rate=16000):
			
 
				+    """
			
 
				+    Extract audio from an input file to a temporary WAV file.
			
 
				+    """
			
 
				+    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
			
 
				+    if not os.path.isfile(filename):
			
 
				+        print("The given file does not exist: {}".format(filename))
			
 
				+        raise Exception("Invalid filepath: {}".format(filename))
			
 
				+    if not which("ffmpeg"):
			
 
				+        print("ffmpeg: Executable not found on machine.")
			
 
				+        raise Exception("Dependency not found: ffmpeg")
			
 
				+    command = ["ffmpeg", "-y", "-i", filename,
			
 
				+               "-ac", str(channels), "-ar", str(rate),
			
 
				+               "-loglevel", "error", temp.name]
			
 
				+    use_shell = True if os.name == "nt" else False
			
 
				+    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
			
 
				+    return temp.name, rate
			
 
				+
			
 
				+
			
 
				+def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
			
 
				+    """
			
 
				+    Perform voice activity detection on a given audio file.
			
 
				+    """
			
 
				+    reader = wave.open(filename)
			
 
				+    sample_width = reader.getsampwidth()
			
 
				+    rate = reader.getframerate()
			
 
				+    n_channels = reader.getnchannels()
			
 
				+    chunk_duration = float(frame_width) / rate
			
 
				+
			
 
				+    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
			
 
				+    energies = []
			
 
				+
			
 
				+    for _ in range(n_chunks):
			
 
				+        chunk = reader.readframes(frame_width)
			
 
				+        energies.append(audioop.rms(chunk, sample_width * n_channels))
			
 
				+
			
 
				+    threshold = percentile(energies, 0.2)
			
 
				+
			
 
				+    elapsed_time = 0
			
 
				+
			
 
				+    regions = []
			
 
				+    region_start = None
			
 
				+
			
 
				+    for energy in energies:
			
 
				+        is_silence = energy <= threshold
			
 
				+        max_exceeded = region_start and elapsed_time - region_start >= max_region_size
			
 
				+
			
 
				+        if (max_exceeded or is_silence) and region_start:
			
 
				+            if elapsed_time - region_start >= min_region_size:
			
 
				+                regions.append((region_start, elapsed_time))
			
 
				+                region_start = None
			
 
				+
			
 
				+        elif (not region_start) and (not is_silence):
			
 
				+            region_start = elapsed_time
			
 
				+        elapsed_time += chunk_duration
			
 
				+    return regions
			
 
				+
			
 
				+
			
 
				+def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
			
 
				+        source_path,
			
 
				+        output=None,
			
 
				+        concurrency=DEFAULT_CONCURRENCY,
			
 
				+        src_language=DEFAULT_SRC_LANGUAGE,
			
 
				+        dst_language=DEFAULT_DST_LANGUAGE,
			
 
				+        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
			
 
				+        api_key=None,
			
 
				+    ):
			
 
				+    """
			
 
				+    Given an input audio/video file, generate subtitles in the specified language and format.
			
 
				+    """
			
 
				+    audio_filename, audio_rate = extract_audio(source_path)
			
 
				+
			
 
				+    regions = find_speech_regions(audio_filename)
			
 
				+
			
 
				+    pool = multiprocessing.Pool(concurrency)
			
 
				+    converter = FLACConverter(source_path=audio_filename)
			
 
				+    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
			
 
				+                                  api_key=GOOGLE_SPEECH_API_KEY)
			
 
				+
			
 
				+    transcripts = []
			
 
				+    if regions:
			
 
				+        try:
			
 
				+            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
			
 
				+                       ETA()]
			
 
				+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
			
 
				+            extracted_regions = []
			
 
				+            for i, extracted_region in enumerate(pool.imap(converter, regions)):
			
 
				+                extracted_regions.append(extracted_region)
			
 
				+                pbar.update(i)
			
 
				+            pbar.finish()
			
 
				+
			
 
				+            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
			
 
				+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
			
 
				+
			
 
				+            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
			
 
				+                transcripts.append(transcript)
			
 
				+                pbar.update(i)
			
 
				+            pbar.finish()
			
 
				+
			
 
				+            if src_language.split("-")[0] != dst_language.split("-")[0]:
			
 
				+                if api_key:
			
 
				+                    google_translate_api_key = api_key
			
 
				+                    translator = Translator(dst_language, google_translate_api_key,
			
 
				+                                            dst=dst_language,
			
 
				+                                            src=src_language)
			
 
				+                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
			
 
				+                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
			
 
				+                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
			
 
				+                    translated_transcripts = []
			
 
				+                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
			
 
				+                        translated_transcripts.append(transcript)
			
 
				+                        pbar.update(i)
			
 
				+                    pbar.finish()
			
 
				+                    transcripts = translated_transcripts
			
 
				+                else:
			
 
				+                    print(
			
 
				+                        "Error: Subtitle translation requires specified Google Translate API key. "
			
 
				+                        "See --help for further information."
			
 
				+                    )
			
 
				+                    return 1
			
 
				+
			
 
				+        except KeyboardInterrupt:
			
 
				+            pbar.finish()
			
 
				+            pool.terminate()
			
 
				+            pool.join()
			
 
				+            print("Cancelling transcription")
			
 
				+            raise
			
 
				+
			
 
				+    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
			
 
				+    formatter = FORMATTERS.get(subtitle_file_format)
			
 
				+    formatted_subtitles = formatter(timed_subtitles)
			
 
				+
			
 
				+    dest = output
			
 
				+
			
 
				+    if not dest:
			
 
				+        base = os.path.splitext(source_path)[0]
			
 
				+        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
			
 
				+
			
 
				+    with open(dest, 'wb') as output_file:
			
 
				+        output_file.write(formatted_subtitles.encode("utf-8"))
			
 
				+
			
 
				+    os.remove(audio_filename)
			
 
				+
			
 
				+    return dest
			
 
				+
			
 
				+
			
 
				+def validate(args):
			
 
				+    """
			
 
				+    Check that the CLI arguments passed to autosub are valid.
			
 
				+    """
			
 
				+    if args.format not in FORMATTERS:
			
 
				+        print(
			
 
				+            "Subtitle format not supported. "
			
 
				+            "Run with --list-formats to see all supported formats."
			
 
				+        )
			
 
				+        return False
			
 
				+
			
 
				+    if args.src_language not in LANGUAGE_CODES.keys():
			
 
				+        print(
			
 
				+            "Source language not supported. "
			
 
				+            "Run with --list-languages to see all supported languages."
			
 
				+        )
			
 
				+        return False
			
 
				+
			
 
				+    if args.dst_language not in LANGUAGE_CODES.keys():
			
 
				+        print(
			
 
				+            "Destination language not supported. "
			
 
				+            "Run with --list-languages to see all supported languages."
			
 
				+        )
			
 
				+        return False
			
 
				+
			
 
				+    if not args.source_path:
			
 
				+        print("Error: You need to specify a source path.")
			
 
				+        return False
			
 
				+
			
 
				+    return True
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """
			
 
				+    Run autosub as a command-line program.
			
 
				+    """
			
 
				+    parser = argparse.ArgumentParser()
			
 
				+    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
			
 
				+                        nargs='?')
			
 
				+    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
			
 
				+                        type=int, default=DEFAULT_CONCURRENCY)
			
 
				+    parser.add_argument('-o', '--output',
			
 
				+                        help="Output path for subtitles (by default, subtitles are saved in \
			
 
				+                        the same directory and name as the source path)")
			
 
				+    parser.add_argument('-F', '--format', help="Destination subtitle format",
			
 
				+                        default=DEFAULT_SUBTITLE_FORMAT)
			
 
				+    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
			
 
				+                        default=DEFAULT_SRC_LANGUAGE)
			
 
				+    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
			
 
				+                        default=DEFAULT_DST_LANGUAGE)
			
 
				+    parser.add_argument('-K', '--api-key',
			
 
				+                        help="The Google Translate API key to be used. \
			
 
				+                        (Required for subtitle translation)")
			
 
				+    parser.add_argument('--list-formats', help="List all available subtitle formats",
			
 
				+                        action='store_true')
			
 
				+    parser.add_argument('--list-languages', help="List all available source/destination languages",
			
 
				+                        action='store_true')
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    if args.list_formats:
			
 
				+        print("List of formats:")
			
 
				+        for subtitle_format in FORMATTERS:
			
 
				+            print("{format}".format(format=subtitle_format))
			
 
				+        return 0
			
 
				+
			
 
				+    if args.list_languages:
			
 
				+        print("List of all languages:")
			
 
				+        for code, language in sorted(LANGUAGE_CODES.items()):
			
 
				+            print("{code}\t{language}".format(code=code, language=language))
			
 
				+        return 0
			
 
				+
			
 
				+    if not validate(args):
			
 
				+        return 1
			
 
				+
			
 
				+    try:
			
 
				+        subtitle_file_path = generate_subtitles(
			
 
				+            source_path=args.source_path,
			
 
				+            concurrency=args.concurrency,
			
 
				+            src_language=args.src_language,
			
 
				+            dst_language=args.dst_language,
			
 
				+            api_key=args.api_key,
			
 
				+            subtitle_file_format=args.format,
			
 
				+            output=args.output,
			
 
				+        )
			
 
				+        print("Subtitles file created at {}".format(subtitle_file_path))
			
 
				+    except KeyboardInterrupt:
			
 
				+        return 1
			
 
				+
			
 
				+    return 0
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    sys.exit(main())
			
--- a/composer/autosub/__init__.py
+++ b/composer/autosub/__init__.py
@@ -1,434 +1,434 @@
 
				-"""

			
 
				-Defines autosub's main functionality.

			
 
				-"""

			
 
				-

			
 
				-#!/usr/bin/env python

			
 
				-

			
 
				-from __future__ import absolute_import, print_function, unicode_literals

			
 
				-

			
 
				-import argparse

			
 
				-import audioop

			
 
				-import math

			
 
				-import multiprocessing

			
 
				-import os

			
 
				-from json import JSONDecodeError

			
 
				-import subprocess

			
 
				-import sys

			
 
				-import tempfile

			
 
				-import wave

			
 
				-

			
 
				-import json

			
 
				-import requests

			
 
				-try:

			
 
				-    from json.decoder import JSONDecodeError

			
 
				-except ImportError:

			
 
				-    JSONDecodeError = ValueError

			
 
				-

			
 
				-from googleapiclient.discovery import build

			
 
				-from progressbar import ProgressBar, Percentage, Bar, ETA

			
 
				-

			
 
				-from autosub.constants import (

			
 
				-    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,

			
 
				-)

			
 
				-from autosub.formatters import FORMATTERS

			
 
				-

			
 
				-DEFAULT_SUBTITLE_FORMAT = 'srt'

			
 
				-DEFAULT_CONCURRENCY = 10

			
 
				-DEFAULT_SRC_LANGUAGE = 'en'

			
 
				-DEFAULT_DST_LANGUAGE = 'en'

			
 
				-

			
 
				-

			
 
				-def percentile(arr, percent):

			
 
				-    """

			
 
				-    Calculate the given percentile of arr.

			
 
				-    """

			
 
				-    arr = sorted(arr)

			
 
				-    index = (len(arr) - 1) * percent

			
 
				-    floor = math.floor(index)

			
 
				-    ceil = math.ceil(index)

			
 
				-    if floor == ceil:

			
 
				-        return arr[int(index)]

			
 
				-    low_value = arr[int(floor)] * (ceil - index)

			
 
				-    high_value = arr[int(ceil)] * (index - floor)

			
 
				-    return low_value + high_value

			
 
				-

			
 
				-

			
 
				-class FLACConverter(object): # pylint: disable=too-few-public-methods

			
 
				-    """

			
 
				-    Class for converting a region of an input audio or video file into a FLAC audio file

			
 
				-    """

			
 
				-    def __init__(self, source_path, include_before=0.25, include_after=0.25):

			
 
				-        self.source_path = source_path

			
 
				-        self.include_before = include_before

			
 
				-        self.include_after = include_after

			
 
				-

			
 
				-    def __call__(self, region):

			
 
				-        try:

			
 
				-            start, end = region

			
 
				-            start = max(0, start - self.include_before)

			
 
				-            end += self.include_after

			
 
				-            #delete=False necessary for running on Windows

			
 
				-            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)

			
 
				-            program_ffmpeg = which("ffmpeg")

			
 
				-            command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),

			
 
				-                       "-y", "-i", self.source_path,

			
 
				-                       "-loglevel", "error", temp.name]

			
 
				-            use_shell = True if os.name == "nt" else False

			
 
				-            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)

			
 
				-            read_data = temp.read()

			
 
				-            temp.close()

			
 
				-            os.unlink(temp.name)

			
 
				-            return read_data

			
 
				-

			
 
				-        except KeyboardInterrupt:

			
 
				-            return None

			
 
				-

			
 
				-

			
 
				-class SpeechRecognizer(object): # pylint: disable=too-few-public-methods

			
 
				-    """

			
 
				-    Class for performing speech-to-text for an input FLAC file.

			
 
				-    """

			
 
				-    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):

			
 
				-        self.language = language

			
 
				-        self.rate = rate

			
 
				-        self.api_key = api_key

			
 
				-        self.retries = retries

			
 
				-

			
 
				-    def __call__(self, data):

			
 
				-        try:

			
 
				-            for _ in range(self.retries):

			
 
				-                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)

			
 
				-                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}

			
 
				-

			
 
				-                try:

			
 
				-                    resp = requests.post(url, data=data, headers=headers)

			
 
				-                except requests.exceptions.ConnectionError:

			
 
				-                    continue

			
 
				-

			
 
				-                for line in resp.content.decode('utf-8').split("\n"):

			
 
				-                    try:

			
 
				-                        line = json.loads(line)

			
 
				-                        line = line['result'][0]['alternative'][0]['transcript']

			
 
				-                        return line[:1].upper() + line[1:]

			
 
				-                    except IndexError:

			
 
				-                        # no result

			
 
				-                        continue

			
 
				-                    except JSONDecodeError:

			
 
				-                        continue

			
 
				-

			
 
				-        except KeyboardInterrupt:

			
 
				-            return None

			
 
				-

			
 
				-

			
 
				-class Translator(object): # pylint: disable=too-few-public-methods

			
 
				-    """

			
 
				-    Class for translating a sentence from a one language to another.

			
 
				-    """

			
 
				-    def __init__(self, language, api_key, src, dst):

			
 
				-        self.language = language

			
 
				-        self.api_key = api_key

			
 
				-        self.service = build('translate', 'v2',

			
 
				-                             developerKey=self.api_key)

			
 
				-        self.src = src

			
 
				-        self.dst = dst

			
 
				-

			
 
				-    def __call__(self, sentence):

			
 
				-        try:

			
 
				-            if not sentence:

			
 
				-                return None

			
 
				-

			
 
				-            result = self.service.translations().list( # pylint: disable=no-member

			
 
				-                source=self.src,

			
 
				-                target=self.dst,

			
 
				-                q=[sentence]

			
 
				-            ).execute()

			
 
				-

			
 
				-            if 'translations' in result and result['translations'] and \

			
 
				-                'translatedText' in result['translations'][0]:

			
 
				-                return result['translations'][0]['translatedText']

			
 
				-

			
 
				-            return None

			
 
				-

			
 
				-        except KeyboardInterrupt:

			
 
				-            return None

			
 
				-

			
 
				-

			
 
				-def which(program):

			
 
				-    """

			
 
				-    Return the path for a given executable.

			
 
				-    """

			
 
				-    def is_exe(file_path):

			
 
				-        """

			
 
				-        Checks whether a file is executable.

			
 
				-        """

			
 
				-        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)

			
 
				-    #necessary to run on Windows

			
 
				-    if os.name == "nt":

			
 
				-        program += ".exe"

			
 
				-    fpath, _ = os.path.split(program)

			
 
				-    if fpath:

			
 
				-        if is_exe(program):

			
 
				-            return program

			
 
				-    else:

			
 
				-        #looks for file in the script execution folder before checking on system path

			
 
				-        current_dir = os.getcwd()

			
 
				-        local_program = os.path.join(current_dir, program)

			
 
				-        if is_exe(local_program):

			
 
				-            return local_program

			
 
				-        else:

			
 
				-            for path in os.environ["PATH"].split(os.pathsep):

			
 
				-                path = path.strip('"')

			
 
				-                exe_file = os.path.join(path, program)

			
 
				-                if is_exe(exe_file):

			
 
				-                    return exe_file

			
 
				-    return None

			
 
				-

			
 
				-

			
 
				-def extract_audio(filename, channels=1, rate=16000):

			
 
				-    """

			
 
				-    Extract audio from an input file to a temporary WAV file.

			
 
				-    """

			
 
				-    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)

			
 
				-    if not os.path.isfile(filename):

			
 
				-        print("The given file does not exist: {}".format(filename))

			
 
				-        raise Exception("Invalid filepath: {}".format(filename))

			
 
				-    program_ffmpeg = which("ffmpeg")

			
 
				-    if not program_ffmpeg:

			
 
				-        print("ffmpeg: Executable not found on machine.")

			
 
				-        raise Exception("Dependency not found: ffmpeg")

			
 
				-    command = [str(program_ffmpeg), "-y", "-i", filename,

			
 
				-               "-ac", str(channels), "-ar", str(rate),

			
 
				-               "-loglevel", "error", temp.name]

			
 
				-    use_shell = True if os.name == "nt" else False

			
 
				-    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)

			
 
				-    return temp.name, rate

			
 
				-

			
 
				-

			
 
				-def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals

			
 
				-    """

			
 
				-    Perform voice activity detection on a given audio file.

			
 
				-    """

			
 
				-    reader = wave.open(filename)

			
 
				-    sample_width = reader.getsampwidth()

			
 
				-    rate = reader.getframerate()

			
 
				-    n_channels = reader.getnchannels()

			
 
				-    chunk_duration = float(frame_width) / rate

			
 
				-

			
 
				-    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))

			
 
				-    energies = []

			
 
				-

			
 
				-    for _ in range(n_chunks):

			
 
				-        chunk = reader.readframes(frame_width)

			
 
				-        energies.append(audioop.rms(chunk, sample_width * n_channels))

			
 
				-

			
 
				-    threshold = percentile(energies, 0.2)

			
 
				-

			
 
				-    elapsed_time = 0

			
 
				-

			
 
				-    regions = []

			
 
				-    region_start = None

			
 
				-

			
 
				-    for energy in energies:

			
 
				-        is_silence = energy <= threshold

			
 
				-        max_exceeded = region_start and elapsed_time - region_start >= max_region_size

			
 
				-

			
 
				-        if (max_exceeded or is_silence) and region_start:

			
 
				-            if elapsed_time - region_start >= min_region_size:

			
 
				-                regions.append((region_start, elapsed_time))

			
 
				-                region_start = None

			
 
				-

			
 
				-        elif (not region_start) and (not is_silence):

			
 
				-            region_start = elapsed_time

			
 
				-        elapsed_time += chunk_duration

			
 
				-    return regions

			
 
				-

			
 
				-

			
 
				-def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments

			
 
				-        source_path,

			
 
				-        output=None,

			
 
				-        concurrency=DEFAULT_CONCURRENCY,

			
 
				-        src_language=DEFAULT_SRC_LANGUAGE,

			
 
				-        dst_language=DEFAULT_DST_LANGUAGE,

			
 
				-        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,

			
 
				-        api_key=None,

			
 
				-    ):

			
 
				-    """

			
 
				-    Given an input audio/video file, generate subtitles in the specified language and format.

			
 
				-    """

			
 
				-

			
 
				-    if os.name != "nt" and "Darwin" in os.uname():

			
 
				-        #the default unix fork method does not work on Mac OS

			
 
				-        #need to use forkserver

			
 
				-        if 'forkserver' != multiprocessing.get_start_method(allow_none=True):

			
 
				-            multiprocessing.set_start_method('forkserver')

			
 
				-

			
 
				-    audio_filename, audio_rate = extract_audio(source_path)

			
 
				-

			
 
				-    regions = find_speech_regions(audio_filename)

			
 
				-

			
 
				-    pool = multiprocessing.Pool(concurrency)

			
 
				-    converter = FLACConverter(source_path=audio_filename)

			
 
				-    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,

			
 
				-                                  api_key=GOOGLE_SPEECH_API_KEY)

			
 
				-

			
 
				-    transcripts = []

			
 
				-    if regions:

			
 
				-        try:

			
 
				-            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',

			
 
				-                       ETA()]

			
 
				-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

			
 
				-            extracted_regions = []

			
 
				-            for i, extracted_region in enumerate(pool.imap(converter, regions)):

			
 
				-                extracted_regions.append(extracted_region)

			
 
				-                pbar.update(i)

			
 
				-            pbar.finish()

			
 
				-

			
 
				-            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]

			
 
				-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

			
 
				-

			
 
				-            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):

			
 
				-                transcripts.append(transcript)

			
 
				-                pbar.update(i)

			
 
				-            pbar.finish()

			
 
				-

			
 
				-            if src_language.split("-")[0] != dst_language.split("-")[0]:

			
 
				-                if api_key:

			
 
				-                    google_translate_api_key = api_key

			
 
				-                    translator = Translator(dst_language, google_translate_api_key,

			
 
				-                                            dst=dst_language,

			
 
				-                                            src=src_language)

			
 
				-                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)

			
 
				-                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]

			
 
				-                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

			
 
				-                    translated_transcripts = []

			
 
				-                    for i, transcript in enumerate(pool.imap(translator, transcripts)):

			
 
				-                        translated_transcripts.append(transcript)

			
 
				-                        pbar.update(i)

			
 
				-                    pbar.finish()

			
 
				-                    transcripts = translated_transcripts

			
 
				-                else:

			
 
				-                    print(

			
 
				-                        "Error: Subtitle translation requires specified Google Translate API key. "

			
 
				-                        "See --help for further information."

			
 
				-                    )

			
 
				-                    return 1

			
 
				-

			
 
				-        except KeyboardInterrupt:

			
 
				-            pbar.finish()

			
 
				-            pool.terminate()

			
 
				-            pool.join()

			
 
				-            print("Cancelling transcription")

			
 
				-            raise

			
 
				-

			
 
				-    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]

			
 
				-    formatter = FORMATTERS.get(subtitle_file_format)

			
 
				-    formatted_subtitles = formatter(timed_subtitles)

			
 
				-

			
 
				-    dest = output

			
 
				-

			
 
				-    if not dest:

			
 
				-        base = os.path.splitext(source_path)[0]

			
 
				-        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

			
 
				-

			
 
				-    with open(dest, 'wb') as output_file:

			
 
				-        output_file.write(formatted_subtitles.encode("utf-8"))

			
 
				-

			
 
				-    os.remove(audio_filename)

			
 
				-

			
 
				-    return dest

			
 
				-

			
 
				-

			
 
				-def validate(args):

			
 
				-    """

			
 
				-    Check that the CLI arguments passed to autosub are valid.

			
 
				-    """

			
 
				-    if args.format not in FORMATTERS:

			
 
				-        print(

			
 
				-            "Subtitle format not supported. "

			
 
				-            "Run with --list-formats to see all supported formats."

			
 
				-        )

			
 
				-        return False

			
 
				-

			
 
				-    if args.src_language not in LANGUAGE_CODES.keys():

			
 
				-        print(

			
 
				-            "Source language not supported. "

			
 
				-            "Run with --list-languages to see all supported languages."

			
 
				-        )

			
 
				-        return False

			
 
				-

			
 
				-    if args.dst_language not in LANGUAGE_CODES.keys():

			
 
				-        print(

			
 
				-            "Destination language not supported. "

			
 
				-            "Run with --list-languages to see all supported languages."

			
 
				-        )

			
 
				-        return False

			
 
				-

			
 
				-    if not args.source_path:

			
 
				-        print("Error: You need to specify a source path.")

			
 
				-        return False

			
 
				-

			
 
				-    return True

			
 
				-

			
 
				-

			
 
				-def main():

			
 
				-    """

			
 
				-    Run autosub as a command-line program.

			
 
				-    """

			
 
				-    parser = argparse.ArgumentParser()

			
 
				-    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",

			
 
				-                        nargs='?')

			
 
				-    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",

			
 
				-                        type=int, default=DEFAULT_CONCURRENCY)

			
 
				-    parser.add_argument('-o', '--output',

			
 
				-                        help="Output path for subtitles (by default, subtitles are saved in \

			
 
				-                        the same directory and name as the source path)")

			
 
				-    parser.add_argument('-F', '--format', help="Destination subtitle format",

			
 
				-                        default=DEFAULT_SUBTITLE_FORMAT)

			
 
				-    parser.add_argument('-S', '--src-language', help="Language spoken in source file",

			
 
				-                        default=DEFAULT_SRC_LANGUAGE)

			
 
				-    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",

			
 
				-                        default=DEFAULT_DST_LANGUAGE)

			
 
				-    parser.add_argument('-K', '--api-key',

			
 
				-                        help="The Google Translate API key to be used. \

			
 
				-                        (Required for subtitle translation)")

			
 
				-    parser.add_argument('--list-formats', help="List all available subtitle formats",

			
 
				-                        action='store_true')

			
 
				-    parser.add_argument('--list-languages', help="List all available source/destination languages",

			
 
				-                        action='store_true')

			
 
				-

			
 
				-    args = parser.parse_args()

			
 
				-

			
 
				-    if args.list_formats:

			
 
				-        print("List of formats:")

			
 
				-        for subtitle_format in FORMATTERS:

			
 
				-            print("{format}".format(format=subtitle_format))

			
 
				-        return 0

			
 
				-

			
 
				-    if args.list_languages:

			
 
				-        print("List of all languages:")

			
 
				-        for code, language in sorted(LANGUAGE_CODES.items()):

			
 
				-            print("{code}\t{language}".format(code=code, language=language))

			
 
				-        return 0

			
 
				-

			
 
				-    if not validate(args):

			
 
				-        return 1

			
 
				-

			
 
				-    try:

			
 
				-        subtitle_file_path = generate_subtitles(

			
 
				-            source_path=args.source_path,

			
 
				-            concurrency=args.concurrency,

			
 
				-            src_language=args.src_language,

			
 
				-            dst_language=args.dst_language,

			
 
				-            api_key=args.api_key,

			
 
				-            subtitle_file_format=args.format,

			
 
				-            output=args.output,

			
 
				-        )

			
 
				-        print("Subtitles file created at {}".format(subtitle_file_path))

			
 
				-    except KeyboardInterrupt:

			
 
				-        return 1

			
 
				-

			
 
				-    return 0

			
 
				-

			
 
				-

			
 
				-if __name__ == '__main__':

			
 
				-    sys.exit(main())

			
 
				+"""
			
 
				+Defines autosub's main functionality.
			
 
				+"""
			
 
				+
			
 
				+#!/usr/bin/env python
			
 
				+
			
 
				+from __future__ import absolute_import, print_function, unicode_literals
			
 
				+
			
 
				+import argparse
			
 
				+import audioop
			
 
				+import math
			
 
				+import multiprocessing
			
 
				+import os
			
 
				+from json import JSONDecodeError
			
 
				+import subprocess
			
 
				+import sys
			
 
				+import tempfile
			
 
				+import wave
			
 
				+
			
 
				+import json
			
 
				+import requests
			
 
				+try:
			
 
				+    from json.decoder import JSONDecodeError
			
 
				+except ImportError:
			
 
				+    JSONDecodeError = ValueError
			
 
				+
			
 
				+from googleapiclient.discovery import build
			
 
				+from progressbar import ProgressBar, Percentage, Bar, ETA
			
 
				+
			
 
				+from autosub.constants import (
			
 
				+    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
			
 
				+)
			
 
				+from autosub.formatters import FORMATTERS
			
 
				+
			
 
				+DEFAULT_SUBTITLE_FORMAT = 'srt'
			
 
				+DEFAULT_CONCURRENCY = 10
			
 
				+DEFAULT_SRC_LANGUAGE = 'en'
			
 
				+DEFAULT_DST_LANGUAGE = 'en'
			
 
				+
			
 
				+
			
 
				+def percentile(arr, percent):
			
 
				+    """
			
 
				+    Calculate the given percentile of arr.
			
 
				+    """
			
 
				+    arr = sorted(arr)
			
 
				+    index = (len(arr) - 1) * percent
			
 
				+    floor = math.floor(index)
			
 
				+    ceil = math.ceil(index)
			
 
				+    if floor == ceil:
			
 
				+        return arr[int(index)]
			
 
				+    low_value = arr[int(floor)] * (ceil - index)
			
 
				+    high_value = arr[int(ceil)] * (index - floor)
			
 
				+    return low_value + high_value
			
 
				+
			
 
				+
			
 
				+class FLACConverter(object): # pylint: disable=too-few-public-methods
			
 
				+    """
			
 
				+    Class for converting a region of an input audio or video file into a FLAC audio file
			
 
				+    """
			
 
				+    def __init__(self, source_path, include_before=0.25, include_after=0.25):
			
 
				+        self.source_path = source_path
			
 
				+        self.include_before = include_before
			
 
				+        self.include_after = include_after
			
 
				+
			
 
				+    def __call__(self, region):
			
 
				+        try:
			
 
				+            start, end = region
			
 
				+            start = max(0, start - self.include_before)
			
 
				+            end += self.include_after
			
 
				+            #delete=False necessary for running on Windows
			
 
				+            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
			
 
				+            program_ffmpeg = which("ffmpeg")
			
 
				+            command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),
			
 
				+                       "-y", "-i", self.source_path,
			
 
				+                       "-loglevel", "error", temp.name]
			
 
				+            use_shell = True if os.name == "nt" else False
			
 
				+            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
			
 
				+            read_data = temp.read()
			
 
				+            temp.close()
			
 
				+            os.unlink(temp.name)
			
 
				+            return read_data
			
 
				+
			
 
				+        except KeyboardInterrupt:
			
 
				+            return None
			
 
				+
			
 
				+
			
 
				+class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
			
 
				+    """
			
 
				+    Class for performing speech-to-text for an input FLAC file.
			
 
				+    """
			
 
				+    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
			
 
				+        self.language = language
			
 
				+        self.rate = rate
			
 
				+        self.api_key = api_key
			
 
				+        self.retries = retries
			
 
				+
			
 
				+    def __call__(self, data):
			
 
				+        try:
			
 
				+            for _ in range(self.retries):
			
 
				+                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
			
 
				+                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
			
 
				+
			
 
				+                try:
			
 
				+                    resp = requests.post(url, data=data, headers=headers)
			
 
				+                except requests.exceptions.ConnectionError:
			
 
				+                    continue
			
 
				+
			
 
				+                for line in resp.content.decode('utf-8').split("\n"):
			
 
				+                    try:
			
 
				+                        line = json.loads(line)
			
 
				+                        line = line['result'][0]['alternative'][0]['transcript']
			
 
				+                        return line[:1].upper() + line[1:]
			
 
				+                    except IndexError:
			
 
				+                        # no result
			
 
				+                        continue
			
 
				+                    except JSONDecodeError:
			
 
				+                        continue
			
 
				+
			
 
				+        except KeyboardInterrupt:
			
 
				+            return None
			
 
				+
			
 
				+
			
 
				+class Translator(object): # pylint: disable=too-few-public-methods
			
 
				+    """
			
 
				+    Class for translating a sentence from a one language to another.
			
 
				+    """
			
 
				+    def __init__(self, language, api_key, src, dst):
			
 
				+        self.language = language
			
 
				+        self.api_key = api_key
			
 
				+        self.service = build('translate', 'v2',
			
 
				+                             developerKey=self.api_key)
			
 
				+        self.src = src
			
 
				+        self.dst = dst
			
 
				+
			
 
				+    def __call__(self, sentence):
			
 
				+        try:
			
 
				+            if not sentence:
			
 
				+                return None
			
 
				+
			
 
				+            result = self.service.translations().list( # pylint: disable=no-member
			
 
				+                source=self.src,
			
 
				+                target=self.dst,
			
 
				+                q=[sentence]
			
 
				+            ).execute()
			
 
				+
			
 
				+            if 'translations' in result and result['translations'] and \
			
 
				+                'translatedText' in result['translations'][0]:
			
 
				+                return result['translations'][0]['translatedText']
			
 
				+
			
 
				+            return None
			
 
				+
			
 
				+        except KeyboardInterrupt:
			
 
				+            return None
			
 
				+
			
 
				+
			
 
				+def which(program):
			
 
				+    """
			
 
				+    Return the path for a given executable.
			
 
				+    """
			
 
				+    def is_exe(file_path):
			
 
				+        """
			
 
				+        Checks whether a file is executable.
			
 
				+        """
			
 
				+        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
			
 
				+    #necessary to run on Windows
			
 
				+    if os.name == "nt":
			
 
				+        program += ".exe"
			
 
				+    fpath, _ = os.path.split(program)
			
 
				+    if fpath:
			
 
				+        if is_exe(program):
			
 
				+            return program
			
 
				+    else:
			
 
				+        #looks for file in the script execution folder before checking on system path
			
 
				+        current_dir = os.getcwd()
			
 
				+        local_program = os.path.join(current_dir, program)
			
 
				+        if is_exe(local_program):
			
 
				+            return local_program
			
 
				+        else:
			
 
				+            for path in os.environ["PATH"].split(os.pathsep):
			
 
				+                path = path.strip('"')
			
 
				+                exe_file = os.path.join(path, program)
			
 
				+                if is_exe(exe_file):
			
 
				+                    return exe_file
			
 
				+    return None
			
 
				+
			
 
				+
			
 
				+def extract_audio(filename, channels=1, rate=16000):
			
 
				+    """
			
 
				+    Extract audio from an input file to a temporary WAV file.
			
 
				+    """
			
 
				+    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
			
 
				+    if not os.path.isfile(filename):
			
 
				+        print("The given file does not exist: {}".format(filename))
			
 
				+        raise Exception("Invalid filepath: {}".format(filename))
			
 
				+    program_ffmpeg = which("ffmpeg")
			
 
				+    if not program_ffmpeg:
			
 
				+        print("ffmpeg: Executable not found on machine.")
			
 
				+        raise Exception("Dependency not found: ffmpeg")
			
 
				+    command = [str(program_ffmpeg), "-y", "-i", filename,
			
 
				+               "-ac", str(channels), "-ar", str(rate),
			
 
				+               "-loglevel", "error", temp.name]
			
 
				+    use_shell = True if os.name == "nt" else False
			
 
				+    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
			
 
				+    return temp.name, rate
			
 
				+
			
 
				+
			
 
				+def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
			
 
				+    """
			
 
				+    Perform voice activity detection on a given audio file.
			
 
				+    """
			
 
				+    reader = wave.open(filename)
			
 
				+    sample_width = reader.getsampwidth()
			
 
				+    rate = reader.getframerate()
			
 
				+    n_channels = reader.getnchannels()
			
 
				+    chunk_duration = float(frame_width) / rate
			
 
				+
			
 
				+    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
			
 
				+    energies = []
			
 
				+
			
 
				+    for _ in range(n_chunks):
			
 
				+        chunk = reader.readframes(frame_width)
			
 
				+        energies.append(audioop.rms(chunk, sample_width * n_channels))
			
 
				+
			
 
				+    threshold = percentile(energies, 0.2)
			
 
				+
			
 
				+    elapsed_time = 0
			
 
				+
			
 
				+    regions = []
			
 
				+    region_start = None
			
 
				+
			
 
				+    for energy in energies:
			
 
				+        is_silence = energy <= threshold
			
 
				+        max_exceeded = region_start and elapsed_time - region_start >= max_region_size
			
 
				+
			
 
				+        if (max_exceeded or is_silence) and region_start:
			
 
				+            if elapsed_time - region_start >= min_region_size:
			
 
				+                regions.append((region_start, elapsed_time))
			
 
				+                region_start = None
			
 
				+
			
 
				+        elif (not region_start) and (not is_silence):
			
 
				+            region_start = elapsed_time
			
 
				+        elapsed_time += chunk_duration
			
 
				+    return regions
			
 
				+
			
 
				+
			
 
				+def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
			
 
				+        source_path,
			
 
				+        output=None,
			
 
				+        concurrency=DEFAULT_CONCURRENCY,
			
 
				+        src_language=DEFAULT_SRC_LANGUAGE,
			
 
				+        dst_language=DEFAULT_DST_LANGUAGE,
			
 
				+        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
			
 
				+        api_key=None,
			
 
				+    ):
			
 
				+    """
			
 
				+    Given an input audio/video file, generate subtitles in the specified language and format.
			
 
				+    """
			
 
				+
			
 
				+    if os.name != "nt" and "Darwin" in os.uname():
			
 
				+        #the default unix fork method does not work on Mac OS
			
 
				+        #need to use forkserver
			
 
				+        if 'forkserver' != multiprocessing.get_start_method(allow_none=True):
			
 
				+            multiprocessing.set_start_method('forkserver')
			
 
				+
			
 
				+    audio_filename, audio_rate = extract_audio(source_path)
			
 
				+
			
 
				+    regions = find_speech_regions(audio_filename)
			
 
				+
			
 
				+    pool = multiprocessing.Pool(concurrency)
			
 
				+    converter = FLACConverter(source_path=audio_filename)
			
 
				+    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
			
 
				+                                  api_key=GOOGLE_SPEECH_API_KEY)
			
 
				+
			
 
				+    transcripts = []
			
 
				+    if regions:
			
 
				+        try:
			
 
				+            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
			
 
				+                       ETA()]
			
 
				+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
			
 
				+            extracted_regions = []
			
 
				+            for i, extracted_region in enumerate(pool.imap(converter, regions)):
			
 
				+                extracted_regions.append(extracted_region)
			
 
				+                pbar.update(i)
			
 
				+            pbar.finish()
			
 
				+
			
 
				+            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
			
 
				+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
			
 
				+
			
 
				+            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
			
 
				+                transcripts.append(transcript)
			
 
				+                pbar.update(i)
			
 
				+            pbar.finish()
			
 
				+
			
 
				+            if src_language.split("-")[0] != dst_language.split("-")[0]:
			
 
				+                if api_key:
			
 
				+                    google_translate_api_key = api_key
			
 
				+                    translator = Translator(dst_language, google_translate_api_key,
			
 
				+                                            dst=dst_language,
			
 
				+                                            src=src_language)
			
 
				+                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
			
 
				+                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
			
 
				+                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
			
 
				+                    translated_transcripts = []
			
 
				+                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
			
 
				+                        translated_transcripts.append(transcript)
			
 
				+                        pbar.update(i)
			
 
				+                    pbar.finish()
			
 
				+                    transcripts = translated_transcripts
			
 
				+                else:
			
 
				+                    print(
			
 
				+                        "Error: Subtitle translation requires specified Google Translate API key. "
			
 
				+                        "See --help for further information."
			
 
				+                    )
			
 
				+                    return 1
			
 
				+
			
 
				+        except KeyboardInterrupt:
			
 
				+            pbar.finish()
			
 
				+            pool.terminate()
			
 
				+            pool.join()
			
 
				+            print("Cancelling transcription")
			
 
				+            raise
			
 
				+
			
 
				+    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
			
 
				+    formatter = FORMATTERS.get(subtitle_file_format)
			
 
				+    formatted_subtitles = formatter(timed_subtitles)
			
 
				+
			
 
				+    dest = output
			
 
				+
			
 
				+    if not dest:
			
 
				+        base = os.path.splitext(source_path)[0]
			
 
				+        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
			
 
				+
			
 
				+    with open(dest, 'wb') as output_file:
			
 
				+        output_file.write(formatted_subtitles.encode("utf-8"))
			
 
				+
			
 
				+    os.remove(audio_filename)
			
 
				+
			
 
				+    return dest
			
 
				+
			
 
				+
			
 
				+def validate(args):
			
 
				+    """
			
 
				+    Check that the CLI arguments passed to autosub are valid.
			
 
				+    """
			
 
				+    if args.format not in FORMATTERS:
			
 
				+        print(
			
 
				+            "Subtitle format not supported. "
			
 
				+            "Run with --list-formats to see all supported formats."
			
 
				+        )
			
 
				+        return False
			
 
				+
			
 
				+    if args.src_language not in LANGUAGE_CODES.keys():
			
 
				+        print(
			
 
				+            "Source language not supported. "
			
 
				+            "Run with --list-languages to see all supported languages."
			
 
				+        )
			
 
				+        return False
			
 
				+
			
 
				+    if args.dst_language not in LANGUAGE_CODES.keys():
			
 
				+        print(
			
 
				+            "Destination language not supported. "
			
 
				+            "Run with --list-languages to see all supported languages."
			
 
				+        )
			
 
				+        return False
			
 
				+
			
 
				+    if not args.source_path:
			
 
				+        print("Error: You need to specify a source path.")
			
 
				+        return False
			
 
				+
			
 
				+    return True
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    """
			
 
				+    Run autosub as a command-line program.
			
 
				+    """
			
 
				+    parser = argparse.ArgumentParser()
			
 
				+    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
			
 
				+                        nargs='?')
			
 
				+    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
			
 
				+                        type=int, default=DEFAULT_CONCURRENCY)
			
 
				+    parser.add_argument('-o', '--output',
			
 
				+                        help="Output path for subtitles (by default, subtitles are saved in \
			
 
				+                        the same directory and name as the source path)")
			
 
				+    parser.add_argument('-F', '--format', help="Destination subtitle format",
			
 
				+                        default=DEFAULT_SUBTITLE_FORMAT)
			
 
				+    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
			
 
				+                        default=DEFAULT_SRC_LANGUAGE)
			
 
				+    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
			
 
				+                        default=DEFAULT_DST_LANGUAGE)
			
 
				+    parser.add_argument('-K', '--api-key',
			
 
				+                        help="The Google Translate API key to be used. \
			
 
				+                        (Required for subtitle translation)")
			
 
				+    parser.add_argument('--list-formats', help="List all available subtitle formats",
			
 
				+                        action='store_true')
			
 
				+    parser.add_argument('--list-languages', help="List all available source/destination languages",
			
 
				+                        action='store_true')
			
 
				+
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    if args.list_formats:
			
 
				+        print("List of formats:")
			
 
				+        for subtitle_format in FORMATTERS:
			
 
				+            print("{format}".format(format=subtitle_format))
			
 
				+        return 0
			
 
				+
			
 
				+    if args.list_languages:
			
 
				+        print("List of all languages:")
			
 
				+        for code, language in sorted(LANGUAGE_CODES.items()):
			
 
				+            print("{code}\t{language}".format(code=code, language=language))
			
 
				+        return 0
			
 
				+
			
 
				+    if not validate(args):
			
 
				+        return 1
			
 
				+
			
 
				+    try:
			
 
				+        subtitle_file_path = generate_subtitles(
			
 
				+            source_path=args.source_path,
			
 
				+            concurrency=args.concurrency,
			
 
				+            src_language=args.src_language,
			
 
				+            dst_language=args.dst_language,
			
 
				+            api_key=args.api_key,
			
 
				+            subtitle_file_format=args.format,
			
 
				+            output=args.output,
			
 
				+        )
			
 
				+        print("Subtitles file created at {}".format(subtitle_file_path))
			
 
				+    except KeyboardInterrupt:
			
 
				+        return 1
			
 
				+
			
 
				+    return 0
			
 
				+
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    sys.exit(main())
			
--- a/composer/autosub/__pycache__/__init__.cpython-37.pyc
+++ b/composer/autosub/__pycache__/__init__.cpython-37.pyc
--- a/composer/autosub/__pycache__/__init__.cpython-38.pyc
+++ b/composer/autosub/__pycache__/__init__.cpython-38.pyc
--- a/composer/autosub/__pycache__/__init__.cpython-39.pyc
+++ b/composer/autosub/__pycache__/__init__.cpython-39.pyc
--- a/composer/autosub/__pycache__/constants.cpython-37.pyc
+++ b/composer/autosub/__pycache__/constants.cpython-37.pyc
--- a/composer/autosub/__pycache__/constants.cpython-38.pyc
+++ b/composer/autosub/__pycache__/constants.cpython-38.pyc
--- a/composer/autosub/__pycache__/constants.cpython-39.pyc
+++ b/composer/autosub/__pycache__/constants.cpython-39.pyc
--- a/composer/autosub/__pycache__/formatters.cpython-37.pyc
+++ b/composer/autosub/__pycache__/formatters.cpython-37.pyc
--- a/composer/autosub/__pycache__/formatters.cpython-38.pyc
+++ b/composer/autosub/__pycache__/formatters.cpython-38.pyc
--- a/composer/autosub/__pycache__/formatters.cpython-39.pyc
+++ b/composer/autosub/__pycache__/formatters.cpython-39.pyc
--- a/composer/autosub/constants.py
+++ b/composer/autosub/constants.py
@@ -1,118 +1,118 @@
 
				-"""

			
 
				-Defines constants used by autosub.s

			
 
				-"""

			
 
				-

			
 
				-from __future__ import unicode_literals

			
 
				-

			
 
				-GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"

			
 
				-GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long

			
 
				-

			
 
				-LANGUAGE_CODES = {

			
 
				-    'af': 'Afrikaans',

			
 
				-    'ar': 'Arabic',

			
 
				-    'az': 'Azerbaijani',

			
 
				-    'be': 'Belarusian',

			
 
				-    'bg': 'Bulgarian',

			
 
				-    'bn': 'Bengali',

			
 
				-    'bs': 'Bosnian',

			
 
				-    'ca': 'Catalan',

			
 
				-    'ceb': 'Cebuano',

			
 
				-    'cs': 'Czech',

			
 
				-    'cy': 'Welsh',

			
 
				-    'da': 'Danish',

			
 
				-    'de': 'German',

			
 
				-    'el': 'Greek',

			
 
				-    'en-AU': 'English (Australia)',

			
 
				-    'en-CA': 'English (Canada)',

			
 
				-    'en-GB': 'English (United Kingdom)',

			
 
				-    'en-IN': 'English (India)',

			
 
				-    'en-IE': 'English (Ireland)',

			
 
				-    'en-NZ': 'English (New Zealand)',

			
 
				-    'en-PH': 'English (Philippines)',

			
 
				-    'en-SG': 'English (Singapore)',

			
 
				-    'en-US': 'English (United States)',

			
 
				-    'eo': 'Esperanto',

			
 
				-    'es-AR': 'Spanish (Argentina)',

			
 
				-    'es-CL': 'Spanish (Chile)',

			
 
				-    'es-ES': 'Spanish (Spain)',

			
 
				-    'es-US': 'Spanish (United States)',

			
 
				-    'es-MX': 'Spanish (Mexico)',

			
 
				-    'es': 'Spanish',

			
 
				-    'et': 'Estonian',

			
 
				-    'eu': 'Basque',

			
 
				-    'fa': 'Persian',

			
 
				-    'fi': 'Finnish',

			
 
				-    'fr': 'French',

			
 
				-    'ga': 'Irish',

			
 
				-    'gl': 'Galician',

			
 
				-    'gu': 'Gujarati',

			
 
				-    'ha': 'Hausa',

			
 
				-    'hi': 'Hindi',

			
 
				-    'hmn': 'Hmong',

			
 
				-    'hr': 'Croatian',

			
 
				-    'ht': 'Haitian Creole',

			
 
				-    'hu': 'Hungarian',

			
 
				-    'hy': 'Armenian',

			
 
				-    'id': 'Indonesian',

			
 
				-    'ig': 'Igbo',

			
 
				-    'is': 'Icelandic',

			
 
				-    'it': 'Italian',

			
 
				-    'iw': 'Hebrew',

			
 
				-    'ja': 'Japanese',

			
 
				-    'jw': 'Javanese',

			
 
				-    'ka': 'Georgian',

			
 
				-    'kk': 'Kazakh',

			
 
				-    'km': 'Khmer',

			
 
				-    'kn': 'Kannada',

			
 
				-    'ko': 'Korean',

			
 
				-    'la': 'Latin',

			
 
				-    'lo': 'Lao',

			
 
				-    'lt': 'Lithuanian',

			
 
				-    'lv': 'Latvian',

			
 
				-    'mg': 'Malagasy',

			
 
				-    'mi': 'Maori',

			
 
				-    'mk': 'Macedonian',

			
 
				-    'ml': 'Malayalam',

			
 
				-    'mn': 'Mongolian',

			
 
				-    'mr': 'Marathi',

			
 
				-    'ms': 'Malay',

			
 
				-    'mt': 'Maltese',

			
 
				-    'my': 'Myanmar (Burmese)',

			
 
				-    'ne': 'Nepali',

			
 
				-    'nl': 'Dutch',

			
 
				-    'no': 'Norwegian',

			
 
				-    'ny': 'Chichewa',

			
 
				-    'pa': 'Punjabi',

			
 
				-    'pl': 'Polish',

			
 
				-    'pt-BR': 'Portuguese (Brazil)',

			
 
				-    'pt-PT': 'Portuguese (Portugal)',

			
 
				-    'ro': 'Romanian',

			
 
				-    'ru': 'Russian',

			
 
				-    'si': 'Sinhala',

			
 
				-    'sk': 'Slovak',

			
 
				-    'sl': 'Slovenian',

			
 
				-    'so': 'Somali',

			
 
				-    'sq': 'Albanian',

			
 
				-    'sr': 'Serbian',

			
 
				-    'st': 'Sesotho',

			
 
				-    'su': 'Sudanese',

			
 
				-    'sv': 'Swedish',

			
 
				-    'sw': 'Swahili',

			
 
				-    'ta': 'Tamil',

			
 
				-    'te': 'Telugu',

			
 
				-    'tg': 'Tajik',

			
 
				-    'th': 'Thai',

			
 
				-    'tl': 'Filipino',

			
 
				-    'tr': 'Turkish',

			
 
				-    'uk': 'Ukrainian',

			
 
				-    'ur': 'Urdu',

			
 
				-    'uz': 'Uzbek',

			
 
				-    'vi': 'Vietnamese',

			
 
				-    'yi': 'Yiddish',

			
 
				-    'yo': 'Yoruba',

			
 
				-    'yue-Hant-HK': 'Cantonese, (Traditional HK)',

			
 
				-    'zh': 'Chinese (Simplified, China)',

			
 
				-    'zh-HK': 'Chinese (Simplified, Hong Kong)',

			
 
				-    'zh-TW': 'Chinese (Traditional, Taiwan)',

			
 
				-    'zu': 'Zulu',

			
 
				-}

			
 
				+"""
			
 
				+Defines constants used by autosub.
			
 
				+"""
			
 
				+
			
 
				+from __future__ import unicode_literals
			
 
				+
			
 
				+GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
			
 
				+GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
			
 
				+
			
 
				+LANGUAGE_CODES = {
			
 
				+    'af': 'Afrikaans',
			
 
				+    'ar': 'Arabic',
			
 
				+    'az': 'Azerbaijani',
			
 
				+    'be': 'Belarusian',
			
 
				+    'bg': 'Bulgarian',
			
 
				+    'bn': 'Bengali',
			
 
				+    'bs': 'Bosnian',
			
 
				+    'ca': 'Catalan',
			
 
				+    'ceb': 'Cebuano',
			
 
				+    'cs': 'Czech',
			
 
				+    'cy': 'Welsh',
			
 
				+    'da': 'Danish',
			
 
				+    'de': 'German',
			
 
				+    'el': 'Greek',
			
 
				+    'en-AU': 'English (Australia)',
			
 
				+    'en-CA': 'English (Canada)',
			
 
				+    'en-GB': 'English (United Kingdom)',
			
 
				+    'en-IN': 'English (India)',
			
 
				+    'en-IE': 'English (Ireland)',
			
 
				+    'en-NZ': 'English (New Zealand)',
			
 
				+    'en-PH': 'English (Philippines)',
			
 
				+    'en-SG': 'English (Singapore)',
			
 
				+    'en-US': 'English (United States)',
			
 
				+    'eo': 'Esperanto',
			
 
				+    'es-AR': 'Spanish (Argentina)',
			
 
				+    'es-CL': 'Spanish (Chile)',
			
 
				+    'es-ES': 'Spanish (Spain)',
			
 
				+    'es-US': 'Spanish (United States)',
			
 
				+    'es-MX': 'Spanish (Mexico)',
			
 
				+    'es': 'Spanish',
			
 
				+    'et': 'Estonian',
			
 
				+    'eu': 'Basque',
			
 
				+    'fa': 'Persian',
			
 
				+    'fi': 'Finnish',
			
 
				+    'fr': 'French',
			
 
				+    'ga': 'Irish',
			
 
				+    'gl': 'Galician',
			
 
				+    'gu': 'Gujarati',
			
 
				+    'ha': 'Hausa',
			
 
				+    'hi': 'Hindi',
			
 
				+    'hmn': 'Hmong',
			
 
				+    'hr': 'Croatian',
			
 
				+    'ht': 'Haitian Creole',
			
 
				+    'hu': 'Hungarian',
			
 
				+    'hy': 'Armenian',
			
 
				+    'id': 'Indonesian',
			
 
				+    'ig': 'Igbo',
			
 
				+    'is': 'Icelandic',
			
 
				+    'it': 'Italian',
			
 
				+    'iw': 'Hebrew',
			
 
				+    'ja': 'Japanese',
			
 
				+    'jw': 'Javanese',
			
 
				+    'ka': 'Georgian',
			
 
				+    'kk': 'Kazakh',
			
 
				+    'km': 'Khmer',
			
 
				+    'kn': 'Kannada',
			
 
				+    'ko': 'Korean',
			
 
				+    'la': 'Latin',
			
 
				+    'lo': 'Lao',
			
 
				+    'lt': 'Lithuanian',
			
 
				+    'lv': 'Latvian',
			
 
				+    'mg': 'Malagasy',
			
 
				+    'mi': 'Maori',
			
 
				+    'mk': 'Macedonian',
			
 
				+    'ml': 'Malayalam',
			
 
				+    'mn': 'Mongolian',
			
 
				+    'mr': 'Marathi',
			
 
				+    'ms': 'Malay',
			
 
				+    'mt': 'Maltese',
			
 
				+    'my': 'Myanmar (Burmese)',
			
 
				+    'ne': 'Nepali',
			
 
				+    'nl': 'Dutch',
			
 
				+    'no': 'Norwegian',
			
 
				+    'ny': 'Chichewa',
			
 
				+    'pa': 'Punjabi',
			
 
				+    'pl': 'Polish',
			
 
				+    'pt-BR': 'Portuguese (Brazil)',
			
 
				+    'pt-PT': 'Portuguese (Portugal)',
			
 
				+    'ro': 'Romanian',
			
 
				+    'ru': 'Russian',
			
 
				+    'si': 'Sinhala',
			
 
				+    'sk': 'Slovak',
			
 
				+    'sl': 'Slovenian',
			
 
				+    'so': 'Somali',
			
 
				+    'sq': 'Albanian',
			
 
				+    'sr': 'Serbian',
			
 
				+    'st': 'Sesotho',
			
 
				+    'su': 'Sudanese',
			
 
				+    'sv': 'Swedish',
			
 
				+    'sw': 'Swahili',
			
 
				+    'ta': 'Tamil',
			
 
				+    'te': 'Telugu',
			
 
				+    'tg': 'Tajik',
			
 
				+    'th': 'Thai',
			
 
				+    'tl': 'Filipino',
			
 
				+    'tr': 'Turkish',
			
 
				+    'uk': 'Ukrainian',
			
 
				+    'ur': 'Urdu',
			
 
				+    'uz': 'Uzbek',
			
 
				+    'vi': 'Vietnamese',
			
 
				+    'yi': 'Yiddish',
			
 
				+    'yo': 'Yoruba',
			
 
				+    'yue-Hant-HK': 'Cantonese, (Traditional HK)',
			
 
				+    'zh': 'Chinese (Simplified, China)',
			
 
				+    'zh-HK': 'Chinese (Simplified, Hong Kong)',
			
 
				+    'zh-TW': 'Chinese (Traditional, Taiwan)',
			
 
				+    'zu': 'Zulu',
			
 
				+}
			
--- a/composer/autosub/formatters.py
+++ b/composer/autosub/formatters.py
@@ -1,66 +1,66 @@
 
				-"""

			
 
				-Defines subtitle formatters used by autosub.s

			
 
				-"""

			
 
				-

			
 
				-# -*- coding: utf-8 -*-

			
 
				-from __future__ import unicode_literals

			
 
				-

			
 
				-import json

			
 
				-

			
 
				-import pysrt

			
 
				-import six

			
 
				-

			
 
				-

			
 
				-def srt_formatter(subtitles, padding_before=0, padding_after=0):

			
 
				-    """

			
 
				-    Serialize a list of subtitles according to the SRT format, with optional time padding.

			
 
				-    """

			
 
				-    sub_rip_file = pysrt.SubRipFile()

			
 
				-    for i, ((start, end), text) in enumerate(subtitles, start=1):

			
 
				-        item = pysrt.SubRipItem()

			
 
				-        item.index = i

			
 
				-        item.text = six.text_type(text)

			
 
				-        item.start.seconds = max(0, start - padding_before)

			
 
				-        item.end.seconds = end + padding_after

			
 
				-        sub_rip_file.append(item)

			
 
				-    return '\n'.join(six.text_type(item) for item in sub_rip_file)

			
 
				-

			
 
				-

			
 
				-def vtt_formatter(subtitles, padding_before=0, padding_after=0):

			
 
				-    """

			
 
				-    Serialize a list of subtitles according to the VTT format, with optional time padding.

			
 
				-    """

			
 
				-    text = srt_formatter(subtitles, padding_before, padding_after)

			
 
				-    text = 'WEBVTT\n\n' + text.replace(',', '.')

			
 
				-    return text

			
 
				-

			
 
				-

			
 
				-def json_formatter(subtitles):

			
 
				-    """

			
 
				-    Serialize a list of subtitles as a JSON blob.

			
 
				-    """

			
 
				-    subtitle_dicts = [

			
 
				-        {

			
 
				-            'start': start,

			
 
				-            'end': end,

			
 
				-            'content': text,

			
 
				-        }

			
 
				-        for ((start, end), text)

			
 
				-        in subtitles

			
 
				-    ]

			
 
				-    return json.dumps(subtitle_dicts)

			
 
				-

			
 
				-

			
 
				-def raw_formatter(subtitles):

			
 
				-    """

			
 
				-    Serialize a list of subtitles as a newline-delimited string.

			
 
				-    """

			
 
				-    return ' '.join(text for (_rng, text) in subtitles)

			
 
				-

			
 
				-

			
 
				-FORMATTERS = {

			
 
				-    'srt': srt_formatter,

			
 
				-    'vtt': vtt_formatter,

			
 
				-    'json': json_formatter,

			
 
				-    'raw': raw_formatter,

			
 
				-}

			
 
				+"""
			
 
				+Defines subtitle formatters used by autosub.
			
 
				+"""
			
 
				+
			
 
				+# -*- coding: utf-8 -*-
			
 
				+from __future__ import unicode_literals
			
 
				+
			
 
				+import json
			
 
				+
			
 
				+import pysrt
			
 
				+import six
			
 
				+
			
 
				+
			
 
				+def srt_formatter(subtitles, padding_before=0, padding_after=0):
			
 
				+    """
			
 
				+    Serialize a list of subtitles according to the SRT format, with optional time padding.
			
 
				+    """
			
 
				+    sub_rip_file = pysrt.SubRipFile()
			
 
				+    for i, ((start, end), text) in enumerate(subtitles, start=1):
			
 
				+        item = pysrt.SubRipItem()
			
 
				+        item.index = i
			
 
				+        item.text = six.text_type(text)
			
 
				+        item.start.seconds = max(0, start - padding_before)
			
 
				+        item.end.seconds = end + padding_after
			
 
				+        sub_rip_file.append(item)
			
 
				+    return '\n'.join(six.text_type(item) for item in sub_rip_file)
			
 
				+
			
 
				+
			
 
				+def vtt_formatter(subtitles, padding_before=0, padding_after=0):
			
 
				+    """
			
 
				+    Serialize a list of subtitles according to the VTT format, with optional time padding.
			
 
				+    """
			
 
				+    text = srt_formatter(subtitles, padding_before, padding_after)
			
 
				+    text = 'WEBVTT\n\n' + text.replace(',', '.')
			
 
				+    return text
			
 
				+
			
 
				+
			
 
				+def json_formatter(subtitles):
			
 
				+    """
			
 
				+    Serialize a list of subtitles as a JSON blob.
			
 
				+    """
			
 
				+    subtitle_dicts = [
			
 
				+        {
			
 
				+            'start': start,
			
 
				+            'end': end,
			
 
				+            'content': text,
			
 
				+        }
			
 
				+        for ((start, end), text)
			
 
				+        in subtitles
			
 
				+    ]
			
 
				+    return json.dumps(subtitle_dicts)
			
 
				+
			
 
				+
			
 
				+def raw_formatter(subtitles):
			
 
				+    """
			
 
				+    Serialize a list of subtitles as a newline-delimited string.
			
 
				+    """
			
 
				+    return ' '.join(text for (_rng, text) in subtitles)
			
 
				+
			
 
				+
			
 
				+FORMATTERS = {
			
 
				+    'srt': srt_formatter,
			
 
				+    'vtt': vtt_formatter,
			
 
				+    'json': json_formatter,
			
 
				+    'raw': raw_formatter,
			
 
				+}