3 年之前 · aedb2ffba1
--- a/composer/autosub/__init__-0.4.0.py
+++ b/composer/autosub/__init__-0.4.0.py
@@ -1,405 +1,405 @@
 
															-"""

														
 
															-Defines autosub's main functionality.

														
 
															-"""

														
 
															-

														
 
															-#!/usr/bin/env python

														
 
															-

														
 
															-from __future__ import absolute_import, print_function, unicode_literals

														
 
															-

														
 
															-import argparse

														
 
															-import audioop

														
 
															-import json

														
 
															-import math

														
 
															-import multiprocessing

														
 
															-import os

														
 
															-import subprocess

														
 
															-import sys

														
 
															-import tempfile

														
 
															-import wave

														
 
															-

														
 
															-import requests

														
 
															-from googleapiclient.discovery import build

														
 
															-from progressbar import ProgressBar, Percentage, Bar, ETA

														
 
															-

														
 
															-from autosub.constants import (

														
 
															-    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,

														
 
															-)

														
 
															-from autosub.formatters import FORMATTERS

														
 
															-

														
 
															-DEFAULT_SUBTITLE_FORMAT = 'srt'

														
 
															-DEFAULT_CONCURRENCY = 10

														
 
															-DEFAULT_SRC_LANGUAGE = 'en'

														
 
															-DEFAULT_DST_LANGUAGE = 'en'

														
 
															-

														
 
															-

														
 
															-def percentile(arr, percent):

														
 
															-    """

														
 
															-    Calculate the given percentile of arr.

														
 
															-    """

														
 
															-    arr = sorted(arr)

														
 
															-    index = (len(arr) - 1) * percent

														
 
															-    floor = math.floor(index)

														
 
															-    ceil = math.ceil(index)

														
 
															-    if floor == ceil:

														
 
															-        return arr[int(index)]

														
 
															-    low_value = arr[int(floor)] * (ceil - index)

														
 
															-    high_value = arr[int(ceil)] * (index - floor)

														
 
															-    return low_value + high_value

														
 
															-

														
 
															-

														
 
															-class FLACConverter(object): # pylint: disable=too-few-public-methods

														
 
															-    """

														
 
															-    Class for converting a region of an input audio or video file into a FLAC audio file

														
 
															-    """

														
 
															-    def __init__(self, source_path, include_before=0.25, include_after=0.25):

														
 
															-        self.source_path = source_path

														
 
															-        self.include_before = include_before

														
 
															-        self.include_after = include_after

														
 
															-

														
 
															-    def __call__(self, region):

														
 
															-        try:

														
 
															-            start, end = region

														
 
															-            start = max(0, start - self.include_before)

														
 
															-            end += self.include_after

														
 
															-            temp = tempfile.NamedTemporaryFile(suffix='.flac')

														
 
															-            command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),

														
 
															-                       "-y", "-i", self.source_path,

														
 
															-                       "-loglevel", "error", temp.name]

														
 
															-            use_shell = True if os.name == "nt" else False

														
 
															-            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)

														
 
															-            return temp.read()

														
 
															-

														
 
															-        except KeyboardInterrupt:

														
 
															-            return None

														
 
															-

														
 
															-

														
 
															-class SpeechRecognizer(object): # pylint: disable=too-few-public-methods

														
 
															-    """

														
 
															-    Class for performing speech-to-text for an input FLAC file.

														
 
															-    """

														
 
															-    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):

														
 
															-        self.language = language

														
 
															-        self.rate = rate

														
 
															-        self.api_key = api_key

														
 
															-        self.retries = retries

														
 
															-

														
 
															-    def __call__(self, data):

														
 
															-        try:

														
 
															-            for _ in range(self.retries):

														
 
															-                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)

														
 
															-                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}

														
 
															-

														
 
															-                try:

														
 
															-                    resp = requests.post(url, data=data, headers=headers)

														
 
															-                except requests.exceptions.ConnectionError:

														
 
															-                    continue

														
 
															-

														
 
															-                for line in resp.content.decode('utf-8').split("\n"):

														
 
															-                    try:

														
 
															-                        line = json.loads(line)

														
 
															-                        line = line['result'][0]['alternative'][0]['transcript']

														
 
															-                        return line[:1].upper() + line[1:]

														
 
															-                    except IndexError:

														
 
															-                        # no result

														
 
															-                        continue

														
 
															-

														
 
															-        except KeyboardInterrupt:

														
 
															-            return None

														
 
															-

														
 
															-

														
 
															-class Translator(object): # pylint: disable=too-few-public-methods

														
 
															-    """

														
 
															-    Class for translating a sentence from a one language to another.

														
 
															-    """

														
 
															-    def __init__(self, language, api_key, src, dst):

														
 
															-        self.language = language

														
 
															-        self.api_key = api_key

														
 
															-        self.service = build('translate', 'v2',

														
 
															-                             developerKey=self.api_key)

														
 
															-        self.src = src

														
 
															-        self.dst = dst

														
 
															-

														
 
															-    def __call__(self, sentence):

														
 
															-        try:

														
 
															-            if not sentence:

														
 
															-                return None

														
 
															-

														
 
															-            result = self.service.translations().list( # pylint: disable=no-member

														
 
															-                source=self.src,

														
 
															-                target=self.dst,

														
 
															-                q=[sentence]

														
 
															-            ).execute()

														
 
															-

														
 
															-            if 'translations' in result and result['translations'] and \

														
 
															-                'translatedText' in result['translations'][0]:

														
 
															-                return result['translations'][0]['translatedText']

														
 
															-

														
 
															-            return None

														
 
															-

														
 
															-        except KeyboardInterrupt:

														
 
															-            return None

														
 
															-

														
 
															-

														
 
															-def which(program):

														
 
															-    """

														
 
															-    Return the path for a given executable.

														
 
															-    """

														
 
															-    def is_exe(file_path):

														
 
															-        """

														
 
															-        Checks whether a file is executable.

														
 
															-        """

														
 
															-        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)

														
 
															-

														
 
															-    fpath, _ = os.path.split(program)

														
 
															-    if fpath:

														
 
															-        if is_exe(program):

														
 
															-            return program

														
 
															-    else:

														
 
															-        for path in os.environ["PATH"].split(os.pathsep):

														
 
															-            path = path.strip('"')

														
 
															-            exe_file = os.path.join(path, program)

														
 
															-            if is_exe(exe_file):

														
 
															-                return exe_file

														
 
															-    return None

														
 
															-

														
 
															-

														
 
															-def extract_audio(filename, channels=1, rate=16000):

														
 
															-    """

														
 
															-    Extract audio from an input file to a temporary WAV file.

														
 
															-    """

														
 
															-    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)

														
 
															-    if not os.path.isfile(filename):

														
 
															-        print("The given file does not exist: {}".format(filename))

														
 
															-        raise Exception("Invalid filepath: {}".format(filename))

														
 
															-    if not which("ffmpeg"):

														
 
															-        print("ffmpeg: Executable not found on machine.")

														
 
															-        raise Exception("Dependency not found: ffmpeg")

														
 
															-    command = ["ffmpeg", "-y", "-i", filename,

														
 
															-               "-ac", str(channels), "-ar", str(rate),

														
 
															-               "-loglevel", "error", temp.name]

														
 
															-    use_shell = True if os.name == "nt" else False

														
 
															-    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)

														
 
															-    return temp.name, rate

														
 
															-

														
 
															-

														
 
															-def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals

														
 
															-    """

														
 
															-    Perform voice activity detection on a given audio file.

														
 
															-    """

														
 
															-    reader = wave.open(filename)

														
 
															-    sample_width = reader.getsampwidth()

														
 
															-    rate = reader.getframerate()

														
 
															-    n_channels = reader.getnchannels()

														
 
															-    chunk_duration = float(frame_width) / rate

														
 
															-

														
 
															-    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))

														
 
															-    energies = []

														
 
															-

														
 
															-    for _ in range(n_chunks):

														
 
															-        chunk = reader.readframes(frame_width)

														
 
															-        energies.append(audioop.rms(chunk, sample_width * n_channels))

														
 
															-

														
 
															-    threshold = percentile(energies, 0.2)

														
 
															-

														
 
															-    elapsed_time = 0

														
 
															-

														
 
															-    regions = []

														
 
															-    region_start = None

														
 
															-

														
 
															-    for energy in energies:

														
 
															-        is_silence = energy <= threshold

														
 
															-        max_exceeded = region_start and elapsed_time - region_start >= max_region_size

														
 
															-

														
 
															-        if (max_exceeded or is_silence) and region_start:

														
 
															-            if elapsed_time - region_start >= min_region_size:

														
 
															-                regions.append((region_start, elapsed_time))

														
 
															-                region_start = None

														
 
															-

														
 
															-        elif (not region_start) and (not is_silence):

														
 
															-            region_start = elapsed_time

														
 
															-        elapsed_time += chunk_duration

														
 
															-    return regions

														
 
															-

														
 
															-

														
 
															-def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments

														
 
															-        source_path,

														
 
															-        output=None,

														
 
															-        concurrency=DEFAULT_CONCURRENCY,

														
 
															-        src_language=DEFAULT_SRC_LANGUAGE,

														
 
															-        dst_language=DEFAULT_DST_LANGUAGE,

														
 
															-        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,

														
 
															-        api_key=None,

														
 
															-    ):

														
 
															-    """

														
 
															-    Given an input audio/video file, generate subtitles in the specified language and format.

														
 
															-    """

														
 
															-    audio_filename, audio_rate = extract_audio(source_path)

														
 
															-

														
 
															-    regions = find_speech_regions(audio_filename)

														
 
															-

														
 
															-    pool = multiprocessing.Pool(concurrency)

														
 
															-    converter = FLACConverter(source_path=audio_filename)

														
 
															-    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,

														
 
															-                                  api_key=GOOGLE_SPEECH_API_KEY)

														
 
															-

														
 
															-    transcripts = []

														
 
															-    if regions:

														
 
															-        try:

														
 
															-            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',

														
 
															-                       ETA()]

														
 
															-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

														
 
															-            extracted_regions = []

														
 
															-            for i, extracted_region in enumerate(pool.imap(converter, regions)):

														
 
															-                extracted_regions.append(extracted_region)

														
 
															-                pbar.update(i)

														
 
															-            pbar.finish()

														
 
															-

														
 
															-            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]

														
 
															-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

														
 
															-

														
 
															-            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):

														
 
															-                transcripts.append(transcript)

														
 
															-                pbar.update(i)

														
 
															-            pbar.finish()

														
 
															-

														
 
															-            if src_language.split("-")[0] != dst_language.split("-")[0]:

														
 
															-                if api_key:

														
 
															-                    google_translate_api_key = api_key

														
 
															-                    translator = Translator(dst_language, google_translate_api_key,

														
 
															-                                            dst=dst_language,

														
 
															-                                            src=src_language)

														
 
															-                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)

														
 
															-                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]

														
 
															-                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

														
 
															-                    translated_transcripts = []

														
 
															-                    for i, transcript in enumerate(pool.imap(translator, transcripts)):

														
 
															-                        translated_transcripts.append(transcript)

														
 
															-                        pbar.update(i)

														
 
															-                    pbar.finish()

														
 
															-                    transcripts = translated_transcripts

														
 
															-                else:

														
 
															-                    print(

														
 
															-                        "Error: Subtitle translation requires specified Google Translate API key. "

														
 
															-                        "See --help for further information."

														
 
															-                    )

														
 
															-                    return 1

														
 
															-

														
 
															-        except KeyboardInterrupt:

														
 
															-            pbar.finish()

														
 
															-            pool.terminate()

														
 
															-            pool.join()

														
 
															-            print("Cancelling transcription")

														
 
															-            raise

														
 
															-

														
 
															-    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]

														
 
															-    formatter = FORMATTERS.get(subtitle_file_format)

														
 
															-    formatted_subtitles = formatter(timed_subtitles)

														
 
															-

														
 
															-    dest = output

														
 
															-

														
 
															-    if not dest:

														
 
															-        base = os.path.splitext(source_path)[0]

														
 
															-        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

														
 
															-

														
 
															-    with open(dest, 'wb') as output_file:

														
 
															-        output_file.write(formatted_subtitles.encode("utf-8"))

														
 
															-

														
 
															-    os.remove(audio_filename)

														
 
															-

														
 
															-    return dest

														
 
															-

														
 
															-

														
 
															-def validate(args):

														
 
															-    """

														
 
															-    Check that the CLI arguments passed to autosub are valid.

														
 
															-    """

														
 
															-    if args.format not in FORMATTERS:

														
 
															-        print(

														
 
															-            "Subtitle format not supported. "

														
 
															-            "Run with --list-formats to see all supported formats."

														
 
															-        )

														
 
															-        return False

														
 
															-

														
 
															-    if args.src_language not in LANGUAGE_CODES.keys():

														
 
															-        print(

														
 
															-            "Source language not supported. "

														
 
															-            "Run with --list-languages to see all supported languages."

														
 
															-        )

														
 
															-        return False

														
 
															-

														
 
															-    if args.dst_language not in LANGUAGE_CODES.keys():

														
 
															-        print(

														
 
															-            "Destination language not supported. "

														
 
															-            "Run with --list-languages to see all supported languages."

														
 
															-        )

														
 
															-        return False

														
 
															-

														
 
															-    if not args.source_path:

														
 
															-        print("Error: You need to specify a source path.")

														
 
															-        return False

														
 
															-

														
 
															-    return True

														
 
															-

														
 
															-

														
 
															-def main():

														
 
															-    """

														
 
															-    Run autosub as a command-line program.

														
 
															-    """

														
 
															-    parser = argparse.ArgumentParser()

														
 
															-    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",

														
 
															-                        nargs='?')

														
 
															-    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",

														
 
															-                        type=int, default=DEFAULT_CONCURRENCY)

														
 
															-    parser.add_argument('-o', '--output',

														
 
															-                        help="Output path for subtitles (by default, subtitles are saved in \

														
 
															-                        the same directory and name as the source path)")

														
 
															-    parser.add_argument('-F', '--format', help="Destination subtitle format",

														
 
															-                        default=DEFAULT_SUBTITLE_FORMAT)

														
 
															-    parser.add_argument('-S', '--src-language', help="Language spoken in source file",

														
 
															-                        default=DEFAULT_SRC_LANGUAGE)

														
 
															-    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",

														
 
															-                        default=DEFAULT_DST_LANGUAGE)

														
 
															-    parser.add_argument('-K', '--api-key',

														
 
															-                        help="The Google Translate API key to be used. \

														
 
															-                        (Required for subtitle translation)")

														
 
															-    parser.add_argument('--list-formats', help="List all available subtitle formats",

														
 
															-                        action='store_true')

														
 
															-    parser.add_argument('--list-languages', help="List all available source/destination languages",

														
 
															-                        action='store_true')

														
 
															-

														
 
															-    args = parser.parse_args()

														
 
															-

														
 
															-    if args.list_formats:

														
 
															-        print("List of formats:")

														
 
															-        for subtitle_format in FORMATTERS:

														
 
															-            print("{format}".format(format=subtitle_format))

														
 
															-        return 0

														
 
															-

														
 
															-    if args.list_languages:

														
 
															-        print("List of all languages:")

														
 
															-        for code, language in sorted(LANGUAGE_CODES.items()):

														
 
															-            print("{code}\t{language}".format(code=code, language=language))

														
 
															-        return 0

														
 
															-

														
 
															-    if not validate(args):

														
 
															-        return 1

														
 
															-

														
 
															-    try:

														
 
															-        subtitle_file_path = generate_subtitles(

														
 
															-            source_path=args.source_path,

														
 
															-            concurrency=args.concurrency,

														
 
															-            src_language=args.src_language,

														
 
															-            dst_language=args.dst_language,

														
 
															-            api_key=args.api_key,

														
 
															-            subtitle_file_format=args.format,

														
 
															-            output=args.output,

														
 
															-        )

														
 
															-        print("Subtitles file created at {}".format(subtitle_file_path))

														
 
															-    except KeyboardInterrupt:

														
 
															-        return 1

														
 
															-

														
 
															-    return 0

														
 
															-

														
 
															-

														
 
															-if __name__ == '__main__':

														
 
															-    sys.exit(main())

														
 
															+"""
														
 
															+Defines autosub's main functionality.
														
 
															+"""
														
 
															+
														
 
															+#!/usr/bin/env python
														
 
															+
														
 
															+from __future__ import absolute_import, print_function, unicode_literals
														
 
															+
														
 
															+import argparse
														
 
															+import audioop
														
 
															+import json
														
 
															+import math
														
 
															+import multiprocessing
														
 
															+import os
														
 
															+import subprocess
														
 
															+import sys
														
 
															+import tempfile
														
 
															+import wave
														
 
															+
														
 
															+import requests
														
 
															+from googleapiclient.discovery import build
														
 
															+from progressbar import ProgressBar, Percentage, Bar, ETA
														
 
															+
														
 
															+from autosub.constants import (
														
 
															+    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
														
 
															+)
														
 
															+from autosub.formatters import FORMATTERS
														
 
															+
														
 
															+DEFAULT_SUBTITLE_FORMAT = 'srt'
														
 
															+DEFAULT_CONCURRENCY = 10
														
 
															+DEFAULT_SRC_LANGUAGE = 'en'
														
 
															+DEFAULT_DST_LANGUAGE = 'en'
														
 
															+
														
 
															+
														
 
															+def percentile(arr, percent):
														
 
															+    """
														
 
															+    Calculate the given percentile of arr.
														
 
															+    """
														
 
															+    arr = sorted(arr)
														
 
															+    index = (len(arr) - 1) * percent
														
 
															+    floor = math.floor(index)
														
 
															+    ceil = math.ceil(index)
														
 
															+    if floor == ceil:
														
 
															+        return arr[int(index)]
														
 
															+    low_value = arr[int(floor)] * (ceil - index)
														
 
															+    high_value = arr[int(ceil)] * (index - floor)
														
 
															+    return low_value + high_value
														
 
															+
														
 
															+
														
 
															+class FLACConverter(object): # pylint: disable=too-few-public-methods
														
 
															+    """
														
 
															+    Class for converting a region of an input audio or video file into a FLAC audio file
														
 
															+    """
														
 
															+    def __init__(self, source_path, include_before=0.25, include_after=0.25):
														
 
															+        self.source_path = source_path
														
 
															+        self.include_before = include_before
														
 
															+        self.include_after = include_after
														
 
															+
														
 
															+    def __call__(self, region):
														
 
															+        try:
														
 
															+            start, end = region
														
 
															+            start = max(0, start - self.include_before)
														
 
															+            end += self.include_after
														
 
															+            temp = tempfile.NamedTemporaryFile(suffix='.flac')
														
 
															+            command = ["ffmpeg", "-ss", str(start), "-t", str(end - start),
														
 
															+                       "-y", "-i", self.source_path,
														
 
															+                       "-loglevel", "error", temp.name]
														
 
															+            use_shell = True if os.name == "nt" else False
														
 
															+            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
														
 
															+            return temp.read()
														
 
															+
														
 
															+        except KeyboardInterrupt:
														
 
															+            return None
														
 
															+
														
 
															+
														
 
															+class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
														
 
															+    """
														
 
															+    Class for performing speech-to-text for an input FLAC file.
														
 
															+    """
														
 
															+    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
														
 
															+        self.language = language
														
 
															+        self.rate = rate
														
 
															+        self.api_key = api_key
														
 
															+        self.retries = retries
														
 
															+
														
 
															+    def __call__(self, data):
														
 
															+        try:
														
 
															+            for _ in range(self.retries):
														
 
															+                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
														
 
															+                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
														
 
															+
														
 
															+                try:
														
 
															+                    resp = requests.post(url, data=data, headers=headers)
														
 
															+                except requests.exceptions.ConnectionError:
														
 
															+                    continue
														
 
															+
														
 
															+                for line in resp.content.decode('utf-8').split("\n"):
														
 
															+                    try:
														
 
															+                        line = json.loads(line)
														
 
															+                        line = line['result'][0]['alternative'][0]['transcript']
														
 
															+                        return line[:1].upper() + line[1:]
														
 
															+                    except IndexError:
														
 
															+                        # no result
														
 
															+                        continue
														
 
															+
														
 
															+        except KeyboardInterrupt:
														
 
															+            return None
														
 
															+
														
 
															+
														
 
															+class Translator(object): # pylint: disable=too-few-public-methods
														
 
															+    """
														
 
															+    Class for translating a sentence from a one language to another.
														
 
															+    """
														
 
															+    def __init__(self, language, api_key, src, dst):
														
 
															+        self.language = language
														
 
															+        self.api_key = api_key
														
 
															+        self.service = build('translate', 'v2',
														
 
															+                             developerKey=self.api_key)
														
 
															+        self.src = src
														
 
															+        self.dst = dst
														
 
															+
														
 
															+    def __call__(self, sentence):
														
 
															+        try:
														
 
															+            if not sentence:
														
 
															+                return None
														
 
															+
														
 
															+            result = self.service.translations().list( # pylint: disable=no-member
														
 
															+                source=self.src,
														
 
															+                target=self.dst,
														
 
															+                q=[sentence]
														
 
															+            ).execute()
														
 
															+
														
 
															+            if 'translations' in result and result['translations'] and \
														
 
															+                'translatedText' in result['translations'][0]:
														
 
															+                return result['translations'][0]['translatedText']
														
 
															+
														
 
															+            return None
														
 
															+
														
 
															+        except KeyboardInterrupt:
														
 
															+            return None
														
 
															+
														
 
															+
														
 
															+def which(program):
														
 
															+    """
														
 
															+    Return the path for a given executable.
														
 
															+    """
														
 
															+    def is_exe(file_path):
														
 
															+        """
														
 
															+        Checks whether a file is executable.
														
 
															+        """
														
 
															+        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
														
 
															+
														
 
															+    fpath, _ = os.path.split(program)
														
 
															+    if fpath:
														
 
															+        if is_exe(program):
														
 
															+            return program
														
 
															+    else:
														
 
															+        for path in os.environ["PATH"].split(os.pathsep):
														
 
															+            path = path.strip('"')
														
 
															+            exe_file = os.path.join(path, program)
														
 
															+            if is_exe(exe_file):
														
 
															+                return exe_file
														
 
															+    return None
														
 
															+
														
 
															+
														
 
															+def extract_audio(filename, channels=1, rate=16000):
														
 
															+    """
														
 
															+    Extract audio from an input file to a temporary WAV file.
														
 
															+    """
														
 
															+    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
														
 
															+    if not os.path.isfile(filename):
														
 
															+        print("The given file does not exist: {}".format(filename))
														
 
															+        raise Exception("Invalid filepath: {}".format(filename))
														
 
															+    if not which("ffmpeg"):
														
 
															+        print("ffmpeg: Executable not found on machine.")
														
 
															+        raise Exception("Dependency not found: ffmpeg")
														
 
															+    command = ["ffmpeg", "-y", "-i", filename,
														
 
															+               "-ac", str(channels), "-ar", str(rate),
														
 
															+               "-loglevel", "error", temp.name]
														
 
															+    use_shell = True if os.name == "nt" else False
														
 
															+    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
														
 
															+    return temp.name, rate
														
 
															+
														
 
															+
														
 
															+def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
														
 
															+    """
														
 
															+    Perform voice activity detection on a given audio file.
														
 
															+    """
														
 
															+    reader = wave.open(filename)
														
 
															+    sample_width = reader.getsampwidth()
														
 
															+    rate = reader.getframerate()
														
 
															+    n_channels = reader.getnchannels()
														
 
															+    chunk_duration = float(frame_width) / rate
														
 
															+
														
 
															+    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
														
 
															+    energies = []
														
 
															+
														
 
															+    for _ in range(n_chunks):
														
 
															+        chunk = reader.readframes(frame_width)
														
 
															+        energies.append(audioop.rms(chunk, sample_width * n_channels))
														
 
															+
														
 
															+    threshold = percentile(energies, 0.2)
														
 
															+
														
 
															+    elapsed_time = 0
														
 
															+
														
 
															+    regions = []
														
 
															+    region_start = None
														
 
															+
														
 
															+    for energy in energies:
														
 
															+        is_silence = energy <= threshold
														
 
															+        max_exceeded = region_start and elapsed_time - region_start >= max_region_size
														
 
															+
														
 
															+        if (max_exceeded or is_silence) and region_start:
														
 
															+            if elapsed_time - region_start >= min_region_size:
														
 
															+                regions.append((region_start, elapsed_time))
														
 
															+                region_start = None
														
 
															+
														
 
															+        elif (not region_start) and (not is_silence):
														
 
															+            region_start = elapsed_time
														
 
															+        elapsed_time += chunk_duration
														
 
															+    return regions
														
 
															+
														
 
															+
														
 
															+def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
														
 
															+        source_path,
														
 
															+        output=None,
														
 
															+        concurrency=DEFAULT_CONCURRENCY,
														
 
															+        src_language=DEFAULT_SRC_LANGUAGE,
														
 
															+        dst_language=DEFAULT_DST_LANGUAGE,
														
 
															+        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
														
 
															+        api_key=None,
														
 
															+    ):
														
 
															+    """
														
 
															+    Given an input audio/video file, generate subtitles in the specified language and format.
														
 
															+    """
														
 
															+    audio_filename, audio_rate = extract_audio(source_path)
														
 
															+
														
 
															+    regions = find_speech_regions(audio_filename)
														
 
															+
														
 
															+    pool = multiprocessing.Pool(concurrency)
														
 
															+    converter = FLACConverter(source_path=audio_filename)
														
 
															+    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
														
 
															+                                  api_key=GOOGLE_SPEECH_API_KEY)
														
 
															+
														
 
															+    transcripts = []
														
 
															+    if regions:
														
 
															+        try:
														
 
															+            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
														
 
															+                       ETA()]
														
 
															+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
														
 
															+            extracted_regions = []
														
 
															+            for i, extracted_region in enumerate(pool.imap(converter, regions)):
														
 
															+                extracted_regions.append(extracted_region)
														
 
															+                pbar.update(i)
														
 
															+            pbar.finish()
														
 
															+
														
 
															+            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
														
 
															+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
														
 
															+
														
 
															+            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
														
 
															+                transcripts.append(transcript)
														
 
															+                pbar.update(i)
														
 
															+            pbar.finish()
														
 
															+
														
 
															+            if src_language.split("-")[0] != dst_language.split("-")[0]:
														
 
															+                if api_key:
														
 
															+                    google_translate_api_key = api_key
														
 
															+                    translator = Translator(dst_language, google_translate_api_key,
														
 
															+                                            dst=dst_language,
														
 
															+                                            src=src_language)
														
 
															+                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
														
 
															+                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
														
 
															+                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
														
 
															+                    translated_transcripts = []
														
 
															+                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
														
 
															+                        translated_transcripts.append(transcript)
														
 
															+                        pbar.update(i)
														
 
															+                    pbar.finish()
														
 
															+                    transcripts = translated_transcripts
														
 
															+                else:
														
 
															+                    print(
														
 
															+                        "Error: Subtitle translation requires specified Google Translate API key. "
														
 
															+                        "See --help for further information."
														
 
															+                    )
														
 
															+                    return 1
														
 
															+
														
 
															+        except KeyboardInterrupt:
														
 
															+            pbar.finish()
														
 
															+            pool.terminate()
														
 
															+            pool.join()
														
 
															+            print("Cancelling transcription")
														
 
															+            raise
														
 
															+
														
 
															+    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
														
 
															+    formatter = FORMATTERS.get(subtitle_file_format)
														
 
															+    formatted_subtitles = formatter(timed_subtitles)
														
 
															+
														
 
															+    dest = output
														
 
															+
														
 
															+    if not dest:
														
 
															+        base = os.path.splitext(source_path)[0]
														
 
															+        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
														
 
															+
														
 
															+    with open(dest, 'wb') as output_file:
														
 
															+        output_file.write(formatted_subtitles.encode("utf-8"))
														
 
															+
														
 
															+    os.remove(audio_filename)
														
 
															+
														
 
															+    return dest
														
 
															+
														
 
															+
														
 
															+def validate(args):
														
 
															+    """
														
 
															+    Check that the CLI arguments passed to autosub are valid.
														
 
															+    """
														
 
															+    if args.format not in FORMATTERS:
														
 
															+        print(
														
 
															+            "Subtitle format not supported. "
														
 
															+            "Run with --list-formats to see all supported formats."
														
 
															+        )
														
 
															+        return False
														
 
															+
														
 
															+    if args.src_language not in LANGUAGE_CODES.keys():
														
 
															+        print(
														
 
															+            "Source language not supported. "
														
 
															+            "Run with --list-languages to see all supported languages."
														
 
															+        )
														
 
															+        return False
														
 
															+
														
 
															+    if args.dst_language not in LANGUAGE_CODES.keys():
														
 
															+        print(
														
 
															+            "Destination language not supported. "
														
 
															+            "Run with --list-languages to see all supported languages."
														
 
															+        )
														
 
															+        return False
														
 
															+
														
 
															+    if not args.source_path:
														
 
															+        print("Error: You need to specify a source path.")
														
 
															+        return False
														
 
															+
														
 
															+    return True
														
 
															+
														
 
															+
														
 
															+def main():
														
 
															+    """
														
 
															+    Run autosub as a command-line program.
														
 
															+    """
														
 
															+    parser = argparse.ArgumentParser()
														
 
															+    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
														
 
															+                        nargs='?')
														
 
															+    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
														
 
															+                        type=int, default=DEFAULT_CONCURRENCY)
														
 
															+    parser.add_argument('-o', '--output',
														
 
															+                        help="Output path for subtitles (by default, subtitles are saved in \
														
 
															+                        the same directory and name as the source path)")
														
 
															+    parser.add_argument('-F', '--format', help="Destination subtitle format",
														
 
															+                        default=DEFAULT_SUBTITLE_FORMAT)
														
 
															+    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
														
 
															+                        default=DEFAULT_SRC_LANGUAGE)
														
 
															+    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
														
 
															+                        default=DEFAULT_DST_LANGUAGE)
														
 
															+    parser.add_argument('-K', '--api-key',
														
 
															+                        help="The Google Translate API key to be used. \
														
 
															+                        (Required for subtitle translation)")
														
 
															+    parser.add_argument('--list-formats', help="List all available subtitle formats",
														
 
															+                        action='store_true')
														
 
															+    parser.add_argument('--list-languages', help="List all available source/destination languages",
														
 
															+                        action='store_true')
														
 
															+
														
 
															+    args = parser.parse_args()
														
 
															+
														
 
															+    if args.list_formats:
														
 
															+        print("List of formats:")
														
 
															+        for subtitle_format in FORMATTERS:
														
 
															+            print("{format}".format(format=subtitle_format))
														
 
															+        return 0
														
 
															+
														
 
															+    if args.list_languages:
														
 
															+        print("List of all languages:")
														
 
															+        for code, language in sorted(LANGUAGE_CODES.items()):
														
 
															+            print("{code}\t{language}".format(code=code, language=language))
														
 
															+        return 0
														
 
															+
														
 
															+    if not validate(args):
														
 
															+        return 1
														
 
															+
														
 
															+    try:
														
 
															+        subtitle_file_path = generate_subtitles(
														
 
															+            source_path=args.source_path,
														
 
															+            concurrency=args.concurrency,
														
 
															+            src_language=args.src_language,
														
 
															+            dst_language=args.dst_language,
														
 
															+            api_key=args.api_key,
														
 
															+            subtitle_file_format=args.format,
														
 
															+            output=args.output,
														
 
															+        )
														
 
															+        print("Subtitles file created at {}".format(subtitle_file_path))
														
 
															+    except KeyboardInterrupt:
														
 
															+        return 1
														
 
															+
														
 
															+    return 0
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    sys.exit(main())
														
--- a/composer/autosub/__init__.py
+++ b/composer/autosub/__init__.py
@@ -1,434 +1,434 @@
 
															-"""

														
 
															-Defines autosub's main functionality.

														
 
															-"""

														
 
															-

														
 
															-#!/usr/bin/env python

														
 
															-

														
 
															-from __future__ import absolute_import, print_function, unicode_literals

														
 
															-

														
 
															-import argparse

														
 
															-import audioop

														
 
															-import math

														
 
															-import multiprocessing

														
 
															-import os

														
 
															-from json import JSONDecodeError

														
 
															-import subprocess

														
 
															-import sys

														
 
															-import tempfile

														
 
															-import wave

														
 
															-

														
 
															-import json

														
 
															-import requests

														
 
															-try:

														
 
															-    from json.decoder import JSONDecodeError

														
 
															-except ImportError:

														
 
															-    JSONDecodeError = ValueError

														
 
															-

														
 
															-from googleapiclient.discovery import build

														
 
															-from progressbar import ProgressBar, Percentage, Bar, ETA

														
 
															-

														
 
															-from autosub.constants import (

														
 
															-    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,

														
 
															-)

														
 
															-from autosub.formatters import FORMATTERS

														
 
															-

														
 
															-DEFAULT_SUBTITLE_FORMAT = 'srt'

														
 
															-DEFAULT_CONCURRENCY = 10

														
 
															-DEFAULT_SRC_LANGUAGE = 'en'

														
 
															-DEFAULT_DST_LANGUAGE = 'en'

														
 
															-

														
 
															-

														
 
															-def percentile(arr, percent):

														
 
															-    """

														
 
															-    Calculate the given percentile of arr.

														
 
															-    """

														
 
															-    arr = sorted(arr)

														
 
															-    index = (len(arr) - 1) * percent

														
 
															-    floor = math.floor(index)

														
 
															-    ceil = math.ceil(index)

														
 
															-    if floor == ceil:

														
 
															-        return arr[int(index)]

														
 
															-    low_value = arr[int(floor)] * (ceil - index)

														
 
															-    high_value = arr[int(ceil)] * (index - floor)

														
 
															-    return low_value + high_value

														
 
															-

														
 
															-

														
 
															-class FLACConverter(object): # pylint: disable=too-few-public-methods

														
 
															-    """

														
 
															-    Class for converting a region of an input audio or video file into a FLAC audio file

														
 
															-    """

														
 
															-    def __init__(self, source_path, include_before=0.25, include_after=0.25):

														
 
															-        self.source_path = source_path

														
 
															-        self.include_before = include_before

														
 
															-        self.include_after = include_after

														
 
															-

														
 
															-    def __call__(self, region):

														
 
															-        try:

														
 
															-            start, end = region

														
 
															-            start = max(0, start - self.include_before)

														
 
															-            end += self.include_after

														
 
															-            #delete=False necessary for running on Windows

														
 
															-            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)

														
 
															-            program_ffmpeg = which("ffmpeg")

														
 
															-            command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),

														
 
															-                       "-y", "-i", self.source_path,

														
 
															-                       "-loglevel", "error", temp.name]

														
 
															-            use_shell = True if os.name == "nt" else False

														
 
															-            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)

														
 
															-            read_data = temp.read()

														
 
															-            temp.close()

														
 
															-            os.unlink(temp.name)

														
 
															-            return read_data

														
 
															-

														
 
															-        except KeyboardInterrupt:

														
 
															-            return None

														
 
															-

														
 
															-

														
 
															-class SpeechRecognizer(object): # pylint: disable=too-few-public-methods

														
 
															-    """

														
 
															-    Class for performing speech-to-text for an input FLAC file.

														
 
															-    """

														
 
															-    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):

														
 
															-        self.language = language

														
 
															-        self.rate = rate

														
 
															-        self.api_key = api_key

														
 
															-        self.retries = retries

														
 
															-

														
 
															-    def __call__(self, data):

														
 
															-        try:

														
 
															-            for _ in range(self.retries):

														
 
															-                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)

														
 
															-                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}

														
 
															-

														
 
															-                try:

														
 
															-                    resp = requests.post(url, data=data, headers=headers)

														
 
															-                except requests.exceptions.ConnectionError:

														
 
															-                    continue

														
 
															-

														
 
															-                for line in resp.content.decode('utf-8').split("\n"):

														
 
															-                    try:

														
 
															-                        line = json.loads(line)

														
 
															-                        line = line['result'][0]['alternative'][0]['transcript']

														
 
															-                        return line[:1].upper() + line[1:]

														
 
															-                    except IndexError:

														
 
															-                        # no result

														
 
															-                        continue

														
 
															-                    except JSONDecodeError:

														
 
															-                        continue

														
 
															-

														
 
															-        except KeyboardInterrupt:

														
 
															-            return None

														
 
															-

														
 
															-

														
 
															-class Translator(object): # pylint: disable=too-few-public-methods

														
 
															-    """

														
 
															-    Class for translating a sentence from a one language to another.

														
 
															-    """

														
 
															-    def __init__(self, language, api_key, src, dst):

														
 
															-        self.language = language

														
 
															-        self.api_key = api_key

														
 
															-        self.service = build('translate', 'v2',

														
 
															-                             developerKey=self.api_key)

														
 
															-        self.src = src

														
 
															-        self.dst = dst

														
 
															-

														
 
															-    def __call__(self, sentence):

														
 
															-        try:

														
 
															-            if not sentence:

														
 
															-                return None

														
 
															-

														
 
															-            result = self.service.translations().list( # pylint: disable=no-member

														
 
															-                source=self.src,

														
 
															-                target=self.dst,

														
 
															-                q=[sentence]

														
 
															-            ).execute()

														
 
															-

														
 
															-            if 'translations' in result and result['translations'] and \

														
 
															-                'translatedText' in result['translations'][0]:

														
 
															-                return result['translations'][0]['translatedText']

														
 
															-

														
 
															-            return None

														
 
															-

														
 
															-        except KeyboardInterrupt:

														
 
															-            return None

														
 
															-

														
 
															-

														
 
															-def which(program):

														
 
															-    """

														
 
															-    Return the path for a given executable.

														
 
															-    """

														
 
															-    def is_exe(file_path):

														
 
															-        """

														
 
															-        Checks whether a file is executable.

														
 
															-        """

														
 
															-        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)

														
 
															-    #necessary to run on Windows

														
 
															-    if os.name == "nt":

														
 
															-        program += ".exe"

														
 
															-    fpath, _ = os.path.split(program)

														
 
															-    if fpath:

														
 
															-        if is_exe(program):

														
 
															-            return program

														
 
															-    else:

														
 
															-        #looks for file in the script execution folder before checking on system path

														
 
															-        current_dir = os.getcwd()

														
 
															-        local_program = os.path.join(current_dir, program)

														
 
															-        if is_exe(local_program):

														
 
															-            return local_program

														
 
															-        else:

														
 
															-            for path in os.environ["PATH"].split(os.pathsep):

														
 
															-                path = path.strip('"')

														
 
															-                exe_file = os.path.join(path, program)

														
 
															-                if is_exe(exe_file):

														
 
															-                    return exe_file

														
 
															-    return None

														
 
															-

														
 
															-

														
 
															-def extract_audio(filename, channels=1, rate=16000):

														
 
															-    """

														
 
															-    Extract audio from an input file to a temporary WAV file.

														
 
															-    """

														
 
															-    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)

														
 
															-    if not os.path.isfile(filename):

														
 
															-        print("The given file does not exist: {}".format(filename))

														
 
															-        raise Exception("Invalid filepath: {}".format(filename))

														
 
															-    program_ffmpeg = which("ffmpeg")

														
 
															-    if not program_ffmpeg:

														
 
															-        print("ffmpeg: Executable not found on machine.")

														
 
															-        raise Exception("Dependency not found: ffmpeg")

														
 
															-    command = [str(program_ffmpeg), "-y", "-i", filename,

														
 
															-               "-ac", str(channels), "-ar", str(rate),

														
 
															-               "-loglevel", "error", temp.name]

														
 
															-    use_shell = True if os.name == "nt" else False

														
 
															-    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)

														
 
															-    return temp.name, rate

														
 
															-

														
 
															-

														
 
															-def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals

														
 
															-    """

														
 
															-    Perform voice activity detection on a given audio file.

														
 
															-    """

														
 
															-    reader = wave.open(filename)

														
 
															-    sample_width = reader.getsampwidth()

														
 
															-    rate = reader.getframerate()

														
 
															-    n_channels = reader.getnchannels()

														
 
															-    chunk_duration = float(frame_width) / rate

														
 
															-

														
 
															-    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))

														
 
															-    energies = []

														
 
															-

														
 
															-    for _ in range(n_chunks):

														
 
															-        chunk = reader.readframes(frame_width)

														
 
															-        energies.append(audioop.rms(chunk, sample_width * n_channels))

														
 
															-

														
 
															-    threshold = percentile(energies, 0.2)

														
 
															-

														
 
															-    elapsed_time = 0

														
 
															-

														
 
															-    regions = []

														
 
															-    region_start = None

														
 
															-

														
 
															-    for energy in energies:

														
 
															-        is_silence = energy <= threshold

														
 
															-        max_exceeded = region_start and elapsed_time - region_start >= max_region_size

														
 
															-

														
 
															-        if (max_exceeded or is_silence) and region_start:

														
 
															-            if elapsed_time - region_start >= min_region_size:

														
 
															-                regions.append((region_start, elapsed_time))

														
 
															-                region_start = None

														
 
															-

														
 
															-        elif (not region_start) and (not is_silence):

														
 
															-            region_start = elapsed_time

														
 
															-        elapsed_time += chunk_duration

														
 
															-    return regions

														
 
															-

														
 
															-

														
 
															-def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments

														
 
															-        source_path,

														
 
															-        output=None,

														
 
															-        concurrency=DEFAULT_CONCURRENCY,

														
 
															-        src_language=DEFAULT_SRC_LANGUAGE,

														
 
															-        dst_language=DEFAULT_DST_LANGUAGE,

														
 
															-        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,

														
 
															-        api_key=None,

														
 
															-    ):

														
 
															-    """

														
 
															-    Given an input audio/video file, generate subtitles in the specified language and format.

														
 
															-    """

														
 
															-

														
 
															-    if os.name != "nt" and "Darwin" in os.uname():

														
 
															-        #the default unix fork method does not work on Mac OS

														
 
															-        #need to use forkserver

														
 
															-        if 'forkserver' != multiprocessing.get_start_method(allow_none=True):

														
 
															-            multiprocessing.set_start_method('forkserver')

														
 
															-

														
 
															-    audio_filename, audio_rate = extract_audio(source_path)

														
 
															-

														
 
															-    regions = find_speech_regions(audio_filename)

														
 
															-

														
 
															-    pool = multiprocessing.Pool(concurrency)

														
 
															-    converter = FLACConverter(source_path=audio_filename)

														
 
															-    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,

														
 
															-                                  api_key=GOOGLE_SPEECH_API_KEY)

														
 
															-

														
 
															-    transcripts = []

														
 
															-    if regions:

														
 
															-        try:

														
 
															-            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',

														
 
															-                       ETA()]

														
 
															-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

														
 
															-            extracted_regions = []

														
 
															-            for i, extracted_region in enumerate(pool.imap(converter, regions)):

														
 
															-                extracted_regions.append(extracted_region)

														
 
															-                pbar.update(i)

														
 
															-            pbar.finish()

														
 
															-

														
 
															-            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]

														
 
															-            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

														
 
															-

														
 
															-            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):

														
 
															-                transcripts.append(transcript)

														
 
															-                pbar.update(i)

														
 
															-            pbar.finish()

														
 
															-

														
 
															-            if src_language.split("-")[0] != dst_language.split("-")[0]:

														
 
															-                if api_key:

														
 
															-                    google_translate_api_key = api_key

														
 
															-                    translator = Translator(dst_language, google_translate_api_key,

														
 
															-                                            dst=dst_language,

														
 
															-                                            src=src_language)

														
 
															-                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)

														
 
															-                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]

														
 
															-                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()

														
 
															-                    translated_transcripts = []

														
 
															-                    for i, transcript in enumerate(pool.imap(translator, transcripts)):

														
 
															-                        translated_transcripts.append(transcript)

														
 
															-                        pbar.update(i)

														
 
															-                    pbar.finish()

														
 
															-                    transcripts = translated_transcripts

														
 
															-                else:

														
 
															-                    print(

														
 
															-                        "Error: Subtitle translation requires specified Google Translate API key. "

														
 
															-                        "See --help for further information."

														
 
															-                    )

														
 
															-                    return 1

														
 
															-

														
 
															-        except KeyboardInterrupt:

														
 
															-            pbar.finish()

														
 
															-            pool.terminate()

														
 
															-            pool.join()

														
 
															-            print("Cancelling transcription")

														
 
															-            raise

														
 
															-

														
 
															-    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]

														
 
															-    formatter = FORMATTERS.get(subtitle_file_format)

														
 
															-    formatted_subtitles = formatter(timed_subtitles)

														
 
															-

														
 
															-    dest = output

														
 
															-

														
 
															-    if not dest:

														
 
															-        base = os.path.splitext(source_path)[0]

														
 
															-        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)

														
 
															-

														
 
															-    with open(dest, 'wb') as output_file:

														
 
															-        output_file.write(formatted_subtitles.encode("utf-8"))

														
 
															-

														
 
															-    os.remove(audio_filename)

														
 
															-

														
 
															-    return dest

														
 
															-

														
 
															-

														
 
															-def validate(args):

														
 
															-    """

														
 
															-    Check that the CLI arguments passed to autosub are valid.

														
 
															-    """

														
 
															-    if args.format not in FORMATTERS:

														
 
															-        print(

														
 
															-            "Subtitle format not supported. "

														
 
															-            "Run with --list-formats to see all supported formats."

														
 
															-        )

														
 
															-        return False

														
 
															-

														
 
															-    if args.src_language not in LANGUAGE_CODES.keys():

														
 
															-        print(

														
 
															-            "Source language not supported. "

														
 
															-            "Run with --list-languages to see all supported languages."

														
 
															-        )

														
 
															-        return False

														
 
															-

														
 
															-    if args.dst_language not in LANGUAGE_CODES.keys():

														
 
															-        print(

														
 
															-            "Destination language not supported. "

														
 
															-            "Run with --list-languages to see all supported languages."

														
 
															-        )

														
 
															-        return False

														
 
															-

														
 
															-    if not args.source_path:

														
 
															-        print("Error: You need to specify a source path.")

														
 
															-        return False

														
 
															-

														
 
															-    return True

														
 
															-

														
 
															-

														
 
															-def main():

														
 
															-    """

														
 
															-    Run autosub as a command-line program.

														
 
															-    """

														
 
															-    parser = argparse.ArgumentParser()

														
 
															-    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",

														
 
															-                        nargs='?')

														
 
															-    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",

														
 
															-                        type=int, default=DEFAULT_CONCURRENCY)

														
 
															-    parser.add_argument('-o', '--output',

														
 
															-                        help="Output path for subtitles (by default, subtitles are saved in \

														
 
															-                        the same directory and name as the source path)")

														
 
															-    parser.add_argument('-F', '--format', help="Destination subtitle format",

														
 
															-                        default=DEFAULT_SUBTITLE_FORMAT)

														
 
															-    parser.add_argument('-S', '--src-language', help="Language spoken in source file",

														
 
															-                        default=DEFAULT_SRC_LANGUAGE)

														
 
															-    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",

														
 
															-                        default=DEFAULT_DST_LANGUAGE)

														
 
															-    parser.add_argument('-K', '--api-key',

														
 
															-                        help="The Google Translate API key to be used. \

														
 
															-                        (Required for subtitle translation)")

														
 
															-    parser.add_argument('--list-formats', help="List all available subtitle formats",

														
 
															-                        action='store_true')

														
 
															-    parser.add_argument('--list-languages', help="List all available source/destination languages",

														
 
															-                        action='store_true')

														
 
															-

														
 
															-    args = parser.parse_args()

														
 
															-

														
 
															-    if args.list_formats:

														
 
															-        print("List of formats:")

														
 
															-        for subtitle_format in FORMATTERS:

														
 
															-            print("{format}".format(format=subtitle_format))

														
 
															-        return 0

														
 
															-

														
 
															-    if args.list_languages:

														
 
															-        print("List of all languages:")

														
 
															-        for code, language in sorted(LANGUAGE_CODES.items()):

														
 
															-            print("{code}\t{language}".format(code=code, language=language))

														
 
															-        return 0

														
 
															-

														
 
															-    if not validate(args):

														
 
															-        return 1

														
 
															-

														
 
															-    try:

														
 
															-        subtitle_file_path = generate_subtitles(

														
 
															-            source_path=args.source_path,

														
 
															-            concurrency=args.concurrency,

														
 
															-            src_language=args.src_language,

														
 
															-            dst_language=args.dst_language,

														
 
															-            api_key=args.api_key,

														
 
															-            subtitle_file_format=args.format,

														
 
															-            output=args.output,

														
 
															-        )

														
 
															-        print("Subtitles file created at {}".format(subtitle_file_path))

														
 
															-    except KeyboardInterrupt:

														
 
															-        return 1

														
 
															-

														
 
															-    return 0

														
 
															-

														
 
															-

														
 
															-if __name__ == '__main__':

														
 
															-    sys.exit(main())

														
 
															+"""
														
 
															+Defines autosub's main functionality.
														
 
															+"""
														
 
															+
														
 
															+#!/usr/bin/env python
														
 
															+
														
 
															+from __future__ import absolute_import, print_function, unicode_literals
														
 
															+
														
 
															+import argparse
														
 
															+import audioop
														
 
															+import math
														
 
															+import multiprocessing
														
 
															+import os
														
 
															+from json import JSONDecodeError
														
 
															+import subprocess
														
 
															+import sys
														
 
															+import tempfile
														
 
															+import wave
														
 
															+
														
 
															+import json
														
 
															+import requests
														
 
															+try:
														
 
															+    from json.decoder import JSONDecodeError
														
 
															+except ImportError:
														
 
															+    JSONDecodeError = ValueError
														
 
															+
														
 
															+from googleapiclient.discovery import build
														
 
															+from progressbar import ProgressBar, Percentage, Bar, ETA
														
 
															+
														
 
															+from autosub.constants import (
														
 
															+    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
														
 
															+)
														
 
															+from autosub.formatters import FORMATTERS
														
 
															+
														
 
															+DEFAULT_SUBTITLE_FORMAT = 'srt'
														
 
															+DEFAULT_CONCURRENCY = 10
														
 
															+DEFAULT_SRC_LANGUAGE = 'en'
														
 
															+DEFAULT_DST_LANGUAGE = 'en'
														
 
															+
														
 
															+
														
 
															+def percentile(arr, percent):
														
 
															+    """
														
 
															+    Calculate the given percentile of arr.
														
 
															+    """
														
 
															+    arr = sorted(arr)
														
 
															+    index = (len(arr) - 1) * percent
														
 
															+    floor = math.floor(index)
														
 
															+    ceil = math.ceil(index)
														
 
															+    if floor == ceil:
														
 
															+        return arr[int(index)]
														
 
															+    low_value = arr[int(floor)] * (ceil - index)
														
 
															+    high_value = arr[int(ceil)] * (index - floor)
														
 
															+    return low_value + high_value
														
 
															+
														
 
															+
														
 
															+class FLACConverter(object): # pylint: disable=too-few-public-methods
														
 
															+    """
														
 
															+    Class for converting a region of an input audio or video file into a FLAC audio file
														
 
															+    """
														
 
															+    def __init__(self, source_path, include_before=0.25, include_after=0.25):
														
 
															+        self.source_path = source_path
														
 
															+        self.include_before = include_before
														
 
															+        self.include_after = include_after
														
 
															+
														
 
															+    def __call__(self, region):
														
 
															+        try:
														
 
															+            start, end = region
														
 
															+            start = max(0, start - self.include_before)
														
 
															+            end += self.include_after
														
 
															+            #delete=False necessary for running on Windows
														
 
															+            temp = tempfile.NamedTemporaryFile(suffix='.flac', delete=False)
														
 
															+            program_ffmpeg = which("ffmpeg")
														
 
															+            command = [str(program_ffmpeg), "-ss", str(start), "-t", str(end - start),
														
 
															+                       "-y", "-i", self.source_path,
														
 
															+                       "-loglevel", "error", temp.name]
														
 
															+            use_shell = True if os.name == "nt" else False
														
 
															+            subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
														
 
															+            read_data = temp.read()
														
 
															+            temp.close()
														
 
															+            os.unlink(temp.name)
														
 
															+            return read_data
														
 
															+
														
 
															+        except KeyboardInterrupt:
														
 
															+            return None
														
 
															+
														
 
															+
														
 
															+class SpeechRecognizer(object): # pylint: disable=too-few-public-methods
														
 
															+    """
														
 
															+    Class for performing speech-to-text for an input FLAC file.
														
 
															+    """
														
 
															+    def __init__(self, language="en", rate=44100, retries=3, api_key=GOOGLE_SPEECH_API_KEY):
														
 
															+        self.language = language
														
 
															+        self.rate = rate
														
 
															+        self.api_key = api_key
														
 
															+        self.retries = retries
														
 
															+
														
 
															+    def __call__(self, data):
														
 
															+        try:
														
 
															+            for _ in range(self.retries):
														
 
															+                url = GOOGLE_SPEECH_API_URL.format(lang=self.language, key=self.api_key)
														
 
															+                headers = {"Content-Type": "audio/x-flac; rate=%d" % self.rate}
														
 
															+
														
 
															+                try:
														
 
															+                    resp = requests.post(url, data=data, headers=headers)
														
 
															+                except requests.exceptions.ConnectionError:
														
 
															+                    continue
														
 
															+
														
 
															+                for line in resp.content.decode('utf-8').split("\n"):
														
 
															+                    try:
														
 
															+                        line = json.loads(line)
														
 
															+                        line = line['result'][0]['alternative'][0]['transcript']
														
 
															+                        return line[:1].upper() + line[1:]
														
 
															+                    except IndexError:
														
 
															+                        # no result
														
 
															+                        continue
														
 
															+                    except JSONDecodeError:
														
 
															+                        continue
														
 
															+
														
 
															+        except KeyboardInterrupt:
														
 
															+            return None
														
 
															+
														
 
															+
														
 
															+class Translator(object): # pylint: disable=too-few-public-methods
														
 
															+    """
														
 
															+    Class for translating a sentence from a one language to another.
														
 
															+    """
														
 
															+    def __init__(self, language, api_key, src, dst):
														
 
															+        self.language = language
														
 
															+        self.api_key = api_key
														
 
															+        self.service = build('translate', 'v2',
														
 
															+                             developerKey=self.api_key)
														
 
															+        self.src = src
														
 
															+        self.dst = dst
														
 
															+
														
 
															+    def __call__(self, sentence):
														
 
															+        try:
														
 
															+            if not sentence:
														
 
															+                return None
														
 
															+
														
 
															+            result = self.service.translations().list( # pylint: disable=no-member
														
 
															+                source=self.src,
														
 
															+                target=self.dst,
														
 
															+                q=[sentence]
														
 
															+            ).execute()
														
 
															+
														
 
															+            if 'translations' in result and result['translations'] and \
														
 
															+                'translatedText' in result['translations'][0]:
														
 
															+                return result['translations'][0]['translatedText']
														
 
															+
														
 
															+            return None
														
 
															+
														
 
															+        except KeyboardInterrupt:
														
 
															+            return None
														
 
															+
														
 
															+
														
 
															+def which(program):
														
 
															+    """
														
 
															+    Return the path for a given executable.
														
 
															+    """
														
 
															+    def is_exe(file_path):
														
 
															+        """
														
 
															+        Checks whether a file is executable.
														
 
															+        """
														
 
															+        return os.path.isfile(file_path) and os.access(file_path, os.X_OK)
														
 
															+    #necessary to run on Windows
														
 
															+    if os.name == "nt":
														
 
															+        program += ".exe"
														
 
															+    fpath, _ = os.path.split(program)
														
 
															+    if fpath:
														
 
															+        if is_exe(program):
														
 
															+            return program
														
 
															+    else:
														
 
															+        #looks for file in the script execution folder before checking on system path
														
 
															+        current_dir = os.getcwd()
														
 
															+        local_program = os.path.join(current_dir, program)
														
 
															+        if is_exe(local_program):
														
 
															+            return local_program
														
 
															+        else:
														
 
															+            for path in os.environ["PATH"].split(os.pathsep):
														
 
															+                path = path.strip('"')
														
 
															+                exe_file = os.path.join(path, program)
														
 
															+                if is_exe(exe_file):
														
 
															+                    return exe_file
														
 
															+    return None
														
 
															+
														
 
															+
														
 
															+def extract_audio(filename, channels=1, rate=16000):
														
 
															+    """
														
 
															+    Extract audio from an input file to a temporary WAV file.
														
 
															+    """
														
 
															+    temp = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
														
 
															+    if not os.path.isfile(filename):
														
 
															+        print("The given file does not exist: {}".format(filename))
														
 
															+        raise Exception("Invalid filepath: {}".format(filename))
														
 
															+    program_ffmpeg = which("ffmpeg")
														
 
															+    if not program_ffmpeg:
														
 
															+        print("ffmpeg: Executable not found on machine.")
														
 
															+        raise Exception("Dependency not found: ffmpeg")
														
 
															+    command = [str(program_ffmpeg), "-y", "-i", filename,
														
 
															+               "-ac", str(channels), "-ar", str(rate),
														
 
															+               "-loglevel", "error", temp.name]
														
 
															+    use_shell = True if os.name == "nt" else False
														
 
															+    subprocess.check_output(command, stdin=open(os.devnull), shell=use_shell)
														
 
															+    return temp.name, rate
														
 
															+
														
 
															+
														
 
															+def find_speech_regions(filename, frame_width=4096, min_region_size=0.5, max_region_size=6): # pylint: disable=too-many-locals
														
 
															+    """
														
 
															+    Perform voice activity detection on a given audio file.
														
 
															+    """
														
 
															+    reader = wave.open(filename)
														
 
															+    sample_width = reader.getsampwidth()
														
 
															+    rate = reader.getframerate()
														
 
															+    n_channels = reader.getnchannels()
														
 
															+    chunk_duration = float(frame_width) / rate
														
 
															+
														
 
															+    n_chunks = int(math.ceil(reader.getnframes()*1.0 / frame_width))
														
 
															+    energies = []
														
 
															+
														
 
															+    for _ in range(n_chunks):
														
 
															+        chunk = reader.readframes(frame_width)
														
 
															+        energies.append(audioop.rms(chunk, sample_width * n_channels))
														
 
															+
														
 
															+    threshold = percentile(energies, 0.2)
														
 
															+
														
 
															+    elapsed_time = 0
														
 
															+
														
 
															+    regions = []
														
 
															+    region_start = None
														
 
															+
														
 
															+    for energy in energies:
														
 
															+        is_silence = energy <= threshold
														
 
															+        max_exceeded = region_start and elapsed_time - region_start >= max_region_size
														
 
															+
														
 
															+        if (max_exceeded or is_silence) and region_start:
														
 
															+            if elapsed_time - region_start >= min_region_size:
														
 
															+                regions.append((region_start, elapsed_time))
														
 
															+                region_start = None
														
 
															+
														
 
															+        elif (not region_start) and (not is_silence):
														
 
															+            region_start = elapsed_time
														
 
															+        elapsed_time += chunk_duration
														
 
															+    return regions
														
 
															+
														
 
															+
														
 
															+def generate_subtitles( # pylint: disable=too-many-locals,too-many-arguments
														
 
															+        source_path,
														
 
															+        output=None,
														
 
															+        concurrency=DEFAULT_CONCURRENCY,
														
 
															+        src_language=DEFAULT_SRC_LANGUAGE,
														
 
															+        dst_language=DEFAULT_DST_LANGUAGE,
														
 
															+        subtitle_file_format=DEFAULT_SUBTITLE_FORMAT,
														
 
															+        api_key=None,
														
 
															+    ):
														
 
															+    """
														
 
															+    Given an input audio/video file, generate subtitles in the specified language and format.
														
 
															+    """
														
 
															+
														
 
															+    if os.name != "nt" and "Darwin" in os.uname():
														
 
															+        #the default unix fork method does not work on Mac OS
														
 
															+        #need to use forkserver
														
 
															+        if 'forkserver' != multiprocessing.get_start_method(allow_none=True):
														
 
															+            multiprocessing.set_start_method('forkserver')
														
 
															+
														
 
															+    audio_filename, audio_rate = extract_audio(source_path)
														
 
															+
														
 
															+    regions = find_speech_regions(audio_filename)
														
 
															+
														
 
															+    pool = multiprocessing.Pool(concurrency)
														
 
															+    converter = FLACConverter(source_path=audio_filename)
														
 
															+    recognizer = SpeechRecognizer(language=src_language, rate=audio_rate,
														
 
															+                                  api_key=GOOGLE_SPEECH_API_KEY)
														
 
															+
														
 
															+    transcripts = []
														
 
															+    if regions:
														
 
															+        try:
														
 
															+            widgets = ["Converting speech regions to FLAC files: ", Percentage(), ' ', Bar(), ' ',
														
 
															+                       ETA()]
														
 
															+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
														
 
															+            extracted_regions = []
														
 
															+            for i, extracted_region in enumerate(pool.imap(converter, regions)):
														
 
															+                extracted_regions.append(extracted_region)
														
 
															+                pbar.update(i)
														
 
															+            pbar.finish()
														
 
															+
														
 
															+            widgets = ["Performing speech recognition: ", Percentage(), ' ', Bar(), ' ', ETA()]
														
 
															+            pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
														
 
															+
														
 
															+            for i, transcript in enumerate(pool.imap(recognizer, extracted_regions)):
														
 
															+                transcripts.append(transcript)
														
 
															+                pbar.update(i)
														
 
															+            pbar.finish()
														
 
															+
														
 
															+            if src_language.split("-")[0] != dst_language.split("-")[0]:
														
 
															+                if api_key:
														
 
															+                    google_translate_api_key = api_key
														
 
															+                    translator = Translator(dst_language, google_translate_api_key,
														
 
															+                                            dst=dst_language,
														
 
															+                                            src=src_language)
														
 
															+                    prompt = "Translating from {0} to {1}: ".format(src_language, dst_language)
														
 
															+                    widgets = [prompt, Percentage(), ' ', Bar(), ' ', ETA()]
														
 
															+                    pbar = ProgressBar(widgets=widgets, maxval=len(regions)).start()
														
 
															+                    translated_transcripts = []
														
 
															+                    for i, transcript in enumerate(pool.imap(translator, transcripts)):
														
 
															+                        translated_transcripts.append(transcript)
														
 
															+                        pbar.update(i)
														
 
															+                    pbar.finish()
														
 
															+                    transcripts = translated_transcripts
														
 
															+                else:
														
 
															+                    print(
														
 
															+                        "Error: Subtitle translation requires specified Google Translate API key. "
														
 
															+                        "See --help for further information."
														
 
															+                    )
														
 
															+                    return 1
														
 
															+
														
 
															+        except KeyboardInterrupt:
														
 
															+            pbar.finish()
														
 
															+            pool.terminate()
														
 
															+            pool.join()
														
 
															+            print("Cancelling transcription")
														
 
															+            raise
														
 
															+
														
 
															+    timed_subtitles = [(r, t) for r, t in zip(regions, transcripts) if t]
														
 
															+    formatter = FORMATTERS.get(subtitle_file_format)
														
 
															+    formatted_subtitles = formatter(timed_subtitles)
														
 
															+
														
 
															+    dest = output
														
 
															+
														
 
															+    if not dest:
														
 
															+        base = os.path.splitext(source_path)[0]
														
 
															+        dest = "{base}.{format}".format(base=base, format=subtitle_file_format)
														
 
															+
														
 
															+    with open(dest, 'wb') as output_file:
														
 
															+        output_file.write(formatted_subtitles.encode("utf-8"))
														
 
															+
														
 
															+    os.remove(audio_filename)
														
 
															+
														
 
															+    return dest
														
 
															+
														
 
															+
														
 
															+def validate(args):
														
 
															+    """
														
 
															+    Check that the CLI arguments passed to autosub are valid.
														
 
															+    """
														
 
															+    if args.format not in FORMATTERS:
														
 
															+        print(
														
 
															+            "Subtitle format not supported. "
														
 
															+            "Run with --list-formats to see all supported formats."
														
 
															+        )
														
 
															+        return False
														
 
															+
														
 
															+    if args.src_language not in LANGUAGE_CODES.keys():
														
 
															+        print(
														
 
															+            "Source language not supported. "
														
 
															+            "Run with --list-languages to see all supported languages."
														
 
															+        )
														
 
															+        return False
														
 
															+
														
 
															+    if args.dst_language not in LANGUAGE_CODES.keys():
														
 
															+        print(
														
 
															+            "Destination language not supported. "
														
 
															+            "Run with --list-languages to see all supported languages."
														
 
															+        )
														
 
															+        return False
														
 
															+
														
 
															+    if not args.source_path:
														
 
															+        print("Error: You need to specify a source path.")
														
 
															+        return False
														
 
															+
														
 
															+    return True
														
 
															+
														
 
															+
														
 
															+def main():
														
 
															+    """
														
 
															+    Run autosub as a command-line program.
														
 
															+    """
														
 
															+    parser = argparse.ArgumentParser()
														
 
															+    parser.add_argument('source_path', help="Path to the video or audio file to subtitle",
														
 
															+                        nargs='?')
														
 
															+    parser.add_argument('-C', '--concurrency', help="Number of concurrent API requests to make",
														
 
															+                        type=int, default=DEFAULT_CONCURRENCY)
														
 
															+    parser.add_argument('-o', '--output',
														
 
															+                        help="Output path for subtitles (by default, subtitles are saved in \
														
 
															+                        the same directory and name as the source path)")
														
 
															+    parser.add_argument('-F', '--format', help="Destination subtitle format",
														
 
															+                        default=DEFAULT_SUBTITLE_FORMAT)
														
 
															+    parser.add_argument('-S', '--src-language', help="Language spoken in source file",
														
 
															+                        default=DEFAULT_SRC_LANGUAGE)
														
 
															+    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
														
 
															+                        default=DEFAULT_DST_LANGUAGE)
														
 
															+    parser.add_argument('-K', '--api-key',
														
 
															+                        help="The Google Translate API key to be used. \
														
 
															+                        (Required for subtitle translation)")
														
 
															+    parser.add_argument('--list-formats', help="List all available subtitle formats",
														
 
															+                        action='store_true')
														
 
															+    parser.add_argument('--list-languages', help="List all available source/destination languages",
														
 
															+                        action='store_true')
														
 
															+
														
 
															+    args = parser.parse_args()
														
 
															+
														
 
															+    if args.list_formats:
														
 
															+        print("List of formats:")
														
 
															+        for subtitle_format in FORMATTERS:
														
 
															+            print("{format}".format(format=subtitle_format))
														
 
															+        return 0
														
 
															+
														
 
															+    if args.list_languages:
														
 
															+        print("List of all languages:")
														
 
															+        for code, language in sorted(LANGUAGE_CODES.items()):
														
 
															+            print("{code}\t{language}".format(code=code, language=language))
														
 
															+        return 0
														
 
															+
														
 
															+    if not validate(args):
														
 
															+        return 1
														
 
															+
														
 
															+    try:
														
 
															+        subtitle_file_path = generate_subtitles(
														
 
															+            source_path=args.source_path,
														
 
															+            concurrency=args.concurrency,
														
 
															+            src_language=args.src_language,
														
 
															+            dst_language=args.dst_language,
														
 
															+            api_key=args.api_key,
														
 
															+            subtitle_file_format=args.format,
														
 
															+            output=args.output,
														
 
															+        )
														
 
															+        print("Subtitles file created at {}".format(subtitle_file_path))
														
 
															+    except KeyboardInterrupt:
														
 
															+        return 1
														
 
															+
														
 
															+    return 0
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    sys.exit(main())
														
--- a/composer/autosub/__pycache__/__init__.cpython-37.pyc
+++ b/composer/autosub/__pycache__/__init__.cpython-37.pyc
--- a/composer/autosub/__pycache__/__init__.cpython-38.pyc
+++ b/composer/autosub/__pycache__/__init__.cpython-38.pyc
--- a/composer/autosub/__pycache__/__init__.cpython-39.pyc
+++ b/composer/autosub/__pycache__/__init__.cpython-39.pyc
--- a/composer/autosub/__pycache__/constants.cpython-37.pyc
+++ b/composer/autosub/__pycache__/constants.cpython-37.pyc
--- a/composer/autosub/__pycache__/constants.cpython-38.pyc
+++ b/composer/autosub/__pycache__/constants.cpython-38.pyc
--- a/composer/autosub/__pycache__/constants.cpython-39.pyc
+++ b/composer/autosub/__pycache__/constants.cpython-39.pyc
--- a/composer/autosub/__pycache__/formatters.cpython-37.pyc
+++ b/composer/autosub/__pycache__/formatters.cpython-37.pyc
--- a/composer/autosub/__pycache__/formatters.cpython-38.pyc
+++ b/composer/autosub/__pycache__/formatters.cpython-38.pyc
--- a/composer/autosub/__pycache__/formatters.cpython-39.pyc
+++ b/composer/autosub/__pycache__/formatters.cpython-39.pyc
--- a/composer/autosub/constants.py
+++ b/composer/autosub/constants.py
@@ -1,118 +1,118 @@
 
															-"""

														
 
															-Defines constants used by autosub.s

														
 
															-"""

														
 
															-

														
 
															-from __future__ import unicode_literals

														
 
															-

														
 
															-GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"

														
 
															-GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long

														
 
															-

														
 
															-LANGUAGE_CODES = {

														
 
															-    'af': 'Afrikaans',

														
 
															-    'ar': 'Arabic',

														
 
															-    'az': 'Azerbaijani',

														
 
															-    'be': 'Belarusian',

														
 
															-    'bg': 'Bulgarian',

														
 
															-    'bn': 'Bengali',

														
 
															-    'bs': 'Bosnian',

														
 
															-    'ca': 'Catalan',

														
 
															-    'ceb': 'Cebuano',

														
 
															-    'cs': 'Czech',

														
 
															-    'cy': 'Welsh',

														
 
															-    'da': 'Danish',

														
 
															-    'de': 'German',

														
 
															-    'el': 'Greek',

														
 
															-    'en-AU': 'English (Australia)',

														
 
															-    'en-CA': 'English (Canada)',

														
 
															-    'en-GB': 'English (United Kingdom)',

														
 
															-    'en-IN': 'English (India)',

														
 
															-    'en-IE': 'English (Ireland)',

														
 
															-    'en-NZ': 'English (New Zealand)',

														
 
															-    'en-PH': 'English (Philippines)',

														
 
															-    'en-SG': 'English (Singapore)',

														
 
															-    'en-US': 'English (United States)',

														
 
															-    'eo': 'Esperanto',

														
 
															-    'es-AR': 'Spanish (Argentina)',

														
 
															-    'es-CL': 'Spanish (Chile)',

														
 
															-    'es-ES': 'Spanish (Spain)',

														
 
															-    'es-US': 'Spanish (United States)',

														
 
															-    'es-MX': 'Spanish (Mexico)',

														
 
															-    'es': 'Spanish',

														
 
															-    'et': 'Estonian',

														
 
															-    'eu': 'Basque',

														
 
															-    'fa': 'Persian',

														
 
															-    'fi': 'Finnish',

														
 
															-    'fr': 'French',

														
 
															-    'ga': 'Irish',

														
 
															-    'gl': 'Galician',

														
 
															-    'gu': 'Gujarati',

														
 
															-    'ha': 'Hausa',

														
 
															-    'hi': 'Hindi',

														
 
															-    'hmn': 'Hmong',

														
 
															-    'hr': 'Croatian',

														
 
															-    'ht': 'Haitian Creole',

														
 
															-    'hu': 'Hungarian',

														
 
															-    'hy': 'Armenian',

														
 
															-    'id': 'Indonesian',

														
 
															-    'ig': 'Igbo',

														
 
															-    'is': 'Icelandic',

														
 
															-    'it': 'Italian',

														
 
															-    'iw': 'Hebrew',

														
 
															-    'ja': 'Japanese',

														
 
															-    'jw': 'Javanese',

														
 
															-    'ka': 'Georgian',

														
 
															-    'kk': 'Kazakh',

														
 
															-    'km': 'Khmer',

														
 
															-    'kn': 'Kannada',

														
 
															-    'ko': 'Korean',

														
 
															-    'la': 'Latin',

														
 
															-    'lo': 'Lao',

														
 
															-    'lt': 'Lithuanian',

														
 
															-    'lv': 'Latvian',

														
 
															-    'mg': 'Malagasy',

														
 
															-    'mi': 'Maori',

														
 
															-    'mk': 'Macedonian',

														
 
															-    'ml': 'Malayalam',

														
 
															-    'mn': 'Mongolian',

														
 
															-    'mr': 'Marathi',

														
 
															-    'ms': 'Malay',

														
 
															-    'mt': 'Maltese',

														
 
															-    'my': 'Myanmar (Burmese)',

														
 
															-    'ne': 'Nepali',

														
 
															-    'nl': 'Dutch',

														
 
															-    'no': 'Norwegian',

														
 
															-    'ny': 'Chichewa',

														
 
															-    'pa': 'Punjabi',

														
 
															-    'pl': 'Polish',

														
 
															-    'pt-BR': 'Portuguese (Brazil)',

														
 
															-    'pt-PT': 'Portuguese (Portugal)',

														
 
															-    'ro': 'Romanian',

														
 
															-    'ru': 'Russian',

														
 
															-    'si': 'Sinhala',

														
 
															-    'sk': 'Slovak',

														
 
															-    'sl': 'Slovenian',

														
 
															-    'so': 'Somali',

														
 
															-    'sq': 'Albanian',

														
 
															-    'sr': 'Serbian',

														
 
															-    'st': 'Sesotho',

														
 
															-    'su': 'Sudanese',

														
 
															-    'sv': 'Swedish',

														
 
															-    'sw': 'Swahili',

														
 
															-    'ta': 'Tamil',

														
 
															-    'te': 'Telugu',

														
 
															-    'tg': 'Tajik',

														
 
															-    'th': 'Thai',

														
 
															-    'tl': 'Filipino',

														
 
															-    'tr': 'Turkish',

														
 
															-    'uk': 'Ukrainian',

														
 
															-    'ur': 'Urdu',

														
 
															-    'uz': 'Uzbek',

														
 
															-    'vi': 'Vietnamese',

														
 
															-    'yi': 'Yiddish',

														
 
															-    'yo': 'Yoruba',

														
 
															-    'yue-Hant-HK': 'Cantonese, (Traditional HK)',

														
 
															-    'zh': 'Chinese (Simplified, China)',

														
 
															-    'zh-HK': 'Chinese (Simplified, Hong Kong)',

														
 
															-    'zh-TW': 'Chinese (Traditional, Taiwan)',

														
 
															-    'zu': 'Zulu',

														
 
															-}

														
 
															+"""
														
 
															+Defines constants used by autosub.s
														
 
															+"""
														
 
															+
														
 
															+from __future__ import unicode_literals
														
 
															+
														
 
															+GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
														
 
															+GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
														
 
															+
														
 
															+LANGUAGE_CODES = {
														
 
															+    'af': 'Afrikaans',
														
 
															+    'ar': 'Arabic',
														
 
															+    'az': 'Azerbaijani',
														
 
															+    'be': 'Belarusian',
														
 
															+    'bg': 'Bulgarian',
														
 
															+    'bn': 'Bengali',
														
 
															+    'bs': 'Bosnian',
														
 
															+    'ca': 'Catalan',
														
 
															+    'ceb': 'Cebuano',
														
 
															+    'cs': 'Czech',
														
 
															+    'cy': 'Welsh',
														
 
															+    'da': 'Danish',
														
 
															+    'de': 'German',
														
 
															+    'el': 'Greek',
														
 
															+    'en-AU': 'English (Australia)',
														
 
															+    'en-CA': 'English (Canada)',
														
 
															+    'en-GB': 'English (United Kingdom)',
														
 
															+    'en-IN': 'English (India)',
														
 
															+    'en-IE': 'English (Ireland)',
														
 
															+    'en-NZ': 'English (New Zealand)',
														
 
															+    'en-PH': 'English (Philippines)',
														
 
															+    'en-SG': 'English (Singapore)',
														
 
															+    'en-US': 'English (United States)',
														
 
															+    'eo': 'Esperanto',
														
 
															+    'es-AR': 'Spanish (Argentina)',
														
 
															+    'es-CL': 'Spanish (Chile)',
														
 
															+    'es-ES': 'Spanish (Spain)',
														
 
															+    'es-US': 'Spanish (United States)',
														
 
															+    'es-MX': 'Spanish (Mexico)',
														
 
															+    'es': 'Spanish',
														
 
															+    'et': 'Estonian',
														
 
															+    'eu': 'Basque',
														
 
															+    'fa': 'Persian',
														
 
															+    'fi': 'Finnish',
														
 
															+    'fr': 'French',
														
 
															+    'ga': 'Irish',
														
 
															+    'gl': 'Galician',
														
 
															+    'gu': 'Gujarati',
														
 
															+    'ha': 'Hausa',
														
 
															+    'hi': 'Hindi',
														
 
															+    'hmn': 'Hmong',
														
 
															+    'hr': 'Croatian',
														
 
															+    'ht': 'Haitian Creole',
														
 
															+    'hu': 'Hungarian',
														
 
															+    'hy': 'Armenian',
														
 
															+    'id': 'Indonesian',
														
 
															+    'ig': 'Igbo',
														
 
															+    'is': 'Icelandic',
														
 
															+    'it': 'Italian',
														
 
															+    'iw': 'Hebrew',
														
 
															+    'ja': 'Japanese',
														
 
															+    'jw': 'Javanese',
														
 
															+    'ka': 'Georgian',
														
 
															+    'kk': 'Kazakh',
														
 
															+    'km': 'Khmer',
														
 
															+    'kn': 'Kannada',
														
 
															+    'ko': 'Korean',
														
 
															+    'la': 'Latin',
														
 
															+    'lo': 'Lao',
														
 
															+    'lt': 'Lithuanian',
														
 
															+    'lv': 'Latvian',
														
 
															+    'mg': 'Malagasy',
														
 
															+    'mi': 'Maori',
														
 
															+    'mk': 'Macedonian',
														
 
															+    'ml': 'Malayalam',
														
 
															+    'mn': 'Mongolian',
														
 
															+    'mr': 'Marathi',
														
 
															+    'ms': 'Malay',
														
 
															+    'mt': 'Maltese',
														
 
															+    'my': 'Myanmar (Burmese)',
														
 
															+    'ne': 'Nepali',
														
 
															+    'nl': 'Dutch',
														
 
															+    'no': 'Norwegian',
														
 
															+    'ny': 'Chichewa',
														
 
															+    'pa': 'Punjabi',
														
 
															+    'pl': 'Polish',
														
 
															+    'pt-BR': 'Portuguese (Brazil)',
														
 
															+    'pt-PT': 'Portuguese (Portugal)',
														
 
															+    'ro': 'Romanian',
														
 
															+    'ru': 'Russian',
														
 
															+    'si': 'Sinhala',
														
 
															+    'sk': 'Slovak',
														
 
															+    'sl': 'Slovenian',
														
 
															+    'so': 'Somali',
														
 
															+    'sq': 'Albanian',
														
 
															+    'sr': 'Serbian',
														
 
															+    'st': 'Sesotho',
														
 
															+    'su': 'Sudanese',
														
 
															+    'sv': 'Swedish',
														
 
															+    'sw': 'Swahili',
														
 
															+    'ta': 'Tamil',
														
 
															+    'te': 'Telugu',
														
 
															+    'tg': 'Tajik',
														
 
															+    'th': 'Thai',
														
 
															+    'tl': 'Filipino',
														
 
															+    'tr': 'Turkish',
														
 
															+    'uk': 'Ukrainian',
														
 
															+    'ur': 'Urdu',
														
 
															+    'uz': 'Uzbek',
														
 
															+    'vi': 'Vietnamese',
														
 
															+    'yi': 'Yiddish',
														
 
															+    'yo': 'Yoruba',
														
 
															+    'yue-Hant-HK': 'Cantonese, (Traditional HK)',
														
 
															+    'zh': 'Chinese (Simplified, China)',
														
 
															+    'zh-HK': 'Chinese (Simplified, Hong Kong)',
														
 
															+    'zh-TW': 'Chinese (Traditional, Taiwan)',
														
 
															+    'zu': 'Zulu',
														
 
															+}
														
--- a/composer/autosub/formatters.py
+++ b/composer/autosub/formatters.py
@@ -1,66 +1,66 @@
 
															-"""

														
 
															-Defines subtitle formatters used by autosub.s

														
 
															-"""

														
 
															-

														
 
															-# -*- coding: utf-8 -*-

														
 
															-from __future__ import unicode_literals

														
 
															-

														
 
															-import json

														
 
															-

														
 
															-import pysrt

														
 
															-import six

														
 
															-

														
 
															-

														
 
															-def srt_formatter(subtitles, padding_before=0, padding_after=0):

														
 
															-    """

														
 
															-    Serialize a list of subtitles according to the SRT format, with optional time padding.

														
 
															-    """

														
 
															-    sub_rip_file = pysrt.SubRipFile()

														
 
															-    for i, ((start, end), text) in enumerate(subtitles, start=1):

														
 
															-        item = pysrt.SubRipItem()

														
 
															-        item.index = i

														
 
															-        item.text = six.text_type(text)

														
 
															-        item.start.seconds = max(0, start - padding_before)

														
 
															-        item.end.seconds = end + padding_after

														
 
															-        sub_rip_file.append(item)

														
 
															-    return '\n'.join(six.text_type(item) for item in sub_rip_file)

														
 
															-

														
 
															-

														
 
															-def vtt_formatter(subtitles, padding_before=0, padding_after=0):

														
 
															-    """

														
 
															-    Serialize a list of subtitles according to the VTT format, with optional time padding.

														
 
															-    """

														
 
															-    text = srt_formatter(subtitles, padding_before, padding_after)

														
 
															-    text = 'WEBVTT\n\n' + text.replace(',', '.')

														
 
															-    return text

														
 
															-

														
 
															-

														
 
															-def json_formatter(subtitles):

														
 
															-    """

														
 
															-    Serialize a list of subtitles as a JSON blob.

														
 
															-    """

														
 
															-    subtitle_dicts = [

														
 
															-        {

														
 
															-            'start': start,

														
 
															-            'end': end,

														
 
															-            'content': text,

														
 
															-        }

														
 
															-        for ((start, end), text)

														
 
															-        in subtitles

														
 
															-    ]

														
 
															-    return json.dumps(subtitle_dicts)

														
 
															-

														
 
															-

														
 
															-def raw_formatter(subtitles):

														
 
															-    """

														
 
															-    Serialize a list of subtitles as a newline-delimited string.

														
 
															-    """

														
 
															-    return ' '.join(text for (_rng, text) in subtitles)

														
 
															-

														
 
															-

														
 
															-FORMATTERS = {

														
 
															-    'srt': srt_formatter,

														
 
															-    'vtt': vtt_formatter,

														
 
															-    'json': json_formatter,

														
 
															-    'raw': raw_formatter,

														
 
															-}

														
 
															+"""
														
 
															+Defines subtitle formatters used by autosub.s
														
 
															+"""
														
 
															+
														
 
															+# -*- coding: utf-8 -*-
														
 
															+from __future__ import unicode_literals
														
 
															+
														
 
															+import json
														
 
															+
														
 
															+import pysrt
														
 
															+import six
														
 
															+
														
 
															+
														
 
															+def srt_formatter(subtitles, padding_before=0, padding_after=0):
														
 
															+    """
														
 
															+    Serialize a list of subtitles according to the SRT format, with optional time padding.
														
 
															+    """
														
 
															+    sub_rip_file = pysrt.SubRipFile()
														
 
															+    for i, ((start, end), text) in enumerate(subtitles, start=1):
														
 
															+        item = pysrt.SubRipItem()
														
 
															+        item.index = i
														
 
															+        item.text = six.text_type(text)
														
 
															+        item.start.seconds = max(0, start - padding_before)
														
 
															+        item.end.seconds = end + padding_after
														
 
															+        sub_rip_file.append(item)
														
 
															+    return '\n'.join(six.text_type(item) for item in sub_rip_file)
														
 
															+
														
 
															+
														
 
															+def vtt_formatter(subtitles, padding_before=0, padding_after=0):
														
 
															+    """
														
 
															+    Serialize a list of subtitles according to the VTT format, with optional time padding.
														
 
															+    """
														
 
															+    text = srt_formatter(subtitles, padding_before, padding_after)
														
 
															+    text = 'WEBVTT\n\n' + text.replace(',', '.')
														
 
															+    return text
														
 
															+
														
 
															+
														
 
															+def json_formatter(subtitles):
														
 
															+    """
														
 
															+    Serialize a list of subtitles as a JSON blob.
														
 
															+    """
														
 
															+    subtitle_dicts = [
														
 
															+        {
														
 
															+            'start': start,
														
 
															+            'end': end,
														
 
															+            'content': text,
														
 
															+        }
														
 
															+        for ((start, end), text)
														
 
															+        in subtitles
														
 
															+    ]
														
 
															+    return json.dumps(subtitle_dicts)
														
 
															+
														
 
															+
														
 
															+def raw_formatter(subtitles):
														
 
															+    """
														
 
															+    Serialize a list of subtitles as a newline-delimited string.
														
 
															+    """
														
 
															+    return ' '.join(text for (_rng, text) in subtitles)
														
 
															+
														
 
															+
														
 
															+FORMATTERS = {
														
 
															+    'srt': srt_formatter,
														
 
															+    'vtt': vtt_formatter,
														
 
															+    'json': json_formatter,
														
 
															+    'raw': raw_formatter,
														
 
															+}