|
@@ -10,8 +10,18 @@ from translate import Translator
|
|
|
from langdetect import detect
|
|
|
from chardet.universaldetector import UniversalDetector
|
|
|
import numpy as np
|
|
|
+from openai import OpenAI
|
|
|
+from iso639 import Lang
|
|
|
|
|
|
DEFAULT_ENCODING = "utf-8"

# Connection settings for the OpenAI-compatible chat-completions server used by
# translate(). The literals are the values that were previously hard-coded and
# remain as fallbacks, so behaviour is unchanged unless the environment
# variables are set.
# NOTE(review): an internal address and an API key should not live in source
# control -- consider removing the fallbacks once deployments set the variables.
LLM_BASE_URL = os.environ.get("TRANSLATE_LLM_BASE_URL", "http://192.168.192.84:8080/v1")
LLM_API_KEY = os.environ.get("TRANSLATE_LLM_API_KEY", "choozmo9")
client = OpenAI(base_url=LLM_BASE_URL, api_key=LLM_API_KEY)

# System prompt template; {from_lang} and {to_lang} are filled in with
# str.format() by translate() before each request.
system_prompt = (
    "You are a precise and literal translator. "
    "Translate the user's input from {from_lang} to {to_lang} as faithfully and literally as possible. "
    "Preserve the original structure and vocabulary. "
    "Avoid paraphrasing, interpretation, or creative rewrites. "
    "Do not add explanations. Just return the translated text only."
)
|
|
|
|
|
|
def guess_codec(filenames: list) -> str:
|
|
|
codec_detector = UniversalDetector()
|
|
@@ -104,21 +114,41 @@ def update_zip(zip_path, lang, new_filename, voice):
|
|
|
os.remove(zip_path)
|
|
|
|
|
|
def translate_table(table, lang):
    """Translate the '大標' (title) and '字幕' (subtitle) cells of every row.

    Each non-missing, non-empty cell in those two columns is sent to
    ``translate`` and overwritten in place with the result. Progress is
    printed to stdout.

    Args:
        table: Table indexed through ``.loc[row, [column]]``; rows are
            addressed by the labels ``0 .. len(table) - 1``
            (presumably a pandas DataFrame with a default index -- confirm
            against the caller).
        lang: Target language code, forwarded unchanged to ``translate``.

    Returns:
        The same ``table`` object, modified in place.
    """
    print(f"translate to {lang}")
    for i in range(len(table)):
        # Both columns get identical treatment. Each cell's text is read from
        # its OWN column: the previous version read the subtitle text while
        # handling the title, overwriting the title with the translated
        # subtitle.
        for column in ("大標", "字幕"):
            cell = table.loc[i, [column]]
            if cell.isna().item():
                continue  # missing value: nothing to translate
            text = cell.item()
            if not text:
                continue  # empty string: nothing to translate
            print(f"{column}:", text)
            translation = translate(text, lang)
            print(f"{column}翻譯:", translation)
            table.loc[i, [column]] = translation
    return table
|
|
|
|
|
|
def translate(text, to_lang: str):
    """Translate ``text`` into ``to_lang`` using the chat-completions client.

    The source language is detected from ``text`` with ``detect``. Both the
    detected code and ``to_lang`` are reduced to the part before the first
    ``"-"`` and resolved to a language name with ``Lang``; the names are
    substituted into ``system_prompt``.

    Args:
        text: The text to translate.
        to_lang: Target language code, optionally region-tagged
            (e.g. ``"en"`` or ``"zh-TW"``).

    Returns:
        The message content of the first completion choice.
    """
    # The detector can return region-tagged codes such as "zh-cn"; Lang() only
    # resolves the base language code, so strip the region first.
    from_lang = Lang(detect(text).split("-")[0]).name
    # str.split() returns a list; Lang() needs the first element, not the list.
    to_lang = Lang(to_lang.split("-")[0]).name
    # NOTE(review): every Chinese target is mapped to Traditional Chinese,
    # whatever the region tag was -- confirm that is intended.
    if to_lang == "Chinese":
        to_lang = "Traditional Chinese"
    completion = client.chat.completions.create(
        model="gemma",
        messages=[
            {
                "role": "system",
                "content": system_prompt.format(from_lang=from_lang, to_lang=to_lang),
            },
            {
                "role": "user",
                "content": text,
            },
        ],
    )
    return completion.choices[0].message.content
|
|
|
|