123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115 |
- import pandas as pd
- from pathlib import Path
- import subprocess
- import shutil
- import os
- import chardet
- import zipfile
- from io import BytesIO
- from translate import Translator
- from chardet.universaldetector import UniversalDetector
- DEFAULT_ENCODING = "utf-8"
def guess_codec(filenames: list) -> str:
    """Guess the legacy code page that zip member names were stored in.

    ``zipfile`` decodes member names that lack the UTF-8 flag as CP437.
    Re-encoding such a name with CP437 recovers the raw bytes stored in the
    archive, which are fed to chardet's ``UniversalDetector``.

    Args:
        filenames: Member names as returned by ``ZipFile.namelist()``.

    Returns:
        The encoding name reported by the detector, or ``DEFAULT_ENCODING``
        when the detector reports none (for example when nothing was fed).
    """
    codec_detector = UniversalDetector()
    for filename in filenames:
        try:
            raw_name = filename.encode('cp437')
        except UnicodeEncodeError:
            # The name contains characters outside CP437, so it cannot be a
            # CP437-decoded legacy name (zipfile already decoded it as UTF-8).
            # It carries no information about the legacy codec: skip it
            # instead of crashing.
            continue
        codec_detector.feed(raw_name)
        if codec_detector.done:
            break
    result = codec_detector.close()
    return result.get("encoding") or DEFAULT_ENCODING
-
def _recover_filenames(names):
    """Return ``names`` with legacy-encoded entries decoded to their real text."""
    raw_names = {}
    for name in names:
        try:
            # zipfile decodes names without the UTF-8 flag as CP437, so this
            # round-trips back to the bytes stored in the archive.
            raw_names[name] = name.encode('cp437')
        except UnicodeEncodeError:
            # Characters outside CP437: the name is already proper Unicode.
            continue

    # Pure-ASCII names decode to themselves; no detection is needed.
    if all(name.isascii() for name in raw_names):
        return list(names)

    codec = guess_codec(list(raw_names))
    recovered = []
    for name in names:
        raw = raw_names.get(name)
        if raw is None:
            recovered.append(name)
            continue
        try:
            recovered.append(raw.decode(codec))
        except (UnicodeDecodeError, LookupError):
            # The guess did not fit this name; keep the archive's own name
            # rather than aborting the whole validation.
            recovered.append(name)
    return recovered


def _is_scenario_file(name):
    """Tell whether ``name`` is a scenario table (xlsx/csv, not ``._*``/``style``)."""
    entry = Path(name)
    return (
        entry.suffix in (".xlsx", ".csv")
        and not entry.name.startswith("._")
        and entry.stem != "style"
    )


def check_zip(zip_filepath: str):
    """Validate that a scenario zip archive is self-consistent.

    The archive must contain exactly one scenario table (``.xlsx`` or
    ``.csv``; entries whose name starts with ``._`` or whose stem is
    ``style`` are not counted).  For every row of that table:

    * each comma-separated name in the ``素材`` (media) column must match the
      stem of exactly one file in the archive;
    * when the ``字幕`` (subtitle) cell is empty, the ``音檔`` (voice file)
      cell must be filled and match the stem of exactly one file.

    Args:
        zip_filepath: Path of the zip archive to check.

    Returns:
        None.  The function only raises on invalid input.

    Raises:
        ValueError: If any of the rules above is violated.
        KeyError: If the table has rows but lacks one of the columns above.
    """
    with zipfile.ZipFile(str(Path(zip_filepath))) as zf:
        archive_names = [x for x in zf.namelist() if not x.endswith('/')]
        true_filenames = _recover_filenames(archive_names)
        scenario_files = [
            (true_name, archive_name)
            for true_name, archive_name in zip(true_filenames, archive_names)
            if _is_scenario_file(true_name)
        ]
        if len(scenario_files) == 0:
            raise ValueError("no excel or csv file in zip.")
        if len(scenario_files) > 1:
            raise ValueError("too many excel or csv file in zip.")
        scenario_name, archive_name = scenario_files[0]
        # The archive is addressed by the name zipfile knows, not the
        # recovered one.
        payload = zf.read(archive_name)

    if Path(scenario_name).suffix == ".xlsx":
        table = pd.read_excel(BytesIO(payload), dtype=object)
    else:
        table = pd.read_csv(BytesIO(payload), dtype=object)

    stems = [Path(x).stem for x in true_filenames]
    for scene_no, row in enumerate(table.to_dict("records"), start=1):
        # Media files named in the table must match the files in the zip.
        # Only filled cells are checked; an empty cell (NaN) names no media.
        media_cell = row['素材']
        if not pd.isna(media_cell):
            # str(): with dtype=object an Excel cell may hold a number.
            for img in (part.strip() for part in str(media_cell).split(',')):
                n = stems.count(img)
                if n == 0:
                    raise ValueError(f"{img}: no such media file in zip.")
                elif n > 1:
                    raise ValueError(f'too many same name media files as {img} in zip')

        # Each scene needs either TTS text or a voice file.
        if pd.isna(row['字幕']):
            voice_cell = row['音檔']
            if pd.isna(voice_cell):
                raise ValueError(f'text or voice file is needed at scene {scene_no}.')
            if stems.count(str(voice_cell).strip()) != 1:
                raise ValueError(f"voice file is can't find is zip at scene {scene_no}.")
-
def _scenario_suffix(filename):
    """Return ``".xlsx"``/``".csv"`` for a scenario table entry, else ``None``."""
    if filename.endswith('/'):
        return None
    entry = Path(filename)
    # Same exclusions check_zip applies: ``._*`` entries and the ``style``
    # table are not scenario tables and must be copied untouched.
    if entry.name.startswith("._") or entry.stem == "style":
        return None
    return entry.suffix if entry.suffix in (".xlsx", ".csv") else None


def _clone_zip_entry(item):
    """Build a fresh ``ZipInfo`` keeping the entry's name, date, mode and compression."""
    clone = zipfile.ZipInfo(item.filename, date_time=item.date_time)
    clone.compress_type = item.compress_type
    clone.external_attr = item.external_attr
    return clone


def _translate_scenario(raw, suffix, lang):
    """Translate a serialised table and return it in its original file format."""
    if suffix == ".xlsx":
        table = translate_table(pd.read_excel(BytesIO(raw), dtype=object), lang)
        buffer = BytesIO()
        # index=False: the row index is not data and should not become a column.
        table.to_excel(buffer, sheet_name='Sheet_name_1', index=False)
        return buffer.getvalue()
    table = translate_table(pd.read_csv(BytesIO(raw), dtype=object), lang)
    # A .csv entry must stay CSV; writing it with to_excel would store a
    # workbook under a .csv name.
    return table.to_csv(index=False).encode("utf-8")


def update_zip(zip_path, lang):
    """Rewrite a zip archive with its scenario table(s) translated to ``lang``.

    Every ``.xlsx``/``.csv`` entry (except ``._*`` entries and the ``style``
    table) is read, passed through ``translate_table`` and written back in
    its original format; every other entry is copied unchanged.  The result
    is built in ``<zip_path>.tmp`` and then replaces the original archive.

    Args:
        zip_path: Path of the archive to update (``str`` or path-like).
        lang: Target language passed on to ``translate_table``.

    Raises:
        Any error raised while reading, translating or writing.  In that
        case the temporary archive is removed and the original is left
        untouched.
    """
    zip_path = os.fspath(zip_path)
    temp_zip_path = zip_path + ".tmp"
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_in:
            with zipfile.ZipFile(temp_zip_path, 'w') as zip_out:
                for item in zip_in.infolist():
                    target = _clone_zip_entry(item)
                    suffix = _scenario_suffix(item.filename)
                    with zip_in.open(item) as src_file:
                        if suffix is None:
                            # Any other file is copied as-is.
                            with zip_out.open(target, 'w') as dst_file:
                                shutil.copyfileobj(src_file, dst_file)
                            continue
                        raw = src_file.read()
                    # Work in memory: no scratch files are created in (or
                    # deleted from) the current working directory.
                    zip_out.writestr(target, _translate_scenario(raw, suffix, lang))
    except BaseException:
        # Do not leave a half-written archive behind.
        if os.path.exists(temp_zip_path):
            os.remove(temp_zip_path)
        raise
    # Swap the new archive in for the old one in a single step.
    os.replace(temp_zip_path, zip_path)
-
def translate_table(table, lang):
    """Translate the ``大標`` (headline) and ``字幕`` (subtitle) columns in place.

    Each non-empty cell of those columns is sent to the translator and
    replaced by the translation in the same column.  Empty cells (NaN or
    falsy values) are left untouched, and a column that is absent from the
    table is skipped.

    Args:
        table: DataFrame holding the scenario rows; it is modified in place.
        lang: Target language handed to ``Translator(to_lang=...)``.

    Returns:
        The same ``table`` object, for call chaining.
    """
    translator = Translator(to_lang=lang)
    print(f"translate to {lang}")
    columns = [name for name in ("大標", "字幕") if name in table.columns]
    for row_label in table.index:
        for column in columns:
            text = table.at[row_label, column]
            # NaN is truthy, so it must be tested explicitly before the
            # emptiness check.
            if pd.isna(text) or not text:
                continue
            print(f"{column}:", text)
            # str(): with dtype=object a cell may hold a non-string value.
            translation = translator.translate(str(text))
            print(f"{column}翻譯:", translation)
            # Write back to the column the text came from, so a translated
            # headline never overwrites the subtitle.
            table.at[row_label, column] = translation
    return table
-
-
|