"""Validate and translate "scenario" zip archives.

An archive is expected to hold exactly one scenario table (``.xlsx`` or
``.csv``) plus the media / voice files that the table refers to by stem.
"""

import os
import shutil
import subprocess
import zipfile
from io import BytesIO
from pathlib import Path

import chardet
import pandas as pd
from chardet.universaldetector import UniversalDetector
from translate import Translator

DEFAULT_ENCODING = "utf-8"

# File extensions that may hold the scenario table.
_SCENARIO_SUFFIXES = (".xlsx", ".csv")

# Scenario-table columns that carry translatable text:
# "大標" is the headline column, "字幕" is the subtitle column.
_TRANSLATED_COLUMNS = ("大標", "字幕")


def guess_codec(filenames: list) -> str:
    """Guess the legacy codec used for the file names stored in a zip.

    ``zipfile`` decodes names without the UTF-8 flag as cp437, so each name is
    re-encoded to cp437 to recover its raw bytes, which are fed to chardet.

    Args:
        filenames: Names as returned by ``ZipFile.namelist()``.

    Returns:
        The detected encoding name, or ``DEFAULT_ENCODING`` when chardet
        cannot decide (including when ``filenames`` is empty).
    """
    codec_detector = UniversalDetector()
    for filename in filenames:
        try:
            raw_name = filename.encode('cp437')
        except UnicodeEncodeError:
            # The name contains characters outside cp437, so it was not
            # produced by a cp437 decode and carries no raw bytes to sniff.
            continue
        codec_detector.feed(raw_name)
        if codec_detector.done:
            break
    result = codec_detector.close()
    encoding = result.get("encoding")
    return encoding or DEFAULT_ENCODING


def _decode_zip_name(name: str, codec: str) -> str:
    """Undo zipfile's cp437 decoding of *name* using *codec*; keep *name* on failure."""
    try:
        return name.encode('cp437').decode(codec)
    except (UnicodeError, LookupError):
        # Not representable in cp437, not valid in the guessed codec, or the
        # codec name is unknown to Python: the stored name is the best we have.
        return name


def _is_scenario_file(name: str) -> bool:
    """Return True if *name* looks like the scenario table (xlsx/csv).

    Files whose base name starts with "._" and files whose stem is "style"
    are never treated as the scenario table.
    """
    path = Path(name)
    return (
        path.suffix in _SCENARIO_SUFFIXES
        and not path.name.startswith("._")
        and path.stem != "style"
    )


def check_zip(zip_filepath: str):
    """Validate the contents of a scenario zip.

    Checks performed:
      * the archive holds exactly one scenario table (.xlsx or .csv);
      * every media name listed (comma separated) in the "素材" column
        matches exactly one file stem in the archive;
      * every row has either subtitle text ("字幕") or a voice file ("音檔")
        whose name matches exactly one file stem in the archive.

    Args:
        zip_filepath: Path to the zip archive.

    Raises:
        ValueError: If any of the checks above fails.
        KeyError: If the table lacks one of the expected columns.
    """
    path = Path(zip_filepath)
    with zipfile.ZipFile(str(path)) as zf:
        filenames = [x for x in zf.namelist() if not x.endswith('/')]
        codec = guess_codec(filenames)
        true_filenames = [_decode_zip_name(x, codec) for x in filenames]

        # Pair each decoded name with the raw name needed to read the member.
        scenario_files = [
            (true_name, raw_name)
            for raw_name, true_name in zip(filenames, true_filenames)
            if _is_scenario_file(true_name)
        ]
        if not scenario_files:
            raise ValueError("no excel or csv file in zip.")
        if len(scenario_files) > 1:
            raise ValueError("too many excel or csv file in zip.")

        scenario_name, raw_scenario_name = scenario_files[0]
        payload = zf.read(raw_scenario_name)

    if Path(scenario_name).suffix == ".xlsx":
        table = pd.read_excel(BytesIO(payload), dtype=object)
    else:
        table = pd.read_csv(BytesIO(payload), dtype=object)

    stems = [Path(x).stem for x in true_filenames]
    for scene_no, (_, row) in enumerate(table.iterrows(), start=1):
        # Media files listed in the table must match the files in the zip.
        media_cell = row['素材']
        if not pd.isna(media_cell):
            print(media_cell)
            for img in (part.strip() for part in str(media_cell).split(',')):
                print(img)
                n = stems.count(img)
                if n == 0:
                    raise ValueError(f"{img}: no such media file in zip.")
                elif n > 1:
                    raise ValueError(f'too many same name media files as {img} in zip')

        # Each scene needs either TTS text (subtitle) or a voice file.
        if pd.isna(row['字幕']):
            voice_file = row['音檔']
            if pd.isna(voice_file):
                raise ValueError(f'text or voice file is needed at scene {scene_no}.')
            if stems.count(voice_file) != 1:
                raise ValueError(f"voice file is can't find is zip at scene {scene_no}.")


def _serialize_table(table, suffix: str) -> bytes:
    """Serialize *table* in the format implied by *suffix* (".xlsx" or ".csv")."""
    if suffix == ".xlsx":
        buffer = BytesIO()
        # index=False: the row index is not scenario data and would otherwise
        # come back as an extra unnamed column on the next read.
        table.to_excel(buffer, sheet_name='Sheet_name_1', index=False)
        return buffer.getvalue()
    return table.to_csv(index=False).encode("utf-8")


def update_zip(zip_path, lang):
    """Rewrite the zip at *zip_path* with its scenario table translated to *lang*.

    Scenario tables (same selection rule as ``check_zip``) are read, passed
    through ``translate_table`` and written back under the same member name
    and in the same format. Every other member is copied unchanged. The new
    archive is built next to the original as "<zip_path>.tmp" and swapped in
    only after it has been written completely.

    Args:
        zip_path: Path of the archive to update in place.
        lang: Target language, forwarded to ``translate_table``.
    """
    zip_path = str(zip_path)
    temp_zip_path = zip_path + ".tmp"
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_in, \
                zipfile.ZipFile(temp_zip_path, 'w') as zip_out:
            for item in zip_in.infolist():
                if not item.is_dir() and _is_scenario_file(item.filename):
                    suffix = Path(item.filename).suffix
                    payload = BytesIO(zip_in.read(item.filename))
                    if suffix == ".xlsx":
                        table = pd.read_excel(payload, dtype=object)
                    else:
                        table = pd.read_csv(payload, dtype=object)
                    table = translate_table(table, lang)
                    # Serialize in memory so nothing is written to (or removed
                    # from) the current working directory.
                    zip_out.writestr(item.filename, _serialize_table(table, suffix))
                else:
                    # Copy every other member as is.
                    with zip_in.open(item.filename) as src_file, \
                            zip_out.open(item.filename, 'w') as dst_file:
                        shutil.copyfileobj(src_file, dst_file)
    except BaseException:
        # Do not leave a half-written temp archive behind; the original zip
        # has not been touched at this point.
        try:
            os.remove(temp_zip_path)
        except OSError:
            pass
        raise

    # Replace the old zip with the new one in a single step.
    os.replace(temp_zip_path, zip_path)


def translate_table(table, lang):
    """Translate the headline and subtitle columns of *table* in place.

    Each non-empty cell of the "大標" and "字幕" columns is replaced by its
    translation, written back to the same column. Missing (NaN) or blank
    cells, and columns absent from the table, are skipped.

    Args:
        table: Scenario DataFrame. Assumes unique row labels, as produced by
            a fresh ``read_excel`` / ``read_csv``.
        lang: Target language code passed to ``Translator(to_lang=...)``.

    Returns:
        The same DataFrame object, modified in place.
    """
    translator = Translator(to_lang=lang)
    print(f"translate to {lang}")
    columns = [name for name in _TRANSLATED_COLUMNS if name in table.columns]
    for row_label in table.index:
        for column in columns:
            cell = table.at[row_label, column]
            if pd.isna(cell):
                continue
            text = str(cell)
            if not text.strip():
                continue
            print(f"{column}:", text)
            translation = translator.translate(text)
            print(f"{column}翻譯:", translation)
            table.at[row_label, column] = translation
    return table