# video_utils.py
import os
import shutil
import subprocess
import zipfile
from io import BytesIO
from pathlib import Path

import chardet
import pandas as pd
from chardet.universaldetector import UniversalDetector
from translate import Translator

  11. DEFAULT_ENCODING = "utf-8"
  12. def guess_codec(filenames: list) -> str:
  13. codec_detector = UniversalDetector()
  14. for filename in filenames:
  15. codec_detector.feed(filename.encode('cp437'))
  16. if codec_detector.done:
  17. break
  18. result = codec_detector.close()
  19. encoding = result.get("encoding")
  20. return encoding or DEFAULT_ENCODING
  21. def check_zip(zip_filepath:str):
  22. path = Path(zip_filepath)
  23. with zipfile.ZipFile(str(path)) as zf:
  24. filenames = [x for x in zf.namelist() if not x.endswith('/')]
  25. result = guess_codec(filenames)
  26. true_filenames = [x.encode('cp437').decode(result) for x in zf.namelist() if not x.endswith('/')]
  27. # print(true_filenames)
  28. scenarios_files = [(x, i) for i, x in enumerate(true_filenames) if Path(x).suffix in [".xlsx", ".csv"] and not Path(x).name.startswith("._") and Path(x).stem != "style"]
  29. # print(scenarios_files)
  30. if len(scenarios_files) == 0:
  31. raise ValueError("no excel or csv file in zip.")
  32. if len(scenarios_files) > 1:
  33. raise ValueError("too many excel or csv file in zip.")
  34. f = zf.read(filenames[scenarios_files[0][1]])
  35. if Path(scenarios_files[0][0]).suffix == ".xlsx":
  36. table = pd.read_excel(BytesIO(f), dtype=object)
  37. elif Path(scenarios_files[0][0]).suffix == ".csv":
  38. table = pd.read_csv(BytesIO(f), dtype=object)
  39. table.reset_index(inplace=True)
  40. # print(table)
  41. stems = [Path(x).stem for x in true_filenames]
  42. for i in range(len(table)):
  43. # excel 裡的圖檔跟zip裡的檔案要一致
  44. if table.loc[i, ['素材']].isna().item():
  45. img = table.loc[i, ['素材']].item()
  46. print(img)
  47. img_files = [x.strip() for x in img.split(',')]
  48. for img in img_files:
  49. print(img)
  50. n = stems.count(img)
  51. if n == 0:
  52. raise ValueError(f"{img}: no such media file in zip.")
  53. elif n > 1:
  54. raise ValueError(f'too many same name media files as {img} in zip')
  55. # 需要tts文字或音檔
  56. if table.loc[i, ['字幕']].isna().item():
  57. if table.loc[i, ['音檔']].isna().item():
  58. raise ValueError(f'text or voice file is needed at scene {i+1}.')
  59. voice_file = table.loc[i, ['音檔']].item()
  60. n = stems.count(voice_file)
  61. if n != 1:
  62. raise ValueError(f"voice file is can't find is zip at scene {i+1}.")
  63. def update_zip(zip_path, lang):
  64. temp_zip_path = zip_path + ".tmp"
  65. with zipfile.ZipFile(zip_path, 'r') as zip_in, zipfile.ZipFile(temp_zip_path, 'w') as zip_out:
  66. for item in zip_in.infolist():
  67. with zip_in.open(item.filename) as src_file:
  68. if item.filename.split('.')[-1] == "xlsx":
  69. table = pd.read_excel(src_file, dtype=object)
  70. table = translate_table(table, lang)
  71. table.to_excel(Path(item.filename).name ,sheet_name='Sheet_name_1')
  72. zip_out.write(Path(item.filename).name, item.filename)
  73. os.remove(Path(item.filename).name)
  74. elif item.filename.split('.')[-1] == "csv":
  75. table = pd.read_csv(src_file, dtype=object)
  76. table = translate_table(table, lang)
  77. table.to_excel(Path(item.filename).name ,sheet_name='Sheet_name_1')
  78. zip_out.write(Path(item.filename).name, item.filename)
  79. os.remove(Path(item.filename).name)
  80. else:
  81. # それ以外のファイルはそのままコピー
  82. with zip_out.open(item.filename, 'w') as dst_file:
  83. shutil.copyfileobj(src_file, dst_file)
  84. # 旧ZIPを削除し、新ZIPをリネーム
  85. os.remove(zip_path)
  86. os.rename(temp_zip_path, zip_path)
  87. def translate_table(table, lang):
  88. translator= Translator(to_lang=lang)
  89. print(f"translate to {lang}")
  90. for i in range(len(table)):
  91. if (text:=table.loc[i, ['大標']].item()):
  92. print("大標:",text)
  93. translation = translator.translate(text)
  94. print("大標翻譯:",translation)
  95. table.loc[i, ['字幕']] = translation
  96. if (text:=table.loc[i, ['字幕']].item()):
  97. print('字幕:',text)
  98. translation = translator.translate(text)
  99. print('字幕翻譯:',translation)
  100. table.loc[i, ['字幕']] = translation
  101. return table