# video_utils.py (2.2 KB)
  1. import pandas as pd
  2. from pathlib import Path
  3. import subprocess
  4. import shutil
  5. import os
  6. import chardet
  7. import zipfile
  8. from test_chardet import guess_codec
  9. from io import BytesIO
  10. def check_zip(zip_filepath:str):
  11. path = Path(zip_filepath)
  12. with zipfile.ZipFile(str(path)) as zf:
  13. filenames = [x for x in zf.namelist() if not x.endswith('/')]
  14. result = guess_codec(filenames)
  15. true_filenames = [x.encode('cp437').decode(result) for x in zf.namelist() if not x.endswith('/')]
  16. # print(true_filenames)
  17. scenarios_files = [(x, i) for i, x in enumerate(true_filenames) if Path(x).suffix in [".xlsx", ".csv"] and not Path(x).name.startswith("._") and Path(x).stem != "style"]
  18. # print(scenarios_files)
  19. if len(scenarios_files) == 0:
  20. raise ValueError("no excel or csv file in zip.")
  21. if len(scenarios_files) > 1:
  22. raise ValueError("too many excel or csv file in zip.")
  23. f = zf.read(filenames[scenarios_files[0][1]])
  24. if Path(scenarios_files[0][0]).suffix == ".xlsx":
  25. table = pd.read_excel(BytesIO(f), dtype=object)
  26. elif Path(scenarios_files[0][0]).suffix == ".csv":
  27. table = pd.read_csv(BytesIO(f), dtype=object)
  28. table.reset_index(inplace=True)
  29. # print(table)
  30. stems = [Path(x).stem for x in true_filenames]
  31. for i in range(len(table)):
  32. # excel 裡的圖檔跟zip裡的檔案要一致
  33. if table.loc[i, ['素材']].isna().item():
  34. img = table.loc[i, ['素材']].item()
  35. print(img)
  36. img_files = [x.strip() for x in img.split(',')]
  37. for img in img_files:
  38. print(img)
  39. n = stems.count(img)
  40. if n == 0:
  41. raise ValueError(f"{img}: no such media file in zip.")
  42. elif n > 1:
  43. raise ValueError(f'too many same name media files as {img} in zip')
  44. # 需要tts文字或音檔
  45. if table.loc[i, ['字幕']].isna().item():
  46. if table.loc[i, ['音檔']].isna().item():
  47. raise ValueError(f'text or voice file is needed at scene {i+1}.')
  48. voice_file = table.loc[i, ['音檔']].item()
  49. n = stems.count(voice_file)
  50. if n != 1:
  51. raise ValueError(f"voice file is can't find is zip at scene {i+1}.")