12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758 |
- import pandas as pd
- from pathlib import Path
- import subprocess
- import shutil
- import os
- import chardet
- import zipfile
- from test_chardet import guess_codec
- from io import BytesIO
-
def _decode_zip_names(infos):
    """Return the intended member name for every ``ZipInfo`` in *infos*.

    ``zipfile`` decodes a member name as UTF-8 when general-purpose flag
    bit 0x800 is set and as cp437 otherwise.  Flagged names are therefore
    already correct and are returned unchanged; unflagged names are turned
    back into their raw bytes (cp437) and re-decoded with the codec that
    ``guess_codec`` reports for them.
    """
    utf8_flag = 0x800  # general-purpose bit 11: name is stored as UTF-8
    legacy_names = [
        info.filename for info in infos if not info.flag_bits & utf8_flag
    ]
    # Only consult guess_codec when there is something to re-decode.
    # NOTE(review): guess_codec is assumed to return a codec name usable by
    # bytes.decode -- confirm against test_chardet.
    codec = guess_codec(legacy_names) if legacy_names else None
    return [
        info.filename
        if info.flag_bits & utf8_flag
        else info.filename.encode('cp437').decode(codec)
        for info in infos
    ]


def check_zip(zip_filepath: str) -> None:
    """Validate that a scenario ZIP is self-consistent.

    The archive must contain exactly one ``.xlsx`` or ``.csv`` file
    (ignoring names starting with ``._`` and files whose stem is
    ``style``).  That table is loaded and every row is checked:

    * each comma-separated name in the ``素材`` column must match the stem
      of exactly one archive member (rows with an empty cell are skipped);
    * when ``字幕`` is empty, ``音檔`` must be filled in and must match the
      stem of exactly one archive member.

    Args:
        zip_filepath: Path to the ZIP archive.

    Raises:
        ValueError: If the archive has no or several scenario tables, a
            referenced media/voice file is missing or ambiguous, or a row
            has neither subtitle text nor a voice file.
    """
    with zipfile.ZipFile(str(Path(zip_filepath))) as zf:
        # Directory entries end with '/'; only real files are considered.
        infos = [
            info for info in zf.infolist() if not info.filename.endswith('/')
        ]
        true_filenames = _decode_zip_names(infos)

        scenario_files = [
            (name, idx)
            for idx, name in enumerate(true_filenames)
            if Path(name).suffix in (".xlsx", ".csv")
            and not Path(name).name.startswith("._")
            and Path(name).stem != "style"
        ]
        if not scenario_files:
            raise ValueError("no excel or csv file in zip.")
        if len(scenario_files) > 1:
            raise ValueError("too many excel or csv file in zip.")

        scenario_name, scenario_idx = scenario_files[0]
        # Read through the ZipInfo so the lookup does not depend on how the
        # member name was decoded.
        raw = zf.read(infos[scenario_idx])

    if Path(scenario_name).suffix == ".xlsx":
        table = pd.read_excel(BytesIO(raw), dtype=object)
    else:  # the filter above only lets ".xlsx" and ".csv" through
        table = pd.read_csv(BytesIO(raw), dtype=object)
    table.reset_index(inplace=True)

    stems = [Path(name).stem for name in true_filenames]
    for row_idx, row in table.iterrows():
        scene = row_idx + 1

        # Media names listed in the table must match files in the zip.
        media = row['素材']
        if not pd.isna(media):
            # str(): cells read with dtype=object are not guaranteed to be
            # strings.
            for media_name in (part.strip() for part in str(media).split(',')):
                matches = stems.count(media_name)
                if matches == 0:
                    raise ValueError(f"{media_name}: no such media file in zip.")
                if matches > 1:
                    raise ValueError(
                        f'too many same name media files as {media_name} in zip'
                    )

        # Every scene needs either TTS text or a voice file.
        if pd.isna(row['字幕']):
            voice = row['音檔']
            if pd.isna(voice):
                raise ValueError(f'text or voice file is needed at scene {scene}.')
            if stems.count(str(voice).strip()) != 1:
                raise ValueError(f"voice file is can't find is zip at scene {scene}.")
-
-
|