# ai-anchor / video-maker
							import pandas as pd
from pathlib import Path
import subprocess
import shutil
import os
import chardet
import zipfile
from test_chardet import guess_codec
from io import BytesIO
  
def _decode_member_name(info, codec):
  """Return the real file name of a zip member.

  ``zipfile`` decodes a member name as UTF-8 when general-purpose flag bit
  0x800 is set, and as cp437 otherwise.  Only the cp437 case needs to be
  undone and re-decoded with the guessed codec; re-encoding an already
  correct UTF-8 name to cp437 would raise for non-cp437 characters.
  """
  if info.flag_bits & 0x800:
    return info.filename
  return info.filename.encode('cp437').decode(codec)


def check_zip(zip_filepath: str) -> None:
  """Validate that a zip holds one scenario table and the files it references.

  The zip must contain exactly one ``.xlsx`` or ``.csv`` file (ignoring
  names starting with ``._`` and files whose stem is ``style``).  For every
  row of that table:

  * if the ``素材`` (material) cell is filled, each comma-separated name in
    it must match the stem of exactly one file in the zip;
  * if the ``字幕`` (subtitle) cell is empty, the ``音檔`` (voice file) cell
    must be filled and must match the stem of exactly one file in the zip.

  Args:
    zip_filepath: path of the zip archive to check.

  Raises:
    ValueError: if any of the rules above is violated.
  """
  path = Path(zip_filepath)
  with zipfile.ZipFile(str(path)) as zf:
    # Directory entries end with '/'; only real files are of interest.
    members = [info for info in zf.infolist() if not info.filename.endswith('/')]
    filenames = [info.filename for info in members]
    # guess_codec is a project helper; it is expected to return a codec name
    # that bytes.decode accepts -- TODO confirm against test_chardet.
    result = guess_codec(filenames)
    true_filenames = [_decode_member_name(info, result) for info in members]

    scenarios_files = [
      (name, idx)
      for idx, name in enumerate(true_filenames)
      if Path(name).suffix.lower() in (".xlsx", ".csv")
      and not Path(name).name.startswith("._")
      and Path(name).stem != "style"
    ]

    if len(scenarios_files) == 0:
      raise ValueError("no excel or csv file in zip.")
    if len(scenarios_files) > 1:
      raise ValueError("too many excel or csv file in zip.")

    scenario_name, scenario_idx = scenarios_files[0]
    # Read through the ZipInfo so the lookup is unambiguous.
    raw = zf.read(members[scenario_idx])

  if Path(scenario_name).suffix.lower() == ".xlsx":
    table = pd.read_excel(BytesIO(raw), dtype=object)
  else:
    table = pd.read_csv(BytesIO(raw), dtype=object)
  # drop=True: a positional 0..n-1 index is all that is needed, and it avoids
  # a clash when the sheet already has a column called "index".
  table = table.reset_index(drop=True)

  stems = [Path(x).stem for x in true_filenames]
  for i in range(len(table)):
    # Media names listed in the table must match the files in the zip.
    material = table.at[i, '素材']
    if not pd.isna(material):
      # str(): with dtype=object an Excel cell may come back as a number.
      for media_name in (part.strip() for part in str(material).split(',')):
        n = stems.count(media_name)
        if n == 0:
          raise ValueError(f"{media_name}: no such media file in zip.")
        elif n > 1:
          raise ValueError(f'too many same name media files as {media_name} in zip')

    # A scene needs either subtitle text (for TTS) or a voice file.
    if pd.isna(table.at[i, '字幕']):
      voice_file = table.at[i, '音檔']
      if pd.isna(voice_file):
        raise ValueError(f'text or voice file is needed at scene {i+1}.')
      n = stems.count(str(voice_file).strip())
      if n != 1:
        raise ValueError(f"voice file is can't find is zip at scene {i+1}.")