Browse Source

mod utils2.py

tomoya 1 year ago
parent
commit
89a6e71abf
1 changed files with 124 additions and 18 deletions
  1. 124 18
      backend/app/app/aianchor/utils2.py

+ 124 - 18
backend/app/app/aianchor/utils2.py

@@ -1,8 +1,13 @@
+import shutil
 import pandas as pd
 import pandas as pd
 from pathlib import Path
 from pathlib import Path
 import zipfile
 import zipfile
 from io import BytesIO
 from io import BytesIO
 from chardet.universaldetector import UniversalDetector
 from chardet.universaldetector import UniversalDetector
+try:
+    from app.aianchor.config import *
+except ImportError:
+    from config import *
 
 
 DEFAULT_ENCODING = "utf-8"
 DEFAULT_ENCODING = "utf-8"
 
 
@@ -25,9 +30,8 @@ def check_zip(zip_filepath:str):
   with zipfile.ZipFile(str(path)) as zf:
   with zipfile.ZipFile(str(path)) as zf:
     filenames = [x for x in zf.namelist() if not x.endswith('/')]
     filenames = [x for x in zf.namelist() if not x.endswith('/')]
     result = guess_codec(filenames)
     result = guess_codec(filenames)
-    true_filenames = [x.encode('cp437').decode(result) for x in zf.namelist() if not x.endswith('/')]
-    print(true_filenames)
-    scenarios_files = [(x, i) for i, x in enumerate(true_filenames) if Path(x).suffix in [".xlsx", ".csv"] and not Path(x).name.startswith("._") and Path(x).stem != "style"]
+    true_filepaths = [x.encode('cp437').decode(result) for x in zf.namelist() if not x.endswith('/')]
+    scenarios_files = [(Path(x).name, i) for i, x in enumerate(true_filepaths) if Path(x).suffix in [".xlsx", ".csv"] and not Path(x).name.startswith("._") and not Path(x).name.startswith("~$") and Path(x).stem != "style"]
     print(scenarios_files)
     print(scenarios_files)
     
     
     if len(scenarios_files) == 0:
     if len(scenarios_files) == 0:
@@ -40,31 +44,133 @@ def check_zip(zip_filepath:str):
     elif Path(scenarios_files[0][0]).suffix == ".csv":
     elif Path(scenarios_files[0][0]).suffix == ".csv":
       table = pd.read_csv(BytesIO(f), dtype=object)
       table = pd.read_csv(BytesIO(f), dtype=object)
     table.reset_index(inplace=True)
     table.reset_index(inplace=True)
-    print(table)
     
     
-    stems = [Path(x).stem for x in true_filenames]
+    true_filenames = [(Path(x).name, i) for i, x in enumerate(true_filepaths)]
+    true_stems = [(Path(x).stem, i) for i, x in enumerate(true_filepaths)]
     for i in range(len(table)):
     for i in range(len(table)):
       # excel 裡的圖檔跟zip裡的檔案要一致
       # excel 裡的圖檔跟zip裡的檔案要一致
       if not table.loc[i, ['素材']].isna().item():
       if not table.loc[i, ['素材']].isna().item():
         img =  table.loc[i, ['素材']].item()
         img =  table.loc[i, ['素材']].item()
-        print(img)
-
         img_files = [x.strip() for x in img.split(',')]
         img_files = [x.strip() for x in img.split(',')]
         for img in img_files:
         for img in img_files:
-          print(img)
-          n = stems.count(img)
+          if Path(img).suffix:
+            n = len([x for x in true_filenames if x[0] == img])
+          else:
+            n = len([x for x in true_stems if x[0] == img])
           if n == 0:
           if n == 0:
-            raise VideoMakerError(f"{img}: no such media file in zip.")
+            raise VideoMakerError(f"{img}: no such img file in zip.")
           elif n > 1:
           elif n > 1:
-            raise VideoMakerError(f'too many same name media files as {img} in zip')
-      
+            raise VideoMakerError(f'too many same name img files as {img} in zip')
+
+      # 需要tts文字或音檔
+      if '字幕' in table.columns and table.loc[i, ['字幕']].isna().item():
+        if '音檔' in table.columns and table.loc[i, ['音檔']].isna().item():
+          raise VideoMakerError(f'text or voice file is needed at scene {i+1}.')
+        voice_file = table.loc[i, ['音檔']].item()
+        if Path(voice_file).suffix:
+          n = len([x for x in true_filenames if x[0]==voice_file])
+        else:
+          n = true_stems.count(voice_file)
+        if n == 0:
+          raise VideoMakerError(f"{voice_file}: no such voice file in zip.")
+        elif n > 1:
+          raise VideoMakerError(f'too many same name voice files as {voice_file} in zip')
+  
+  return True
+
+def prepare_workingdir(work_dir_path:str):
+  """Create the sub-directory layout under a working directory.
+
+  One directory is created for each of the names `voice_dir`, `caption_dir`,
+  `input_dir`, `output_dir`, `srt_dir` and `ai_character_dir`
+  (presumably provided by the star-import from the config module -- confirm).
+
+  Args:
+    work_dir_path: path of the working directory that holds the sub-directories.
+
+  Directories that already exist are left untouched.
+  """
+  work_dir_Path = Path(work_dir_path)
+  sub_dirs = (voice_dir, caption_dir, input_dir, output_dir, srt_dir, ai_character_dir)
+  for sub_dir in sub_dirs:
+    # parents=True: also create the working directory itself (or any nested
+    # component of a configured sub-path) instead of raising FileNotFoundError.
+    (work_dir_Path/sub_dir).mkdir(parents=True, exist_ok=True)
+
+def check_and_extract_zip(zip_filepath:str, working_dirpath:str):
+  """Validate an uploaded zip and copy the files it needs into the working directory.
+
+  Steps performed:
+    1. Find exactly one scenario table (.xlsx or .csv, ignoring names starting
+       with "._" or "~$" and any file whose stem is "style") and copy it to
+       `working_dirpath / input_dir`.
+    2. Load that table with pandas.
+    3. For every row, copy each file listed in the '素材' column and, when the
+       '字幕' cell is empty, the file named in the '音檔' column, into
+       `working_dirpath / input_dir`.
+
+  Names in the table may be given with or without an extension; without one
+  they are matched against file stems.
+
+  Args:
+    zip_filepath: path of the zip archive to read.
+    working_dirpath: working directory; files are written to its `input_dir`
+      sub-directory (assumed to exist already -- it is not created here).
+
+  Raises:
+    VideoMakerError: no / more than one scenario table, a referenced file is
+      missing or ambiguous in the zip, a row has neither text nor voice file,
+      or a destination file already exists.
+
+  Returns nothing explicitly (implicit None).
+  """
+  working_dir_Path = Path(working_dirpath)
+  with zipfile.ZipFile(zip_filepath) as zf:
+    # `filenames` keeps the raw archive member names (needed for zf.open / zf.read);
+    # `true_filepaths` holds the same members, in the same order, re-decoded with
+    # the codec returned by guess_codec, so an index into one is valid for the other.
+    filenames = [x for x in zf.namelist() if not x.endswith('/')]
+    result = guess_codec(filenames)
+    true_filepaths = [x.encode('cp437').decode(result) for x in zf.namelist() if not x.endswith('/')]
+    # (file name, index into filenames/true_filepaths) pairs for scenario tables.
+    scenarios_filenames = [(Path(x).name, i) for i, x in enumerate(true_filepaths) if Path(x).suffix in [".xlsx", ".csv"] and not Path(x).name.startswith("._") and not Path(x).name.startswith("~$") and Path(x).stem != "style" ]
+    # NOTE(review): media_filenames is built but never used in this function.
+    media_filenames = [(Path(x).name, i) for i, x in enumerate(true_filepaths) if Path(x).suffix not in [".xlsx", ".csv"] and not Path(x).name.startswith("._") and not Path(x).name.startswith("~$")]
+    
+    if len(scenarios_filenames) == 0:
+      raise VideoMakerError("no excel or csv file in zip.")
+    if len(scenarios_filenames) > 1:
+      raise VideoMakerError("too many excel or csv file in zip.")
+    
+    # Copy the scenario table out of the archive; refuse to overwrite an existing file.
+    save_filepath = working_dir_Path / input_dir / scenarios_filenames[0][0]
+    if not save_filepath.exists():
+      with open(save_filepath, 'wb') as output_file:  # create and open the new file
+        with zf.open(filenames[scenarios_filenames[0][1]], 'r') as origin_file:  # open the original file inside the zip
+          shutil.copyfileobj(origin_file, output_file)  # copy the original file's content into the new file
+    else:
+      # NOTE(review): scenarios_filenames[0] is a (name, index) tuple, so the
+      # message shows the tuple rather than just the file name.
+      raise VideoMakerError(f"{scenarios_filenames[0]} already exists.")
+    
+    # Read the table a second time, from the archive bytes, to parse it.
+    f = zf.read(filenames[scenarios_filenames[0][1]])
+    if Path(scenarios_filenames[0][0]).suffix == ".xlsx":
+      table = pd.read_excel(BytesIO(f), dtype=object)
+    elif Path(scenarios_filenames[0][0]).suffix == ".csv":
+      table = pd.read_csv(BytesIO(f), dtype=object)
+    table.reset_index(inplace=True)
+    
+    # Lookup lists of (name, index) and (stem, index) for every file in the zip.
+    true_filenames = [(Path(x).name, i) for i, x in enumerate(true_filepaths)]
+    true_stems = [(Path(x).stem, i) for i, x in enumerate(true_filepaths)]
+    for i in range(len(table)):
+      # image files listed in the excel sheet must match the files in the zip
+      if not table.loc[i, ['素材']].isna().item():
+        img =  table.loc[i, ['素材']].item()
+        # The cell may list several files separated by commas.
+        img_files = [x.strip() for x in img.split(',')]
+        for img in img_files:
+          # With an extension: match on full file name; without: match on stem.
+          if Path(img).suffix:
+            target_filenames = [x for x in true_filenames if x[0] == img]
+          else:
+            target_filenames = [x for x in true_stems if x[0] == img]
+          if len(target_filenames) == 0:
+            raise VideoMakerError(f"{img}: no such img file in zip.")
+          elif len(target_filenames) > 1:
+            raise VideoMakerError(f'too many same name img files as {img} in zip')
+          
+          # NOTE(review): a file referenced by more than one row hits the
+          # "already exists" branch on its second occurrence -- confirm intended.
+          save_filepath = working_dir_Path / input_dir / true_filenames[target_filenames[0][1]][0]
+          if not save_filepath.exists():
+            with open(save_filepath, 'wb') as output_file:  # create and open the new file
+              with zf.open(filenames[target_filenames[0][1]], 'r') as origin_file:  # open the original file inside the zip
+                shutil.copyfileobj(origin_file, output_file)  # copy the original file's content into the new file
+          else:
+            raise VideoMakerError(f"{target_filenames[0][0]} already exists.")
+        
       # TTS text or a voice file is required
       # TTS text or a voice file is required
-      if not table.loc[i, ['字幕']].isna().item():
-        if not '音檔' in table.columns or table.loc[i, ['音檔']].isna().item():
+      if '字幕' in table.columns and table.loc[i, ['字幕']].isna().item():
+        if '音檔' in table.columns and table.loc[i, ['音檔']].isna().item():
           raise VideoMakerError(f'text or voice file is needed at scene {i+1}.')
           raise VideoMakerError(f'text or voice file is needed at scene {i+1}.')
         voice_file = table.loc[i, ['音檔']].item()  # NOTE(review): raises KeyError (not VideoMakerError) when the '音檔' column is absent
         voice_file = table.loc[i, ['音檔']].item()  # NOTE(review): raises KeyError (not VideoMakerError) when the '音檔' column is absent
-        n = stems.count(voice_file)
-        if n != 1:
-          raise VideoMakerError(f"voice file is can't find is zip at scene {i+1}.")
+        # With an extension: match on full file name; without: match on stem.
+        if Path(voice_file).suffix:
+          target_filenames = [x for x in true_filenames if x[0] == voice_file]
+        else:
+          target_filenames = [x for x in true_stems if x[0] == voice_file]
+        if len(target_filenames) == 0:
+          raise VideoMakerError(f"{voice_file}: no such voice file in zip.")
+        elif len(target_filenames) > 1:
+          raise VideoMakerError(f'too many same name voice files as {voice_file} in zip')
+        
+        # Copy the voice file out of the archive; refuse to overwrite an existing file.
+        save_filepath = working_dir_Path / input_dir / true_filenames[target_filenames[0][1]][0]
+        if not save_filepath.exists():
+          with open(save_filepath, 'wb') as output_file:  # create and open the new file
+            with zf.open(filenames[target_filenames[0][1]], 'r') as origin_file:  # open the original file inside the zip
+              shutil.copyfileobj(origin_file, output_file)  # copy the original file's content into the new file
+        else:
+          raise VideoMakerError(f"{target_filenames[0][0]} already exists.")
+        
+# Command-line entry point: run check_zip on a zip path given on the command line.
+if __name__ == "__main__":
+  from fire import Fire
+  
+  def fire_check_zip(zip_filepath:str):
+    # Print a success line when check_zip returns a truthy value; print the
+    # VideoMakerError message instead of a traceback when validation fails.
+    try:
+      if check_zip(zip_filepath):
+        print("passed check_zip")
+    except VideoMakerError as e:
+      print(e)
   
   
-  return True
+  Fire(fire_check_zip)  # expose fire_check_zip as the CLI command