Browse Source

我要看孫子

SherryLiu 3 months ago
parent
commit
c67e7fea70
4 changed files with 50 additions and 12 deletions
  1. 1 0
      src/audio_processing.py
  2. 8 2
      src/config.py
  3. 38 9
      src/main_script.py
  4. 3 1
      src/text_processing.py

+ 1 - 0
src/audio_processing.py

@@ -34,6 +34,7 @@ def post_process_transcript(transcript, temperature=0):
 
 def process_audio(audio_data):
     raw_transcript = transcribe(audio_data)
+    print(raw_transcript)
     if raw_transcript is None:
         return None, None
     corrected_transcript = post_process_transcript(raw_transcript)

+ 8 - 2
src/config.py

@@ -1,11 +1,16 @@
 import os
 from dotenv import load_dotenv
 
-load_dotenv('environment_systex.env')
+env_path = 'environment.env'
+print(f"Attempting to load .env file from: {os.path.abspath(env_path)}")
+load_dotenv(env_path)
 
 SUPABASE_URL = os.getenv("SUPABASE_URL")
 SUPABASE_KEY = os.getenv("SUPABASE_KEY")
 
+if not SUPABASE_URL or not SUPABASE_KEY:
+    raise ValueError("SUPABASE_URL and SUPABASE_KEY must be set in the .env file")
+
 
 SYSTEM_PROMPT = """你是一位專業的語音到文字轉錄校對助理,專門處理有關溫室氣體、碳排放和碳管理的對話轉錄。
 你的任務是:
@@ -19,7 +24,8 @@ SYSTEM_PROMPT = """你是一位專業的語音到文字轉錄校對助理,專
 請只根據提供的原文進行必要的更正,不要添加或刪除任何實質性內容。在修正時,請特別注意上下文,確保修正後的詞語符合整句話的語境。"""
 
 CORRECT_TERMS = [
-    "溫室氣體", "碳排放", "碳管理", "碳盤查", "碳權交易", "碳費",
+    "溫室氣體"
+    , "碳排放", "碳管理", "碳盤查", "碳權交易", "碳費",
     "碳權", "碳足跡", "足跡", "淨零排放", "零排放", "淨零",
     "氣候變遷法", "氣候", "氣候變遷", "台灣"
 ]

+ 38 - 9
src/main_script.py

@@ -7,31 +7,60 @@ def initialize():
     if not success:
         print("Warning: Dictionary loading failed. Proceeding with default dictionary.")
 
+
 def process_audio_file(audio_file):
     try:
-        raw_transcript, corrected_transcript = process_audio(audio_file)
-        if raw_transcript and corrected_transcript:
-            return corrected_transcript
+        result = process_audio(audio_file)
+        if isinstance(result, tuple) and len(result) == 2:
+            return result
         else:
-            return None
+            print("Unexpected result from process_audio")
+            return None, None
     except Exception as e:
         print(f"Error processing audio: {str(e)}")
-        return None
-
+        return None, None
+    
+# 加入檢查user是否詢問特定問題
 def main(audio_file_path):
     initialize()
     try:
         with open(audio_file_path, "rb") as audio_file:
-            result = process_audio_file(audio_file)
+            raw_transcript, corrected_transcript = process_audio_file(audio_file)
         
-        if result:
-            print(result)
+        if raw_transcript and corrected_transcript:
+            print(f"Raw transcript: {raw_transcript}")
+            print(f"Corrected transcript: {corrected_transcript}")
+            
+            if "我要看孫" in raw_transcript:
+                print("找到 我要看孫子!連結至cache")
+            else:
+                print(corrected_transcript)
         else:
             print("Audio processing failed.")
     except FileNotFoundError:
         print(f"Error: The file '{audio_file_path}' was not found.")
     except Exception as e:
         print(f"An unexpected error occurred: {str(e)}")
+        import traceback
+        traceback.print_exc()
+
+
+## 原本的main
+# def main(audio_file_path):
+#     initialize()
+#     try:
+#         with open(audio_file_path, "rb") as audio_file:
+#             result = process_audio_file(audio_file)
+        
+#         if result:
+#             print(result)
+#         else:
+#             print("Audio processing failed.")
+#     except FileNotFoundError:
+#         print(f"Error: The file '{audio_file_path}' was not found.")
+#     except Exception as e:
+#         print(f"An unexpected error occurred: {str(e)}")
+
 
 if __name__ == "__main__":
     if len(sys.argv) != 2:

+ 3 - 1
src/text_processing.py

@@ -40,8 +40,10 @@ def compare_chinese_words(word1, word2, tone_sensitive=True):
     pinyin2 = ''.join([p[0] for p in pinyin(word2, style=Style.TONE3, neutral_tone_with_five=True)])
     
     soundex1 = chinese_soundex(pinyin1)
+    # print(soundex1)
     soundex2 = chinese_soundex(pinyin2)
-    
+    # print('soundex2', soundex2)
+
     if tone_sensitive:
         return soundex1 == soundex2
     else: