8 months ago · cbfb282505
--- a/README.md
+++ b/README.md
@@ -1,14 +0,0 @@
 
				-# 語音轉文字 ASR
			
 
				-
			
 
				-### Prerequisite
			
 
				-- 語音檔需放在 `data/` 目錄下
			
 
				-- `environment.env`放openai key
			
 
				-
			
 
				-### To run the code
			
 
				-`pip install -r requirements.txt`
			
 
				-
			
 
				-執行所有在data目錄下的語音檔
			
 
				-`python whisper.py`
			
 
				-
			
 
				-執行單個語音檔
			
 
				-`python whisper.py --file 語音檔名`
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,152 +0,0 @@
 
				-aiohappyeyeballs==2.4.0
			
 
				-aiohttp==3.10.5
			
 
				-aiosignal==1.3.1
			
 
				-annotated-types==0.7.0
			
 
				-anyio==4.4.0
			
 
				-attrs==24.2.0
			
 
				-audioread==3.0.1
			
 
				-babel==2.16.0
			
 
				-bce-python-sdk==0.9.19
			
 
				-blinker==1.8.2
			
 
				-Bottleneck==1.4.0
			
 
				-certifi==2024.7.4
			
 
				-cffi==1.17.0
			
 
				-charset-normalizer==3.3.2
			
 
				-click==8.1.7
			
 
				-colorama==0.4.6
			
 
				-coloredlogs==15.0.1
			
 
				-colorlog==6.8.2
			
 
				-contourpy==1.3.0
			
 
				-cycler==0.12.1
			
 
				-Cython==3.0.11
			
 
				-datasets==2.21.0
			
 
				-decorator==5.1.1
			
 
				-dill==0.3.4
			
 
				-Distance==0.1.3
			
 
				-distro==1.9.0
			
 
				-editdistance==0.8.1
			
 
				-fastapi==0.112.2
			
 
				-filelock==3.15.4
			
 
				-Flask==3.0.3
			
 
				-flask-babel==4.0.0
			
 
				-flatbuffers==24.3.25
			
 
				-fonttools==4.53.1
			
 
				-frozenlist==1.4.1
			
 
				-fsspec==2024.6.1
			
 
				-future==1.0.0
			
 
				-fuzzywuzzy==0.18.0
			
 
				-g2p-en==2.1.0
			
 
				-g2pM==0.1.2.5
			
 
				-h11==0.14.0
			
 
				-h5py==3.11.0
			
 
				-httpcore==1.0.5
			
 
				-httpx==0.27.2
			
 
				-huggingface-hub==0.24.6
			
 
				-humanfriendly==10.0
			
 
				-idna==3.8
			
 
				-inflect==7.3.1
			
 
				-itsdangerous==2.2.0
			
 
				-jieba==0.42.1
			
 
				-Jinja2==3.1.4
			
 
				-jiter==0.5.0
			
 
				-joblib==1.4.2
			
 
				-jsonlines==4.0.0
			
 
				-kaldiio==2.18.0
			
 
				-kiwisolver==1.4.5
			
 
				-Levenshtein==0.25.1
			
 
				-librosa==0.8.1
			
 
				-llvmlite==0.43.0
			
 
				-loguru==0.7.2
			
 
				-lxml==5.3.0
			
 
				-markdown-it-py==3.0.0
			
 
				-MarkupSafe==2.1.5
			
 
				-matplotlib==3.9.2
			
 
				-mdurl==0.1.2
			
 
				-mock==5.1.0
			
 
				-more-itertools==10.4.0
			
 
				-mpmath==1.3.0
			
 
				-multidict==6.0.5
			
 
				-multiprocess==0.70.12.2
			
 
				-nara-wpe==0.0.10
			
 
				-nltk==3.9.1
			
 
				-numba==0.60.0
			
 
				-numpy==2.0.2
			
 
				-onnx==1.16.2
			
 
				-onnxruntime==1.19.0
			
 
				-openai==1.42.0
			
 
				-packaging==24.1
			
 
				-paddle2onnx==0.8.1
			
 
				-paddleaudio==1.0.2
			
 
				-paddlefsl==1.1.0
			
 
				-paddlenlp==2.6.1
			
 
				-paddlespeech==1.0.1
			
 
				-paddlespeech-feat==0.1.0
			
 
				-pandas==2.2.2
			
 
				-pathos==0.2.8
			
 
				-pattern_singleton==1.2.0
			
 
				-pillow==10.4.0
			
 
				-pip-autoremove==0.10.0
			
 
				-platformdirs==4.2.2
			
 
				-pooch==1.8.2
			
 
				-portalocker==2.10.1
			
 
				-pox==0.3.4
			
 
				-ppft==1.7.6.8
			
 
				-praatio==5.0.0
			
 
				-prettytable==3.11.0
			
 
				-protobuf==3.20.2
			
 
				-psutil==6.0.0
			
 
				-pyarrow==17.0.0
			
 
				-pycparser==2.22
			
 
				-pycryptodome==3.20.0
			
 
				-pydantic==2.8.2
			
 
				-pydantic_core==2.20.1
			
 
				-Pygments==2.18.0
			
 
				-pyparsing==3.1.4
			
 
				-pypinyin==0.52.0
			
 
				-pypinyin-dict==0.8.0
			
 
				-pytest-runner==6.0.1
			
 
				-python-dateutil==2.9.0.post0
			
 
				-python-dotenv==1.0.1
			
 
				-python-Levenshtein==0.25.1
			
 
				-pytz==2024.1
			
 
				-pyworld==0.3.4
			
 
				-PyYAML==6.0.2
			
 
				-rapidfuzz==3.9.6
			
 
				-rarfile==4.2
			
 
				-regex==2024.7.24
			
 
				-requests==2.32.3
			
 
				-resampy==0.2.2
			
 
				-rich==13.8.0
			
 
				-sacrebleu==2.4.3
			
 
				-safetensors==0.4.4
			
 
				-scikit-learn==1.5.1
			
 
				-scipy==1.14.1
			
 
				-sentencepiece==0.1.99
			
 
				-seqeval==1.2.2
			
 
				-shellingham==1.5.4
			
 
				-six==1.16.0
			
 
				-sniffio==1.3.1
			
 
				-soundfile==0.12.1
			
 
				-starlette==0.38.2
			
 
				-sympy==1.13.2
			
 
				-tabulate==0.9.0
			
 
				-TextGrid==1.6.1
			
 
				-threadpoolctl==3.5.0
			
 
				-tiktoken==0.7.0
			
 
				-timer==0.3.0
			
 
				-tqdm==4.66.5
			
 
				-typeguard==4.3.0
			
 
				-typer==0.12.5
			
 
				-typing_extensions==4.12.2
			
 
				-tzdata==2024.1
			
 
				-urllib3==2.2.2
			
 
				-uvicorn==0.30.6
			
 
				-visualdl==2.5.3
			
 
				-wcwidth==0.2.13
			
 
				-webrtcvad==2.0.10
			
 
				-websockets==13.0.1
			
 
				-Werkzeug==3.0.4
			
 
				-xxhash==3.5.0
			
 
				-yacs==0.1.8
			
 
				-yarl==1.9.4
			
 
				-zhon==2.0.2
			
--- a/src/.env
+++ b/src/.env
@@ -0,0 +1,3 @@
 
				+SUPABASE_URL = "http://139.144.120.184:8000"
			
 
				+SUPABASE_KEY = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyAgCiAgICAicm9sZSI6ICJzZXJ2aWNlX3JvbGUiLAogICAgImlzcyI6ICJzdXBhYmFzZS1kZW1vIiwKICAgICJpYXQiOiAxNjQxNzY5MjAwLAogICAgImV4cCI6IDE3OTk1MzU2MDAKfQ.DaYlNEoUrrEn2Ig7tqibS-PHK5vgusbcbo7X36XVt4Q"
			
 
				+OPENAI_API_KEY = "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"
			
--- a/src/__pycache__/audio_processing.cpython-38.pyc
+++ b/src/__pycache__/audio_processing.cpython-38.pyc
--- a/src/__pycache__/config.cpython-38.pyc
+++ b/src/__pycache__/config.cpython-38.pyc
--- a/src/__pycache__/dictionary_loader.cpython-38.pyc
+++ b/src/__pycache__/dictionary_loader.cpython-38.pyc
--- a/src/__pycache__/text_processing.cpython-38.pyc
+++ b/src/__pycache__/text_processing.cpython-38.pyc
--- a/src/audio_processing.py
+++ b/src/audio_processing.py
@@ -0,0 +1,59 @@
 
				+from openai import OpenAI
			
 
				+from config import SYSTEM_PROMPT, OPEN_API_KEY, SUPABASE_KEY, SUPABASE_URL
			
 
				+from supabase import create_client, Client
			
 
				+from text_processing import fuzzy_correct_chinese
			
 
				+
			
 
				+client = OpenAI(api_key=OPEN_API_KEY)
			
 
				+supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
			
 
				+
			
 
				+def transcribe(audio_file):
			
 
				+    try:
			
 
				+        table_name = "brand_database"
			
 
				+        response = supabase.table(table_name).select("brand", "category").execute()
			
 
				+        custom_vocab = []
			
 
				+        if response.data:
			
 
				+            for item in response.data:
			
 
				+                custom_vocab.append({item['brand']})
			
 
				+        else:
			
 
				+            print(f"No data found or an error occurred: {response.error}")
			
 
				+            print("Using default dictionary as Supabase data couldn't be fetched.")
			
 
				+            custom_vocab = ["FENDI", "BOSS", "BALENCIAGA", "BURBERRY", "CELINE", "COS", 
			
 
				+                            "COACH", "Dior", "FENDI", "GUCCI", "KENZO", "Louis Vuitton", 
			
 
				+                            "LV", "MONTBLANC", "POLO", "TORY BURCH", "VERSACE", "BV",  
			
 
				+                            "BAO BAO ISSEY MIYAKE", "BERLUTI", "BOTTEGA VENETA", "ZEGNA", 
			
 
				+                            "FERRAGAMO", "LONGCHAMP", "Loro Piana", "maje", "MICHAEL KORS", 
			
 
				+                            "MONCLER", "PLEATS PLEASE", "SAINT LAURENT"]
			
 
				+        transcript = client.audio.transcriptions.create(
			
 
				+            file=audio_file,
			
 
				+            model="whisper-1",
			
 
				+            response_format="text", 
			
 
				+            prompt=f"請注意以下詞彙：{custom_vocab}"
			
 
				+        )
			
 
				+        return transcript
			
 
				+    except Exception as e:
			
 
				+        print(f"轉錄時發生錯誤：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+def post_process_transcript(transcript, temperature=0):
			
 
				+    corrected_transcript = fuzzy_correct_chinese(transcript)
			
 
				+    
			
 
				+    messages = [
			
 
				+        {"role": "system", "content": SYSTEM_PROMPT},
			
 
				+        {"role": "user", "content": f"請校對並修正以下轉錄文本，但不要改變其原意或回答問題，也不要更動英文的大小寫：\n\n{corrected_transcript}"}
			
 
				+    ]
			
 
				+
			
 
				+    response = client.chat.completions.create(
			
 
				+        model="gpt-4",
			
 
				+        temperature=temperature,
			
 
				+        messages=messages
			
 
				+    )
			
 
				+
			
 
				+    return response.choices[0].message.content
			
 
				+
			
 
				+def process_audio(audio_data):
			
 
				+    raw_transcript = transcribe(audio_data)
			
 
				+    print(raw_transcript)
			
 
				+    if raw_transcript is None:
			
 
				+        return None, None
			
 
				+    corrected_transcript = post_process_transcript(raw_transcript)
			
 
				+    return raw_transcript, corrected_transcript
			
--- a/src/config.py
+++ b/src/config.py
@@ -0,0 +1,31 @@
 
				+import os
			
 
				+from dotenv import load_dotenv
			
 
				+
			
 
# Read variables from a .env file into the process environment before they
# are looked up below.
load_dotenv()

# Connection settings. os.environ.get returns None when a variable is unset.
SUPABASE_URL: str =  os.environ.get('SUPABASE_URL')
SUPABASE_KEY: str = os.environ.get('SUPABASE_KEY')
# Exported as OPEN_API_KEY but read from the OPENAI_API_KEY variable.
OPEN_API_KEY: str = os.environ.get('OPENAI_API_KEY')

# Fail at import time when the Supabase settings are missing.
# Note that OPEN_API_KEY is not validated here.
if not SUPABASE_URL or not SUPABASE_KEY:
    raise ValueError("SUPABASE_URL and SUPABASE_KEY must be set in the .env file")


# System prompt for the proofreading chat request (runtime text, kept verbatim).
SYSTEM_PROMPT = """你是一位專業的轉錄校對助理，專門處理有關品牌的對話轉錄。
你的任務是：
1. 確保中英文品牌的正確性，大小寫不要更動。
2. 在必要時添加適當的標點符號，如句號、逗號
3. 使用台灣的繁體中文或英文，確保語言表達符合台灣的用語習慣。
4. 只更正明顯的錯誤或改善可讀性，不要改變原文的意思或結構。
5. 不要回答問題、解釋概念或添加任何不在原文中的信息。
6. 如果原文是一個問句，保持它的問句形式，不要提供答案。

請只根據提供的原文進行必要的更正，不要添加或刪除任何實質性內容。在修正時，請特別注意上下文，確保修正後的詞語符合整句話的語境。"""

# Canonical terms that similar-sounding words may be replaced with.
CORRECT_TERMS = [
    "品牌", "101", "一零一"
]

# Exact word -> replacement overrides; currently empty.
ERROR_CORRECTION = {

}
			
--- a/src/dictionary_loader.py
+++ b/src/dictionary_loader.py
@@ -0,0 +1,27 @@
 
				+import io
			
 
				+import jieba
			
 
				+from supabase import create_client, Client
			
 
				+from config import SUPABASE_URL, SUPABASE_KEY
			
 
				+
			
 
				+supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
			
 
				+
			
 
				+
			
 
				+def load_word_database_dictionary_from_supabase():
			
 
				+    table_name = "word_database"
			
 
				+    response = supabase.table(table_name).select("term", "weight", "type").execute()
			
 
				+    
			
 
				+    if response.data:
			
 
				+        dict_data = io.StringIO()
			
 
				+        for item in response.data:
			
 
				+            dict_data.write(f"{item['term']} {item['weight']} {item['type']}\n")
			
 
				+        
			
 
				+        dict_data.seek(0)
			
 
				+        jieba.load_userdict(dict_data)
			
 
				+        # print("Loaded dictionary from Supabase")
			
 
				+        return True
			
 
				+    else:
			
 
				+        print(f"No data found or an error occurred: {response.error}")
			
 
				+        print("Using default dictionary as Supabase data couldn't be fetched.")
			
 
				+        return False
			
 
				+
			
 
				+
			
--- a/src/main_script.py
+++ b/src/main_script.py
@@ -0,0 +1,67 @@
 
				+import sys
			
 
				+from dictionary_loader import load_word_database_dictionary_from_supabase
			
 
				+from audio_processing import process_audio
			
 
				+
			
 
				+def initialize():
			
 
				+    word_database_success = load_word_database_dictionary_from_supabase()
			
 
				+    if not word_database_success:
			
 
				+        print("Warning: Word Database Dictionary loading failed. Proceeding with default dictionary.")
			
 
				+
			
 
				+
			
 
				+def process_audio_file(audio_file):
			
 
				+    try:
			
 
				+        result = process_audio(audio_file)
			
 
				+        if isinstance(result, tuple) and len(result) == 2:
			
 
				+            return result
			
 
				+        else:
			
 
				+            print("Unexpected result from process_audio")
			
 
				+            return None, None
			
 
				+    except Exception as e:
			
 
				+        print(f"Error processing audio: {str(e)}")
			
 
				+        return None, None
			
 
				+    
			
 
				+# 加入檢查user是否詢問特定問題
			
 
				+def main(audio_file_path):
			
 
				+    initialize()
			
 
				+    try:
			
 
				+        with open(audio_file_path, "rb") as audio_file:
			
 
				+            raw_transcript, corrected_transcript = process_audio_file(audio_file)
			
 
				+        
			
 
				+        if raw_transcript and corrected_transcript:
			
 
				+            print(f"Raw transcript: {raw_transcript}")
			
 
				+            print(f"Corrected transcript: {corrected_transcript}")
			
 
				+            
			
 
				+        else:
			
 
				+            print("Audio processing failed.")
			
 
				+    except FileNotFoundError:
			
 
				+        print(f"Error: The file '{audio_file_path}' was not found.")
			
 
				+    except Exception as e:
			
 
				+        print(f"An unexpected error occurred: {str(e)}")
			
 
				+        import traceback
			
 
				+        traceback.print_exc()
			
 
				+
			
 
				+
			
 
				+## Original main
			
 
				+# def main(audio_file_path):
			
 
				+#     initialize()
			
 
				+#     try:
			
 
				+#         with open(audio_file_path, "rb") as audio_file:
			
 
				+#             result = process_audio_file(audio_file)
			
 
				+        
			
 
				+#         if result:
			
 
				+#             print(result)
			
 
				+#         else:
			
 
				+#             print("Audio processing failed.")
			
 
				+#     except FileNotFoundError:
			
 
				+#         print(f"Error: The file '{audio_file_path}' was not found.")
			
 
				+#     except Exception as e:
			
 
				+#         print(f"An unexpected error occurred: {str(e)}")
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    if len(sys.argv) != 2:
			
 
				+        print("Usage: python script_name.py <audio_file_path>")
			
 
				+        sys.exit(1)
			
 
				+    
			
 
				+    audio_file_path = sys.argv[1]
			
 
				+    main(audio_file_path)
			
--- a/src/text_processing.py
+++ b/src/text_processing.py
@@ -0,0 +1,71 @@
 
				+import jieba
			
 
				+from pypinyin import pinyin, Style
			
 
				+from config import CORRECT_TERMS, ERROR_CORRECTION
			
 
				+
			
 
				+def chinese_soundex(pinyin_str):
			
 
				+    soundex_map = {
			
 
				+        'b': '1', 'p': '1', 'm': '1', 'f': '1',
			
 
				+        'd': '2', 'n': '2', 'l': '2',
			
 
				+        'g': '3', 'k': '3', 'h': '3', 't': '3',
			
 
				+        'j': '4', 'q': '4', 'x': '4',
			
 
				+        'zh': '5', 'ch': '5', 'sh': '5', 'r': '5',
			
 
				+        'z': '6', 'c': '6', 's': '6',
			
 
				+        'an': '7', 'ang': '7', 'en': '8', 'eng': '8', 'in': '8', 'ing': '8',
			
 
				+        'ong': '9', 'un': '9', 'uan': '9',
			
 
				+        'i': 'A', 'u': 'A', 'v': 'A',
			
 
				+        'e': 'B', 'o': 'B',
			
 
				+    }
			
 
				+    
			
 
				+    code = ''
			
 
				+    tone = '0'
			
 
				+    i = 0
			
 
				+    while i < len(pinyin_str):
			
 
				+        if pinyin_str[i:i+2] in soundex_map:
			
 
				+            code += soundex_map[pinyin_str[i:i+2]]
			
 
				+            i += 2
			
 
				+        elif pinyin_str[i] in soundex_map:
			
 
				+            code += soundex_map[pinyin_str[i]]
			
 
				+            i += 1
			
 
				+        elif pinyin_str[i].isdigit():
			
 
				+            tone = pinyin_str[i]
			
 
				+            i += 1
			
 
				+        else:
			
 
				+            i += 1
			
 
				+    
			
 
				+    code = code[:1] + ''.join(sorted(set(code[1:])))
			
 
				+    return (code[:3] + tone).ljust(4, '0')
			
 
				+
			
 
				+def compare_chinese_words(word1, word2, tone_sensitive=True):
			
 
				+    pinyin1 = ''.join([p[0] for p in pinyin(word1, style=Style.TONE3, neutral_tone_with_five=True)])
			
 
				+    pinyin2 = ''.join([p[0] for p in pinyin(word2, style=Style.TONE3, neutral_tone_with_five=True)])
			
 
				+    
			
 
				+    soundex1 = chinese_soundex(pinyin1)
			
 
				+    # print(soundex1)
			
 
				+    soundex2 = chinese_soundex(pinyin2)
			
 
				+    # print('soundex2', soundex2)
			
 
				+
			
 
				+    if tone_sensitive:
			
 
				+        return soundex1 == soundex2
			
 
				+    else:
			
 
				+        return soundex1[:3] == soundex2[:3]
			
 
				+
			
 
				+def fuzzy_correct_chinese(text):
			
 
				+    words = jieba.lcut(text)
			
 
				+    corrected_words = []
			
 
				+    for word in words:
			
 
				+        if word.isalpha():
			
 
				+            corrected_words.append(word)
			
 
				+            continue
			
 
				+        word_pinyin = ''.join([p[0] for p in pinyin(word, style=Style.NORMAL)])
			
 
				+        # print(f"Term: {word}, Pinyin: {word_pinyin}")
			
 
				+        if word in ERROR_CORRECTION:
			
 
				+            corrected_words.append(ERROR_CORRECTION[word])
			
 
				+        else:
			
 
				+            for term in CORRECT_TERMS:
			
 
				+                if compare_chinese_words(word, term, tone_sensitive=True):
			
 
				+                    # print(f"corrected: {word} -> {term}")
			
 
				+                    corrected_words.append(term)
			
 
				+                    break
			
 
				+            else:
			
 
				+                corrected_words.append(word)
			
 
				+    return ''.join(corrected_words)
			
--- a/whisper.py
+++ b/whisper.py
@@ -1,191 +0,0 @@
 
				-import os
			
 
				-import argparse
			
 
				-from openai import OpenAI
			
 
				-from dotenv import load_dotenv
			
 
				-import tiktoken
			
 
				-from pypinyin import pinyin, Style
			
 
				-import jieba
			
 
				-from datetime import datetime
			
 
				-
			
 
# Read variables from environment.env into the process environment.
load_dotenv('environment.env')
# No api_key is passed explicitly.
# NOTE(review): presumably the client picks up OPENAI_API_KEY from the
# environment loaded above — confirm.
client = OpenAI()

# System prompt for the proofreading chat request (runtime text, kept verbatim).
system_prompt = """你是一位專業的轉錄校對助理，專門處理有關溫室氣體、碳排放和碳管理的對話轉錄。
你的任務是：
1. 確保以下專業術語的準確性：溫室氣體、碳排放、碳管理、碳盤查、碳權交易、碳足跡、淨零排放、碳權。
2. 在必要時添加適當的標點符號，如句號、逗號
3. 使用台灣的繁體中文，確保語言表達符合台灣的用語習慣。
4. 只更正明顯的錯誤或改善可讀性，不要改變原文的意思或結構。
5. 不要回答問題、解釋概念或添加任何不在原文中的信息。
6. 如果原文是一個問句，保持它的問句形式，不要提供答案。

請只根據提供的原文進行必要的更正，不要添加或刪除任何實質性內容。在修正時，請特別注意上下文，確保修正後的詞語符合整句話的語境。"""
			
 
				-
			
 
				-def transcribe(audio_file):
			
 
				-    try:
			
 
				-        transcript = client.audio.transcriptions.create(
			
 
				-            file=audio_file,
			
 
				-            model="whisper-1",
			
 
				-            response_format="text"
			
 
				-        )
			
 
				-        return transcript
			
 
				-    except Exception as e:
			
 
				-        print(f"轉錄時發生錯誤：{str(e)}")
			
 
				-        return None
			
 
				-    
			
 
				-def save_output(file_name, raw_transcript, corrected_transcript):
			
 
				-    output_dir = "output"
			
 
				-    if not os.path.exists(output_dir):
			
 
				-        os.makedirs(output_dir)
			
 
				-    
			
 
				-    output_file = os.path.join(output_dir, "transcription_results.txt")
			
 
				-    
			
 
				-    with open(output_file, "a", encoding="utf-8") as f:
			
 
				-        f.write(f"\n{'='*50}\n")
			
 
				-        f.write(f"文件名: {file_name}\n")
			
 
				-        f.write(f"處理時間: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
			
 
				-        f.write("原始轉錄:\n")
			
 
				-        f.write(f"{raw_transcript}\n\n")
			
 
				-        f.write("修正後的轉錄:\n")
			
 
				-        f.write(f"{corrected_transcript}\n")
			
 
				-
			
 
				-
			
 
				-def process_audio_file(file_path):
			
 
				-    try:
			
 
				-        with open(file_path, "rb") as audio_file:
			
 
				-            file_size = os.path.getsize(file_path) / (1024 * 1024)  # 轉換為 MB
			
 
				-            if file_size > 25:
			
 
				-                print(f"警告：文件 {os.path.basename(file_path)} 大小為 {file_size:.2f} MB，超過了 25 MB 的限制。可能無法處理。")
			
 
				-
			
 
				-            print(f"\n處理文件：{os.path.basename(file_path)}")
			
 
				-            raw_transcript = transcribe(audio_file)
			
 
				-            if raw_transcript is None:
			
 
				-                return
			
 
				-
			
 
				-            print("\n原始轉錄：")
			
 
				-            print(raw_transcript)
			
 
				-
			
 
				-            corrected_transcript = post_process_transcript(raw_transcript)
			
 
				-            print("\n修正後的轉錄：")
			
 
				-            print(corrected_transcript)
			
 
				-
			
 
				-            # 保存輸出結果
			
 
				-            save_output(os.path.basename(file_path), raw_transcript, corrected_transcript)
			
 
				-
			
 
				-    except Exception as e:
			
 
				-        print(f"處理文件 {os.path.basename(file_path)} 時發生錯誤：{str(e)}")
			
 
				-
			
 
				-
			
 
				-def process_folder(folder_path):
			
 
				-    processed_files = 0
			
 
				-
			
 
				-    for filename in os.listdir(folder_path):
			
 
				-        if filename.endswith((".mp3", ".wav", ".m4a")):  
			
 
				-            file_path = os.path.join(folder_path, filename)
			
 
				-            process_audio_file(file_path)
			
 
				-            processed_files += 1
			
 
				-
			
 
				-    print("\n=== 總結 ===")
			
 
				-    print(f"處理的文件數：{processed_files}")
			
 
				-
			
 
				-def chinese_soundex(pinyin):
			
 
				-    soundex_map = {
			
 
				-        'b': '1', 'p': '1', 'm': '1', 'f': '1',
			
 
				-        'd': '2', 't': '2', 'n': '2', 'l': '2',
			
 
				-        'g': '3', 'k': '3', 'h': '3',
			
 
				-        'j': '4', 'q': '4', 'x': '4',
			
 
				-        'zh': '5', 'ch': '5', 'sh': '5', 'r': '5',
			
 
				-        'z': '6', 'c': '6', 's': '6'
			
 
				-    }
			
 
				-    
			
 
				-    code = pinyin[0].upper()
			
 
				-    tone = '0'
			
 
				-    
			
 
				-    for char in pinyin[1:]:
			
 
				-        if char.isdigit():
			
 
				-            tone = char
			
 
				-        elif char in soundex_map:
			
 
				-            if len(code) == 1 or code[-1] != soundex_map[char]:
			
 
				-                code += soundex_map[char]
			
 
				-        if len(code) == 4:
			
 
				-            break
			
 
				-    
			
 
				-    return (code.ljust(4, '0') + tone)[:5]
			
 
				-
			
 
				-def compare_chinese_words(word1, word2):
			
 
				-    pinyin1 = ''.join([p[0] for p in pinyin(word1, style=Style.TONE3, neutral_tone_with_five=True)])
			
 
				-    pinyin2 = ''.join([p[0] for p in pinyin(word2, style=Style.TONE3, neutral_tone_with_five=True)])
			
 
				-    
			
 
				-    soundex1 = chinese_soundex(pinyin1)
			
 
				-    soundex2 = chinese_soundex(pinyin2)
			
 
				-    
			
 
				-    return soundex1 == soundex2
			
 
				-
			
 
				-error_correction = {
			
 
				-    "看拳": "碳權",
			
 
				-    "看盤插": "碳盤查",
			
 
				-    "盤插": "盤查",
			
 
				-    "看": "碳"
			
 
				-}
			
 
				-
			
 
				-
			
 
				-def fuzzy_correct_chinese(text, correct_terms):
			
 
				-    words = jieba.cut(text)
			
 
				-    corrected_words = []
			
 
				-    for word in words:
			
 
				-        if word in error_correction:
			
 
				-            corrected_words.append(error_correction[word])
			
 
				-        else:
			
 
				-            for term in correct_terms:
			
 
				-                if compare_chinese_words(word, term):
			
 
				-                    corrected_words.append(term)
			
 
				-                    break
			
 
				-            else:
			
 
				-                corrected_words.append(word)
			
 
				-    return ' '.join(corrected_words)
			
 
				-
			
 
				-
			
 
				-def post_process_transcript(transcript, temperature=0):
			
 
				-    correct_terms = ["碳", "溫室氣體", "碳排放", "排放", "碳管理", "管理", "碳盤查", "盤查", "碳權交易", "碳費",
			
 
				-                     "碳權", "碳足跡", "足跡", "淨零排放", "零排放", "排放", "淨零",
			
 
				-                     "氣候變遷法", "氣候", "氣候變遷", "法",
			
 
				-                     "是什麼", "請解釋", "為什麼", "什麼意思",
			
 
				-                     "台灣"]
			
 
				-    
			
 
				-    corrected_transcript = fuzzy_correct_chinese(transcript, correct_terms)
			
 
				-    
			
 
				-    messages = [
			
 
				-        {"role": "system", "content": system_prompt},
			
 
				-        {"role": "user", "content": f"請校對並修正以下轉錄文本，但不要改變其原意或回答問題：\n\n{corrected_transcript}"}
			
 
				-    ]
			
 
				-
			
 
				-    response = client.chat.completions.create(
			
 
				-        model="gpt-4o",
			
 
				-        temperature=temperature,
			
 
				-        messages=messages
			
 
				-    )
			
 
				-
			
 
				-    return response.choices[0].message.content
			
 
				-
			
 
				-
			
 
				-def main():
			
 
				-    parser = argparse.ArgumentParser(description="處理音頻文件使用 Whisper")
			
 
				-    parser.add_argument("--file", help="要處理的單個音頻文件的路徑")
			
 
				-    parser.add_argument("--folder", default="data", help="包含音頻文件的文件夾路徑（默認：data）")
			
 
				-    args = parser.parse_args()
			
 
				-
			
 
				-    if args.file:
			
 
				-        if os.path.isfile(args.file):
			
 
				-            process_audio_file(args.file)
			
 
				-        else:
			
 
				-            print(f"錯誤：文件 '{args.file}' 不存在。")
			
 
				-    elif args.folder:
			
 
				-        if os.path.isdir(args.folder):
			
 
				-            process_folder(args.folder)
			
 
				-        else:
			
 
				-            print(f"錯誤：文件夾 '{args.folder}' 不存在。")
			
 
				-    else:
			
 
				-        print("錯誤：請指定一個文件（--file）或文件夾（--folder）來處理。")
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    main()