"""Transcribe audio files with Whisper and proofread the result with GPT.

Pipeline for each audio file:

1. ``transcribe`` sends the audio to the ``whisper-1`` model.
2. ``fuzzy_correct_chinese`` replaces known mis-hearings and words that
   sound like one of the carbon-management domain terms.
3. ``post_process_transcript`` asks ``gpt-4o`` to proofread the text.
4. ``save_output`` appends both versions to ``output/transcription_results.txt``.
"""

import argparse
import os
from datetime import datetime
from typing import Iterable, Optional

import jieba
import tiktoken  # not used in this file's visible code; import kept as-is
from dotenv import load_dotenv
from openai import OpenAI
from pypinyin import Style, pinyin

# The environment file must be loaded before the client is constructed.
load_dotenv('environment.env')

client = OpenAI()

# The prompt text is kept character-for-character as it appears in this file.
# NOTE(review): the numbered items were probably on separate lines
# originally -- confirm against the upstream copy.
system_prompt = (
    "你是一位專業的轉錄校對助理,專門處理有關溫室氣體、碳排放和碳管理的對話轉錄。 "
    "你的任務是: "
    "1. 確保以下專業術語的準確性:溫室氣體、碳排放、碳管理、碳盤查、碳權交易、碳足跡、淨零排放、碳權。 "
    "2. 在必要時添加適當的標點符號,如句號、逗號 "
    "3. 使用台灣的繁體中文,確保語言表達符合台灣的用語習慣。 "
    "4. 只更正明顯的錯誤或改善可讀性,不要改變原文的意思或結構。 "
    "5. 不要回答問題、解釋概念或添加任何不在原文中的信息。 "
    "6. 如果原文是一個問句,保持它的問句形式,不要提供答案。 "
    "請只根據提供的原文進行必要的更正,不要添加或刪除任何實質性內容。在修正時,請特別注意上下文,確保修正後的詞語符合整句話的語境。"
)

# File extensions (lower case) that process_folder treats as audio.
AUDIO_EXTENSIONS = (".mp3", ".wav", ".m4a")

# Size above which process_audio_file prints a warning, in MB.
MAX_AUDIO_SIZE_MB = 25

# Consonant groups used by chinese_soundex.
_SOUNDEX_MAP = {
    'b': '1', 'p': '1', 'm': '1', 'f': '1',
    'd': '2', 't': '2', 'n': '2', 'l': '2',
    'g': '3', 'k': '3', 'h': '3',
    'j': '4', 'q': '4', 'x': '4',
    'zh': '5', 'ch': '5', 'sh': '5', 'r': '5',
    'z': '6', 'c': '6', 's': '6',
}

# Two-letter initials that must be looked up as a unit, not letter by letter.
_DIGRAPH_INITIALS = ('zh', 'ch', 'sh')

# Exact-match replacements applied before the phonetic comparison.
error_correction = {
    "看拳": "碳權",
    "看盤插": "碳盤查",
    "盤插": "盤查",
    "看": "碳",
}


def transcribe(audio_file) -> Optional[str]:
    """Transcribe an open binary audio file with the ``whisper-1`` model.

    Args:
        audio_file: File object opened in binary mode.

    Returns:
        The transcript text, or ``None`` if the request raised; the error
        is printed rather than propagated.
    """
    try:
        return client.audio.transcriptions.create(
            file=audio_file,
            model="whisper-1",
            response_format="text",
        )
    except Exception as e:
        print(f"轉錄時發生錯誤:{e}")
        return None


def save_output(file_name, raw_transcript, corrected_transcript) -> None:
    """Append one result record to ``output/transcription_results.txt``.

    Args:
        file_name: Name of the audio file the transcripts belong to.
        raw_transcript: Text returned by the transcription step.
        corrected_transcript: Text returned by the proofreading step.
    """
    output_dir = "output"
    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs(output_dir, exist_ok=True)

    output_file = os.path.join(output_dir, "transcription_results.txt")
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    with open(output_file, "a", encoding="utf-8") as f:
        f.write(f"\n{'='*50}\n")
        f.write(f"文件名: {file_name}\n")
        f.write(f"處理時間: {timestamp}\n\n")
        f.write("原始轉錄:\n")
        f.write(f"{raw_transcript}\n\n")
        f.write("修正後的轉錄:\n")
        f.write(f"{corrected_transcript}\n")


def process_audio_file(file_path) -> None:
    """Transcribe, proofread, print and save the result for one audio file.

    Any exception raised along the way is caught and reported on stdout so
    that a single bad file does not stop a folder run.

    Args:
        file_path: Path of the audio file to process.
    """
    base_name = os.path.basename(file_path)
    try:
        file_size = os.path.getsize(file_path) / (1024 * 1024)  # bytes -> MB
        if file_size > MAX_AUDIO_SIZE_MB:
            # Warn only; the upload is still attempted.
            print(f"警告:文件 {base_name} 大小為 {file_size:.2f} MB,超過了 25 MB 的限制。可能無法處理。")

        print(f"\n處理文件:{base_name}")
        with open(file_path, "rb") as audio_file:
            raw_transcript = transcribe(audio_file)

        if raw_transcript is None:
            return

        print("\n原始轉錄:")
        print(raw_transcript)

        corrected_transcript = post_process_transcript(raw_transcript)
        print("\n修正後的轉錄:")
        print(corrected_transcript)

        # Persist both versions of the transcript.
        save_output(base_name, raw_transcript, corrected_transcript)
    except Exception as e:
        print(f"處理文件 {base_name} 時發生錯誤:{e}")


def process_folder(folder_path) -> None:
    """Process every audio file directly inside ``folder_path``.

    Files are matched by extension, case-insensitively, and visited in
    sorted name order so that runs are reproducible. The printed total
    counts files that were attempted, including ones that failed.

    Args:
        folder_path: Directory to scan; sub-directories are not descended.
    """
    processed_files = 0
    for filename in sorted(os.listdir(folder_path)):
        if filename.lower().endswith(AUDIO_EXTENSIONS):
            process_audio_file(os.path.join(folder_path, filename))
            processed_files += 1

    print("\n=== 總結 ===")
    print(f"處理的文件數:{processed_files}")


def chinese_soundex(pinyin) -> str:
    """Reduce a TONE3 pinyin string to a five-character phonetic code.

    The code is the upper-cased first letter, up to three consonant-group
    digits (adjacent repeats collapsed), zero padding to four characters,
    and finally the last tone digit seen before scanning stopped.

    ``zh``/``ch``/``sh`` are looked up as single initials. An empty input
    yields ``"00000"`` instead of raising ``IndexError``.

    Args:
        pinyin: Pinyin text with tone digits, e.g. ``"tan4quan2"``. The
            parameter shadows the imported ``pinyin`` function, which is
            not needed inside this function.

    Returns:
        A five-character code such as ``"T2404"``.
    """
    if not pinyin:
        return "00000"

    code = pinyin[0].upper()
    tone = '0'
    # When the text starts with zh/ch/sh the 'h' is part of the leading
    # initial, so it must not be scored as a separate consonant.
    pos = 2 if pinyin[:2] in _DIGRAPH_INITIALS else 1
    length = len(pinyin)

    while pos < length:
        char = pinyin[pos]
        if char.isdigit():
            tone = char
            pos += 1
            continue

        pair = pinyin[pos:pos + 2]
        if pair in _DIGRAPH_INITIALS:
            key, step = pair, 2
        else:
            key, step = char, 1

        digit = _SOUNDEX_MAP.get(key)
        if digit is not None:
            if len(code) == 1 or code[-1] != digit:
                code += digit
            if len(code) == 4:
                break
        pos += step

    return (code.ljust(4, '0') + tone)[:5]


def _word_soundex(word: str) -> str:
    """Return the chinese_soundex code of a word's concatenated pinyin."""
    syllables = pinyin(word, style=Style.TONE3, neutral_tone_with_five=True)
    return chinese_soundex(''.join(s[0] for s in syllables))


def compare_chinese_words(word1, word2) -> bool:
    """Return True when both words reduce to the same phonetic code."""
    return _word_soundex(word1) == _word_soundex(word2)


def fuzzy_correct_chinese(text, correct_terms: Iterable[str]) -> str:
    """Replace mis-heard words in ``text`` with known domain terms.

    Each jieba token is replaced by its ``error_correction`` entry when
    one exists, otherwise by the first term in ``correct_terms`` with the
    same phonetic code, otherwise it is kept unchanged.

    Tokens are joined without a separator so that no spaces are inserted
    into the Chinese text.
    NOTE(review): this assumes jieba's default cut mode yields every
    character of the input, whitespace included -- confirm.

    Args:
        text: Raw transcript text.
        correct_terms: Candidate terms, in priority order.

    Returns:
        The corrected text.
    """
    # Compute each term's code once instead of once per token.
    term_codes = [(term, _word_soundex(term)) for term in correct_terms]

    corrected_words = []
    for word in jieba.cut(text):
        if word in error_correction:
            corrected_words.append(error_correction[word])
            continue

        word_code = _word_soundex(word)
        replacement = next(
            (term for term, code in term_codes if code == word_code),
            word,
        )
        corrected_words.append(replacement)

    return ''.join(corrected_words)


def post_process_transcript(transcript, temperature=0) -> str:
    """Apply phonetic correction, then have ``gpt-4o`` proofread the text.

    Args:
        transcript: Raw transcript text.
        temperature: Sampling temperature passed to the chat completion.

    Returns:
        The proofread text. If the model returns no content, the locally
        corrected text is returned instead.
    """
    correct_terms = [
        "碳", "溫室氣體", "碳排放", "排放", "碳管理", "管理", "碳盤查", "盤查",
        "碳權交易", "碳費", "碳權", "碳足跡", "足跡", "淨零排放", "零排放", "排放",
        "淨零", "氣候變遷法", "氣候", "氣候變遷", "法", "是什麼", "請解釋",
        "為什麼", "什麼意思", "台灣",
    ]

    corrected_transcript = fuzzy_correct_chinese(transcript, correct_terms)

    messages = [
        {"role": "system", "content": system_prompt},
        {
            "role": "user",
            "content": f"請校對並修正以下轉錄文本,但不要改變其原意或回答問題:\n\n{corrected_transcript}",
        },
    ]

    response = client.chat.completions.create(
        model="gpt-4o",
        temperature=temperature,
        messages=messages,
    )
    content = response.choices[0].message.content
    # Fall back to the locally corrected text rather than returning None.
    return content if content else corrected_transcript


def main() -> None:
    """Parse the command line and process one file or a whole folder."""
    parser = argparse.ArgumentParser(description="處理音頻文件使用 Whisper")
    parser.add_argument("--file", help="要處理的單個音頻文件的路徑")
    parser.add_argument("--folder", default="data", help="包含音頻文件的文件夾路徑(默認:data)")
    args = parser.parse_args()

    if args.file:
        if os.path.isfile(args.file):
            process_audio_file(args.file)
        else:
            print(f"錯誤:文件 '{args.file}' 不存在。")
    elif args.folder:
        if os.path.isdir(args.folder):
            process_folder(args.folder)
        else:
            print(f"錯誤:文件夾 '{args.folder}' 不存在。")
    else:
        # Reached only when --folder is given an empty value.
        print("錯誤:請指定一個文件(--file)或文件夾(--folder)來處理。")


if __name__ == "__main__":
    main()