SherryLiu hace 6 meses
padre
commit
16e5e2f5c6
Se han modificado 2 ficheros con 112 adiciones y 27 borrados
  1. 58 0
      asr_api.py
  2. 54 27
      whisper.py

+ 58 - 0
asr_api.py

@@ -0,0 +1,58 @@
+# asr_api.py
+
import logging
import os
import tempfile

from dotenv import load_dotenv
from fastapi import FastAPI, File, UploadFile, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.responses import JSONResponse

from whisper import transcribe, post_process_transcript, setup_logger
+
# Load environment variables from the local env file.
load_dotenv('environment.env')

# Set up logging through the helper imported from whisper.py.
logger = setup_logger()

# ASGI application object; the route handlers below are registered on it.
app = FastAPI()
+
@app.post("/transcribe/")
async def transcribe_audio(file: UploadFile = File(...)):
    """Transcribe an uploaded audio file and return raw and corrected text.

    The upload is written to a temporary file, passed to ``transcribe`` by
    path, and the result is run through ``post_process_transcript``.

    Args:
        file: The uploaded audio. Its content type must be one of
            ``audio/mpeg``, ``audio/mp4``, ``audio/x-m4a`` or ``audio/wav``.

    Returns:
        A ``JSONResponse`` with the keys ``original_transcript`` and
        ``corrected_transcript``.

    Raises:
        HTTPException: 400 for an unsupported content type or when a
            ``ValueError`` occurs (including a failed transcription); 500 for
            any other unexpected error.
    """
    temp_path = None
    try:
        content_type = file.content_type
        logger.info(f"Received file: {file.filename}, Content-Type: {content_type}")

        if content_type not in ["audio/mpeg", "audio/mp4", "audio/x-m4a", "audio/wav"]:
            raise HTTPException(status_code=400, detail=f"Unsupported file type: {content_type}")

        # The client may omit the filename; fall back to no suffix instead of
        # crashing in os.path.splitext(None).
        suffix = os.path.splitext(file.filename or "")[1]

        # Close the temp file before handing its path on, so the data is
        # fully flushed and the file is not held open while it is re-read.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
            temp_path = temp_file.name
            temp_file.write(await file.read())
        logger.info(f"Temporary file created: {temp_path}")

        # transcribe() and post_process_transcript() are synchronous calls to
        # a remote API; run them in the thread pool so the event loop stays
        # free to serve other requests.
        transcript = await run_in_threadpool(transcribe, temp_path)
        if transcript is None:
            raise ValueError("Transcription failed. Check server logs for details.")

        corrected_transcript = await run_in_threadpool(post_process_transcript, transcript)

        return JSONResponse(content={
            "original_transcript": transcript,
            "corrected_transcript": corrected_transcript
        })
    except HTTPException:
        # Deliberate HTTP errors (e.g. the 400 above) must pass through as-is
        # rather than being rewritten to a 500 by the generic handler below.
        raise
    except ValueError as ve:
        logger.exception("Transcription failed")
        raise HTTPException(status_code=400, detail=str(ve))
    except Exception as e:
        logger.exception("An unexpected error occurred during transcription")
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred: {str(e)}")
    finally:
        # Always remove the temp file, including when transcription raised.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
                logger.info(f"Temporary file deleted: {temp_path}")
            except OSError:
                logger.warning(f"Could not delete temporary file: {temp_path}", exc_info=True)
+
if __name__ == "__main__":
    # Direct-execution entry point: serve the app with uvicorn on all
    # interfaces, port 8000, with auto-reload enabled.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 8000, "reload": True}
    uvicorn.run("asr_api:app", **server_options)

+ 54 - 27
whisper.py

@@ -2,13 +2,13 @@ import os
 import argparse
 import argparse
 from openai import OpenAI
 from openai import OpenAI
 from dotenv import load_dotenv
 from dotenv import load_dotenv
-import tiktoken
 from pypinyin import pinyin, Style
 from pypinyin import pinyin, Style
 import jieba
 import jieba
 from datetime import datetime
 from datetime import datetime
+import logging
 
 
 load_dotenv('environment.env')
 load_dotenv('environment.env')
-client = OpenAI()
+client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
 
 
 system_prompt = """你是一位專業的轉錄校對助理,專門處理有關溫室氣體、碳排放和碳管理的對話轉錄。
 system_prompt = """你是一位專業的轉錄校對助理,專門處理有關溫室氣體、碳排放和碳管理的對話轉錄。
 你的任務是:
 你的任務是:
@@ -21,16 +21,26 @@ system_prompt = """你是一位專業的轉錄校對助理,專門處理有關
 
 
 請只根據提供的原文進行必要的更正,不要添加或刪除任何實質性內容。在修正時,請特別注意上下文,確保修正後的詞語符合整句話的語境。"""
 請只根據提供的原文進行必要的更正,不要添加或刪除任何實質性內容。在修正時,請特別注意上下文,確保修正後的詞語符合整句話的語境。"""
 
 
def setup_logger(level=logging.DEBUG):
    """Configure root logging and return this module's logger.

    Args:
        level: Threshold passed to ``logging.basicConfig``. Defaults to
            ``logging.DEBUG``, the value that used to be hard-coded.

    Returns:
        The ``logging.Logger`` named after this module.

    Note:
        ``logging.basicConfig`` does nothing if the root logger already has
        handlers, so only the first effective call in a process sets the
        level and format.
    """
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    )
    return logging.getLogger(__name__)


logger = setup_logger()
+
 def transcribe(audio_file):
 def transcribe(audio_file):
     try:
     try:
-        transcript = client.audio.transcriptions.create(
-            file=audio_file,
-            model="whisper-1",
-            response_format="text"
-        )
-        return transcript
+        logger.info(f"Attempting to transcribe file: {audio_file}")
+        with open(audio_file, "rb") as file:
+            response = client.audio.transcriptions.create(
+                file=file,
+                model="whisper-1",
+                response_format="text"
+            )
+        logger.info("Transcription successful")
+        return response
     except Exception as e:
     except Exception as e:
-        print(f"轉錄時發生錯誤:{str(e)}")
+        logger.error(f"Transcription failed: {str(e)}", exc_info=True)
         return None
         return None
     
     
 def save_output(file_name, raw_transcript, corrected_transcript):
 def save_output(file_name, raw_transcript, corrected_transcript):
@@ -74,6 +84,7 @@ def process_audio_file(file_path):
 
 
     except Exception as e:
     except Exception as e:
         print(f"處理文件 {os.path.basename(file_path)} 時發生錯誤:{str(e)}")
         print(f"處理文件 {os.path.basename(file_path)} 時發生錯誤:{str(e)}")
+    return transcript
 
 
 
 
 def process_folder(folder_path):
 def process_folder(folder_path):
@@ -165,27 +176,43 @@ def post_process_transcript(transcript, temperature=0):
         messages=messages
         messages=messages
     )
     )
 
 
-    return response.choices[0].message.content
+    # return response.choices[0].message.content
+    return transcript
 
 
-
-def main():
# Command-line entry point: transcribe one audio file through the API and
# print both the raw and the post-processed transcript.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="處理音頻文件使用 Whisper")
    parser.add_argument("--file", help="要處理的單個音頻文件的路徑")
    args = parser.parse_args()

    # Fail loudly instead of exiting silently when there is nothing to do.
    if not args.file:
        parser.error("請指定一個文件(--file)來處理。")
    if not os.path.isfile(args.file):
        parser.error(f"文件 '{args.file}' 不存在。")

    # Pass the path, not an open file object: transcribe() opens the file
    # itself, so handing it a file object would make open() raise TypeError.
    transcript = transcribe(args.file)
    if not transcript:
        # transcribe() has already logged the failure; signal it via exit code.
        raise SystemExit(1)

    corrected = post_process_transcript(transcript)
    print("Original:", transcript)
    print("Corrected:", corrected)
+
+
+# def main():
+#     parser = argparse.ArgumentParser(description="處理音頻文件使用 Whisper")
+#     parser.add_argument("--file", help="要處理的單個音頻文件的路徑")
+#     parser.add_argument("--folder", default="data", help="包含音頻文件的文件夾路徑(默認:data)")
+#     args = parser.parse_args()
+
+#     if args.file:
+#         if os.path.isfile(args.file):
+#             process_audio_file(args.file)
+#         else:
+#             print(f"錯誤:文件 '{args.file}' 不存在。")
+#     elif args.folder:
+#         if os.path.isdir(args.folder):
+#             process_folder(args.folder)
+#         else:
+#             print(f"錯誤:文件夾 '{args.folder}' 不存在。")
+#     else:
+#         print("錯誤:請指定一個文件(--file)或文件夾(--folder)來處理。")
+
+# if __name__ == "__main__":
+#     main()