# whisper.py
# Historical imports kept commented out by the original author.
# import os, sys
# from typing import List, Any
from fastapi import Request, APIRouter, UploadFile, File
from fastapi.responses import FileResponse, PlainTextResponse
# from fastapi import FastAPI, HTTPException, status
# from fastapi.middleware.cors import CORSMiddleware
# import uvicorn
# from fastapi.exceptions import HTTPException
# from fastapi.encoders import jsonable_encoder
# from urllib.parse import urlparse, urljoin
from pathlib import Path
# Project transcription pipelines: Tai-gi and Mandarin Chinese variants.
from api.openai_scripts_tai_gi.main_script import main
from api.openai_scripts_chinese.main_script import main as main2
# from api.openai_scripts_new.main_script import main as main3
from datetime import datetime
import random
import string

# Router mounted by the parent application; endpoints are registered below.
router = APIRouter()
# Leftover from when this module ran as a standalone FastAPI app with CORS:
# router = FastAPI()
# router.add_middleware(
#     CORSMiddleware,
#     allow_origins=["*"],
#     allow_credentials=True,
#     allow_methods=["*"],
#     allow_headers=["*"],
# )
  27. @router.post('/tai_gi')
  28. async def whisper_auto(file: UploadFile = File()):
  29. if file == None:
  30. return {'message': '請上傳檔案'}
  31. extension = file.filename.split(".")[-1]
  32. if extension not in ("mp3", "wav", "webm"):
  33. return PlainTextResponse("Audio must be mp3, wav or webm format!", 400)
  34. filename = Path(__file__).parent.parent/'speech_audio'/datetime.now().strftime(f"%Y%m%d%H%M%S_{''.join(random.sample(string.ascii_lowercase, 3))}.{extension}")
  35. with open(filename, 'wb') as f:
  36. f.write(await file.read())
  37. with open(filename, 'rb') as f:
  38. raw_transcript, corrected_transcript = main(f)
  39. # if raw_transcript and corrected_transcript:
  40. # os.remove(filename)
  41. # return {'message': corrected_transcript}
  42. # else:
  43. # os.remove(filename)
  44. # return {"message": "Audio processing failed."}
  45. if raw_transcript and corrected_transcript:
  46. # os.remove(save_path)
  47. # return {'message': {"Raw transcript": raw_transcript, "Corrected transcript": corrected_transcript}}
  48. return {'message': corrected_transcript}
  49. else:
  50. # os.remove(save_path)
  51. return {"message": "Audio processing failed."}
  52. @router.post('/chinese')
  53. async def whisper_auto(file: UploadFile = File()):
  54. if file == None:
  55. return {'message': '請上傳檔案'}
  56. extension = file.filename.split(".")[-1]
  57. if extension not in ("mp3", "wav", "webm"):
  58. return PlainTextResponse("Audio must be mp3, wav or webm format!", 400)
  59. filename = Path(__file__).parent.parent/'speech_audio'/datetime.now().strftime(f"%Y%m%d%H%M%S_{''.join(random.sample(string.ascii_lowercase, 3))}.{extension}")
  60. with open(filename, 'wb') as f:
  61. f.write(await file.read())
  62. with open(filename, 'rb') as f:
  63. raw_transcript, corrected_transcript = main2(f)
  64. if raw_transcript and corrected_transcript:
  65. # return {'message': {"Raw transcript": raw_transcript, "Corrected transcript": corrected_transcript}}
  66. return {'message': corrected_transcript}
  67. else:
  68. return {"message": "Audio processing failed."}
  69. # import numpy as np
  70. # from transformers import pipeline
  71. # import gradio as gr
  72. # @router.post('/tai_gi_new')
  73. # async def whisper_auto(file: UploadFile = File()):
  74. # if file == None:
  75. # return {'message': '請上傳檔案'}
  76. # extension = file.filename.split(".")[-1]
  77. # if extension not in ("mp3", "wav", "webm"):
  78. # return PlainTextResponse("Audio must be mp3, wav or webm format!", 400)
  79. # filename = Path(__file__).parent/'speech_audio'/datetime.now().strftime(f"%Y%m%d%H%M%S_{''.join(random.sample(string.ascii_lowercase, 3))}.{extension}")
  80. # with open(filename, 'wb') as f:
  81. # f.write(await file.read())
  82. # # with open(filename, 'rb') as f:
  83. # # raw_transcript, corrected_transcript = main3(f)
  84. # # audio_data, sample_rate = librosa.load(filename, sr=None)
  85. # # 讀取音頻文件
  86. # audio_input = gr.Audio(type="filepath")
  87. # raw_transcript, corrected_transcript = main3(audio_input(filename))
  88. # # if raw_transcript and corrected_transcript:
  89. # # os.remove(filename)
  90. # # return {'message': corrected_transcript}
  91. # # else:
  92. # # os.remove(filename)
  93. # # return {"message": "Audio processing failed."}
  94. # if raw_transcript and corrected_transcript:
  95. # # os.remove(save_path)
  96. # return {'message': {"Raw transcript": raw_transcript, "Corrected transcript": corrected_transcript}}
  97. # else:
  98. # # os.remove(save_path)
  99. # return {"message": "Audio processing failed."}
  100. # if __name__ == "__main__":
  101. # uvicorn.run("whisper:router", reload=False, port=8086, host='cmm.ai', ssl_keyfile="/etc/letsencrypt/live/cmm.ai/privkey.pem", ssl_certfile="/etc/letsencrypt/live/cmm.ai/fullchain.pem")