speech2text.py 1.6 KB

123456789101112131415161718192021222324252627282930313233343536373839
  1. import tempfile
  2. from typing import List, Any
  3. from fastapi import Request, APIRouter, UploadFile, File
  4. from fastapi.responses import FileResponse, PlainTextResponse
  5. from fastapi.exceptions import HTTPException
  6. from fastapi.encoders import jsonable_encoder
  7. from urllib.parse import urlparse, urljoin
  8. from pathlib import Path
  9. from icecream import ic
  10. from google.oauth2 import service_account
  11. from google.cloud import speech
  12. client_file = Path(__file__).parent.parent/'keys/pure-lodge-426406-e4-af94156a748a.json'
  13. credentials = service_account.Credentials.from_service_account_file(client_file)
  14. client = speech.SpeechClient(credentials=credentials)
  15. router = APIRouter()
  16. @router.post('/gcp')
  17. def gcp(language_code: str=None, file: UploadFile = File()):
  18. extension = file.filename.split(".")[-1]
  19. if extension not in ("mp3", "wav", "webm"):
  20. return HTTPException(status_code=400, detail="Audio must be mp3, wav, webm or webm format!")
  21. content = file.file.read()
  22. audio = speech.RecognitionAudio(content=content)
  23. if extension=='mp3': encoding=speech.RecognitionConfig.AudioEncoding.MP3
  24. elif extension=='wav': encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16
  25. elif extension=='webm': encoding=speech.RecognitionConfig.AudioEncoding.WEBM_OPUS
  26. else: return HTTPException(status_code=400, detail="no such encoding.")
  27. config = speech.RecognitionConfig(
  28. encoding=encoding,
  29. sample_rate_hertz=48000,
  30. language_code=language_code
  31. )
  32. response = client.recognize(config=config, audio=audio)
  33. results = [results.alternatives[0].transcript for results in response.results]
  34. ic(results)
  35. return results