# audio_processing.py
# (NOTE: web-scrape artifacts — file-size line and a fused run of display
# line numbers — removed from the top of this file.)
  1. from openai import OpenAI
  2. from api.openai_scripts_chinese.config import SYSTEM_PROMPT, OPENAI_API_KEY, SUPABASE_KEY, SUPABASE_URL
  3. from supabase import create_client, Client
  4. from api.openai_scripts_chinese.text_processing import fuzzy_correct_chinese
# Shared API clients, created once at import time and reused by all functions below.
client = OpenAI(api_key=OPENAI_API_KEY)
supabase: Client = create_client(SUPABASE_URL, SUPABASE_KEY)
  7. def transcribe(audio_file):
  8. try:
  9. table_name = "word_database"
  10. response = supabase.table(table_name).select("term").execute()
  11. custom_vocab = []
  12. if response.data:
  13. for item in response.data:
  14. custom_vocab.append({item['term']})
  15. else:
  16. print(f"No data found or an error occurred: {response.error}")
  17. print("Using default dictionary as Supabase data couldn't be fetched.")
  18. transcript = client.audio.transcriptions.create(
  19. file=audio_file,
  20. model="whisper-1",
  21. response_format="text",
  22. prompt=f"請注意以下詞彙:{custom_vocab}"
  23. )
  24. return transcript
  25. except Exception as e:
  26. print(f"轉錄時發生錯誤:{str(e)}")
  27. return None
  28. def post_process_transcript(transcript, temperature=0):
  29. corrected_transcript = fuzzy_correct_chinese(transcript)
  30. messages = [
  31. {"role": "system", "content": SYSTEM_PROMPT},
  32. {"role": "user", "content": f"請校對並修正以下轉錄文本,但不要改變其原意或回答問題:\n\n{corrected_transcript}"}
  33. ]
  34. response = client.chat.completions.create(
  35. model="gpt-4",
  36. temperature=temperature,
  37. messages=messages
  38. )
  39. return response.choices[0].message.content
  40. def process_audio(audio_data):
  41. raw_transcript = transcribe(audio_data)
  42. print(raw_transcript)
  43. if raw_transcript is None:
  44. return None, None
  45. corrected_transcript = post_process_transcript(raw_transcript)
  46. return raw_transcript, corrected_transcript