audio_parser.py 2.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. import logging
  2. import os
  3. import tempfile
  4. from typing import AsyncGenerator
  5. from core.base.parsers.base_parser import AsyncParser
  6. from core.base.providers import (
  7. CompletionProvider,
  8. DatabaseProvider,
  9. IngestionConfig,
  10. )
  11. logger = logging.getLogger()
  12. class AudioParser(AsyncParser[bytes]):
  13. """A parser for audio data using Whisper transcription."""
  14. def __init__(
  15. self,
  16. config: IngestionConfig,
  17. database_provider: DatabaseProvider,
  18. llm_provider: CompletionProvider,
  19. ):
  20. self.database_provider = database_provider
  21. self.llm_provider = llm_provider
  22. self.config = config
  23. try:
  24. from litellm import atranscription
  25. self.atranscription = atranscription
  26. except ImportError:
  27. logger.error("Failed to import LiteLLM transcription")
  28. raise ImportError(
  29. "Please install the `litellm` package to use the AudioParser."
  30. )
  31. async def ingest( # type: ignore
  32. self, data: bytes, **kwargs
  33. ) -> AsyncGenerator[str, None]:
  34. """
  35. Ingest audio data and yield a transcription using Whisper via LiteLLM.
  36. Args:
  37. data: Raw audio bytes
  38. *args, **kwargs: Additional arguments passed to the transcription call
  39. Yields:
  40. Chunks of transcribed text
  41. """
  42. try:
  43. # Create a temporary file to store the audio data
  44. with tempfile.NamedTemporaryFile(
  45. suffix=".wav", delete=False
  46. ) as temp_file:
  47. temp_file.write(data)
  48. temp_file_path = temp_file.name
  49. # Call Whisper transcription
  50. response = await self.atranscription(
  51. model=self.config.audio_transcription_model,
  52. file=open(temp_file_path, "rb"),
  53. **kwargs,
  54. )
  55. # The response should contain the transcribed text directly
  56. yield response.text
  57. except Exception as e:
  58. logger.error(f"Error processing audio with Whisper: {str(e)}")
  59. raise
  60. finally:
  61. # Clean up the temporary file
  62. try:
  63. os.unlink(temp_file_path)
  64. except Exception as e:
  65. logger.warning(f"Failed to delete temporary file: {str(e)}")