text_parser.py 783 B

123456789101112131415161718192021222324252627282930
  1. # type: ignore
  2. from typing import AsyncGenerator
  3. from core.base.parsers.base_parser import AsyncParser
  4. from core.base.providers import (
  5. CompletionProvider,
  6. DatabaseProvider,
  7. IngestionConfig,
  8. )
  9. class TextParser(AsyncParser[str | bytes]):
  10. """A parser for raw text data."""
  11. def __init__(
  12. self,
  13. config: IngestionConfig,
  14. database_provider: DatabaseProvider,
  15. llm_provider: CompletionProvider,
  16. ):
  17. self.database_provider = database_provider
  18. self.llm_provider = llm_provider
  19. self.config = config
  20. async def ingest(
  21. self, data: str | bytes, *args, **kwargs
  22. ) -> AsyncGenerator[str | bytes, None]:
  23. if isinstance(data, bytes):
  24. data = data.decode("utf-8")
  25. yield data