bmp_parser.py 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778
  1. # type: ignore
  2. from typing import AsyncGenerator
  3. from core.base.parsers.base_parser import AsyncParser
  4. from core.base.providers import (
  5. CompletionProvider,
  6. DatabaseProvider,
  7. IngestionConfig,
  8. )
  class BMPParser(AsyncParser[str | bytes]):
      """A parser for BMP image data.

      Only the file header and the beginning of the DIB header are read; the
      pixel data is never decoded. ``ingest`` yields a single one-line
      description built from those header fields.
      """

      # All layouts are little-endian and unpadded ("<" in ``struct`` syntax).
      # File header: signature, file size, two reserved words, data offset.
      _FILE_HEADER_FORMAT = "<2sIHHI"
      # 40-byte info header: size, width, height, planes, bits per pixel,
      # compression, followed by fields that are unpacked but not reported.
      _INFO_HEADER_FORMAT = "<IiiHHIIiiII"
      # 12-byte core header: size, width, height, planes, bits per pixel.
      # Width and height are unsigned 16-bit values in this variant.
      _CORE_HEADER_FORMAT = "<IHHHH"
      _CORE_HEADER_SIZE = 12
      _SIGNATURE = b"BM"

      def __init__(
          self,
          config: IngestionConfig,
          database_provider: DatabaseProvider,
          llm_provider: CompletionProvider,
      ):
          """Store the injected configuration and providers.

          Args:
              config: Ingestion configuration, kept as ``self.config``.
              database_provider: Kept as ``self.database_provider``.
              llm_provider: Kept as ``self.llm_provider``.
          """
          self.database_provider = database_provider
          self.llm_provider = llm_provider
          self.config = config

          # The struct module is imported here and exposed as ``self.struct``;
          # header parsing goes through that attribute.
          import struct

          self.struct = struct

      async def extract_bmp_metadata(self, data: bytes) -> dict:
          """Extract metadata from the BMP file header and DIB header.

          Args:
              data: Raw bytes of the BMP file.

          Returns:
              On success, a dict with the keys ``width``, ``height`` (always
              non-negative), ``bits_per_pixel``, ``file_size`` (the size
              declared in the file header, not ``len(data)``) and
              ``compression``. On failure, a dict with a single ``error`` key
              describing the problem; malformed input does not raise.
          """
          struct_mod = self.struct
          try:
              signature, file_size, _, _, _ = struct_mod.unpack_from(
                  self._FILE_HEADER_FORMAT, data, 0
              )
              if signature != self._SIGNATURE:
                  raise ValueError(f"unexpected signature {signature!r}")

              # The DIB header starts right after the file header; its first
              # field is its own size, which identifies the header variant.
              dib_offset = struct_mod.calcsize(self._FILE_HEADER_FORMAT)
              (dib_header_size,) = struct_mod.unpack_from(
                  "<I", data, dib_offset
              )

              if dib_header_size == self._CORE_HEADER_SIZE:
                  _, width, height, _, bits_per_pixel = struct_mod.unpack_from(
                      self._CORE_HEADER_FORMAT, data, dib_offset
                  )
                  # The core header has no compression field; such bitmaps
                  # are stored uncompressed.
                  compression = 0
              else:
                  # Every other size is decoded with the 40-byte layout.
                  dib_fields = struct_mod.unpack_from(
                      self._INFO_HEADER_FORMAT, data, dib_offset
                  )
                  width = dib_fields[1]
                  # A negative height marks a top-down bitmap; report the
                  # magnitude only.
                  height = abs(dib_fields[2])
                  bits_per_pixel = dib_fields[4]
                  compression = dib_fields[5]
          except (struct_mod.error, TypeError, ValueError) as e:
              # struct.error: buffer too short; TypeError: not bytes-like;
              # ValueError: signature mismatch raised above.
              return {"error": f"Failed to parse BMP header: {str(e)}"}

          return {
              "width": width,
              "height": height,
              "bits_per_pixel": bits_per_pixel,
              "file_size": file_size,
              "compression": compression,
          }

      async def ingest(
          self, data: str | bytes, **kwargs
      ) -> AsyncGenerator[str, None]:
          """Ingest BMP data and yield a one-line metadata description.

          Args:
              data: Raw BMP bytes, or a base64-encoded string of them.
              **kwargs: Accepted for interface compatibility; unused here.

          Yields:
              A single sentence with the dimensions, bit depth and declared
              file size. Fields that could not be parsed read ``unknown``.
          """
          if isinstance(data, str):
              # Convert base64 string to bytes if needed
              import base64

              data = base64.b64decode(data)

          metadata = await self.extract_bmp_metadata(data)

          if "error" in metadata:
              # Keep the best-effort description below, but leave a trace of
              # why every field will read "unknown".
              import logging

              logging.getLogger(__name__).warning("%s", metadata["error"])

          width = metadata.get("width", "unknown")
          height = metadata.get("height", "unknown")
          bits_per_pixel = metadata.get("bits_per_pixel", "unknown")
          file_size = metadata.get("file_size", "unknown")

          # Generate description of the BMP file
          yield (
              f"BMP image with dimensions {width}x{height} pixels, "
              f"{bits_per_pixel} bits per pixel, "
              f"file size: {file_size} bytes"
          )