base.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135
  1. from abc import ABC, abstractmethod
  2. from typing import Any, Optional, Type
  3. from pydantic import BaseModel
  4. class InnerConfig(BaseModel, ABC):
  5. """A base provider configuration class."""
  6. extra_fields: dict[str, Any] = {}
  7. class Config:
  8. populate_by_name = True
  9. arbitrary_types_allowed = True
  10. ignore_extra = True
  11. @classmethod
  12. def create(cls: Type["InnerConfig"], **kwargs: Any) -> "InnerConfig":
  13. base_args = cls.model_fields.keys()
  14. filtered_kwargs = {
  15. k: v if v != "None" else None
  16. for k, v in kwargs.items()
  17. if k in base_args
  18. }
  19. instance = cls(**filtered_kwargs) # type: ignore
  20. for k, v in kwargs.items():
  21. if k not in base_args:
  22. instance.extra_fields[k] = v
  23. return instance
  24. class AppConfig(InnerConfig):
  25. project_name: Optional[str] = None
  26. user_tools_path: Optional[str] = None
  27. default_max_documents_per_user: Optional[int] = 100
  28. default_max_chunks_per_user: Optional[int] = 10_000
  29. default_max_collections_per_user: Optional[int] = 5
  30. default_max_upload_size: int = 2_000_000 # e.g. ~2 MB
  31. quality_llm: Optional[str] = None
  32. fast_llm: Optional[str] = None
  33. vlm: Optional[str] = None
  34. audio_lm: Optional[str] = None
  35. reasoning_llm: Optional[str] = None
  36. planning_llm: Optional[str] = None
  37. # File extension to max-size mapping
  38. # These are examples; adjust sizes as needed.
  39. max_upload_size_by_type: dict[str, int] = {
  40. # Common text-based formats
  41. "txt": 2_000_000,
  42. "md": 2_000_000,
  43. "tsv": 2_000_000,
  44. "csv": 5_000_000,
  45. "html": 5_000_000,
  46. # Office docs
  47. "doc": 10_000_000,
  48. "docx": 10_000_000,
  49. "ppt": 20_000_000,
  50. "pptx": 20_000_000,
  51. "xls": 10_000_000,
  52. "xlsx": 10_000_000,
  53. "odt": 5_000_000,
  54. # PDFs can expand quite a bit when converted to text
  55. "pdf": 30_000_000,
  56. # E-mail
  57. "eml": 5_000_000,
  58. "msg": 5_000_000,
  59. "p7s": 5_000_000,
  60. # Images
  61. "bmp": 5_000_000,
  62. "heic": 5_000_000,
  63. "jpeg": 5_000_000,
  64. "jpg": 5_000_000,
  65. "png": 5_000_000,
  66. "tiff": 5_000_000,
  67. # Others
  68. "epub": 10_000_000,
  69. "rtf": 5_000_000,
  70. "rst": 5_000_000,
  71. "org": 5_000_000,
  72. }
  73. class ProviderConfig(BaseModel, ABC):
  74. """A base provider configuration class."""
  75. app: Optional[AppConfig] = None # Add an app_config field
  76. extra_fields: dict[str, Any] = {}
  77. provider: Optional[str] = None
  78. class Config:
  79. populate_by_name = True
  80. arbitrary_types_allowed = True
  81. ignore_extra = True
  82. @abstractmethod
  83. def validate_config(self) -> None:
  84. pass
  85. @classmethod
  86. def create(cls: Type["ProviderConfig"], **kwargs: Any) -> "ProviderConfig":
  87. base_args = cls.model_fields.keys()
  88. filtered_kwargs = {
  89. k: v if v != "None" else None
  90. for k, v in kwargs.items()
  91. if k in base_args
  92. }
  93. instance = cls(**filtered_kwargs) # type: ignore
  94. for k, v in kwargs.items():
  95. if k not in base_args:
  96. instance.extra_fields[k] = v
  97. return instance
  98. @property
  99. @abstractmethod
  100. def supported_providers(self) -> list[str]:
  101. """Define a list of supported providers."""
  102. pass
  103. @classmethod
  104. def from_dict(
  105. cls: Type["ProviderConfig"], data: dict[str, Any]
  106. ) -> "ProviderConfig":
  107. """Create a new instance of the config from a dictionary."""
  108. return cls.create(**data)
  109. class Provider(ABC):
  110. """A base provider class to provide a common interface for all
  111. providers."""
  112. def __init__(self, config: ProviderConfig, *args, **kwargs):
  113. if config:
  114. config.validate_config()
  115. self.config = config