123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- from abc import ABC, abstractmethod
- from typing import Any, Optional, Type
- from pydantic import BaseModel
class InnerConfig(BaseModel, ABC):
    """A base provider configuration class."""

    # Holds keyword arguments given to `create` that do not correspond to a
    # declared model field.
    extra_fields: dict[str, Any] = {}

    class Config:
        populate_by_name = True
        arbitrary_types_allowed = True
        # NOTE(review): pydantic documents this option as `extra`; confirm
        # that `ignore_extra` actually has an effect.
        ignore_extra = True

    @classmethod
    def create(cls: Type["InnerConfig"], **kwargs: Any) -> "InnerConfig":
        """Build an instance from arbitrary keyword arguments.

        Keyword arguments whose names are declared model fields are passed to
        the constructor, with any value equal to the string "None" replaced
        by None. All remaining keyword arguments are stored, unmodified, in
        the instance's `extra_fields` dict.
        """
        declared = cls.model_fields.keys()

        # Split kwargs in one pass: constructor arguments vs. leftovers.
        known: dict[str, Any] = {}
        leftovers: dict[str, Any] = {}
        for name, value in kwargs.items():
            if name in declared:
                known[name] = value if value != "None" else None
            else:
                leftovers[name] = value

        instance = cls(**known)  # type: ignore
        for name, value in leftovers.items():
            instance.extra_fields[name] = value
        return instance
class AppConfig(InnerConfig):
    # Application-level settings: project identity, per-user default limits,
    # model names, and upload size limits.

    project_name: Optional[str] = None
    user_tools_path: Optional[str] = None

    # Default per-user limits.
    default_max_documents_per_user: Optional[int] = 100
    default_max_chunks_per_user: Optional[int] = 10_000
    default_max_collections_per_user: Optional[int] = 5
    default_max_upload_size: int = 2_000_000  # e.g. ~2 MB

    # Model names; presumably one per role (confirm against the consumers).
    quality_llm: Optional[str] = None
    fast_llm: Optional[str] = None
    vlm: Optional[str] = None
    audio_lm: Optional[str] = None
    reasoning_llm: Optional[str] = None
    planning_llm: Optional[str] = None

    # Maximum upload size keyed by file extension (no leading dot).
    # The sizes below are examples; adjust them as needed.
    max_upload_size_by_type: dict[str, int] = {
        # Common text-based formats
        "txt": 2_000_000,
        "md": 2_000_000,
        "tsv": 2_000_000,
        "csv": 5_000_000,
        "html": 5_000_000,
        # Office documents
        "doc": 10_000_000,
        "docx": 10_000_000,
        "ppt": 20_000_000,
        "pptx": 20_000_000,
        "xls": 10_000_000,
        "xlsx": 10_000_000,
        "odt": 5_000_000,
        # PDFs can expand quite a bit when converted to text
        "pdf": 30_000_000,
        # E-mail
        "eml": 5_000_000,
        "msg": 5_000_000,
        "p7s": 5_000_000,
        # Images
        "bmp": 5_000_000,
        "heic": 5_000_000,
        "jpeg": 5_000_000,
        "jpg": 5_000_000,
        "png": 5_000_000,
        "tiff": 5_000_000,
        # Others
        "epub": 10_000_000,
        "rtf": 5_000_000,
        "rst": 5_000_000,
        "org": 5_000_000,
    }
class ProviderConfig(BaseModel, ABC):
    """A base provider configuration class."""

    app: Optional[AppConfig] = None  # Add an app_config field
    # Holds keyword arguments given to `create` that do not correspond to a
    # declared model field.
    extra_fields: dict[str, Any] = {}
    provider: Optional[str] = None

    class Config:
        populate_by_name = True
        arbitrary_types_allowed = True
        # NOTE(review): pydantic documents this option as `extra`; confirm
        # that `ignore_extra` actually has an effect.
        ignore_extra = True

    # ------------------------------------------------------------------
    # Abstract interface that subclasses must implement
    # ------------------------------------------------------------------

    @abstractmethod
    def validate_config(self) -> None:
        pass

    @property
    @abstractmethod
    def supported_providers(self) -> list[str]:
        """Define a list of supported providers."""
        pass

    # ------------------------------------------------------------------
    # Alternate constructors
    # ------------------------------------------------------------------

    @classmethod
    def create(cls: Type["ProviderConfig"], **kwargs: Any) -> "ProviderConfig":
        """Build an instance from arbitrary keyword arguments.

        Keyword arguments whose names are declared model fields are passed to
        the constructor, with any value equal to the string "None" replaced
        by None. All remaining keyword arguments are stored, unmodified, in
        the instance's `extra_fields` dict.
        """
        declared = cls.model_fields.keys()

        # Split kwargs in one pass: constructor arguments vs. leftovers.
        known: dict[str, Any] = {}
        leftovers: dict[str, Any] = {}
        for name, value in kwargs.items():
            if name in declared:
                known[name] = value if value != "None" else None
            else:
                leftovers[name] = value

        instance = cls(**known)  # type: ignore
        for name, value in leftovers.items():
            instance.extra_fields[name] = value
        return instance

    @classmethod
    def from_dict(
        cls: Type["ProviderConfig"], data: dict[str, Any]
    ) -> "ProviderConfig":
        """Create a new instance of the config from a dictionary."""
        # Delegates to `create`, so unknown keys end up in `extra_fields`.
        return cls.create(**data)
class Provider(ABC):
    """Base class that gives all providers a common interface."""

    def __init__(self, config: ProviderConfig, *args, **kwargs):
        """Validate the given configuration and keep it on the instance.

        A truthy `config` has its `validate_config()` method called before it
        is stored. A falsy `config` (for example None) is stored without
        validation. Additional positional and keyword arguments are accepted
        but not used by this constructor.
        """
        if config:
            # Runs before assignment, so a failing validation leaves
            # `self.config` unset.
            config.validate_config()
        self.config = config
|