jack 3 months ago
parent
commit
bf409edbfa
75 changed files with 3044 additions and 1205 deletions
  1. cli/main.py (+2 -2)
  2. compose.full.yaml (+0 -1)
  3. compose.full_with_replicas.yaml (+0 -1)
  4. core/agent/rag.py (+2 -8)
  5. core/base/abstractions/__init__.py (+2 -12)
  6. core/base/logger/__init__.py (+0 -3)
  7. core/base/providers/auth.py (+2 -4)
  8. core/base/providers/base.py (+1 -1)
  9. core/base/providers/crypto.py (+1 -1)
  10. core/base/providers/database.py (+6 -45)
  11. core/base/providers/email.py (+9 -6)
  12. core/base/providers/ingestion.py (+14 -39)
  13. core/configs/full_azure.toml (+0 -3)
  14. core/configs/full_local_llm.toml (+0 -6)
  15. core/configs/local_llm.toml (+0 -6)
  16. core/configs/r2r_azure.toml (+0 -3)
  17. core/configs/r2r_azure_with_test_limits.toml (+0 -3)
  18. core/database/chunks.py (+3 -4)
  19. core/database/collections.py (+120 -4)
  20. core/database/conversations.py (+201 -1)
  21. core/database/documents.py (+125 -10)
  22. core/database/files.py (+62 -3)
  23. core/database/filters.py (+4 -5)
  24. core/database/graphs.py (+354 -114)
  25. core/database/limits.py (+33 -1)
  26. core/database/postgres.py (+15 -26)
  27. core/database/users.py (+112 -2)
  28. core/database/vecs/adapter/base.py (+3 -3)
  29. core/main/abstractions.py (+0 -4)
  30. core/main/api/v3/base_router.py (+2 -2)
  31. core/main/api/v3/collections_router.py (+109 -1)
  32. core/main/api/v3/conversations_router.py (+214 -0)
  33. core/main/api/v3/documents_router.py (+439 -6)
  34. core/main/api/v3/graph_router.py (+347 -86)
  35. core/main/api/v3/indices_router.py (+2 -2)
  36. core/main/api/v3/users_router.py (+109 -1)
  37. core/main/assembly/factory.py (+31 -51)
  38. core/main/orchestration/hatchet/kg_workflow.py (+0 -121)
  39. core/main/orchestration/simple/kg_workflow.py (+0 -26)
  40. core/main/services/auth_service.py (+2 -2)
  41. core/main/services/graph_service.py (+14 -88)
  42. core/main/services/management_service.py (+173 -7)
  43. core/parsers/structured/csv_parser.py (+3 -3)
  44. core/pipes/__init__.py (+0 -4)
  45. core/pipes/abstractions/search_pipe.py (+2 -2)
  46. core/pipes/ingestion/embedding_pipe.py (+2 -2)
  47. core/pipes/kg/community_summary.py (+1 -1)
  48. core/pipes/kg/extraction.py (+2 -2)
  49. core/providers/auth/r2r_auth.py (+13 -6)
  50. core/providers/auth/supabase.py (+19 -0)
  51. core/providers/ingestion/unstructured/base.py (+3 -3)
  52. core/telemetry/events.py (+0 -69)
  53. sdk/async_client.py (+1 -17)
  54. sdk/models.py (+0 -4)
  55. sdk/sync_client.py (+0 -48)
  56. sdk/v3/chunks.py (+3 -3)
  57. sdk/v3/collections.py (+5 -5)
  58. sdk/v3/conversations.py (+108 -6)
  59. sdk/v3/documents.py (+212 -41)
  60. sdk/v3/graphs.py (+2 -2)
  61. sdk/v3/indices.py (+3 -29)
  62. sdk/v3/prompts.py (+6 -2)
  63. sdk/v3/retrieval.py (+6 -6)
  64. sdk/v3/users.py (+13 -8)
  65. shared/abstractions/__init__.py (+2 -9)
  66. shared/abstractions/base.py (+9 -4)
  67. shared/abstractions/document.py (+1 -1)
  68. shared/abstractions/graph.py (+6 -0)
  69. shared/abstractions/ingestion.py (+1 -1)
  70. shared/abstractions/kg.py (+0 -84)
  71. shared/abstractions/search.py (+1 -19)
  72. shared/utils/base_utils.py (+0 -1)
  73. shared/utils/splitter/text.py (+77 -80)
  74. tests/unit/test_collections.py (+4 -2)
  75. tests/unit/test_graphs.py (+26 -27)

+ 2 - 2
cli/main.py

@@ -1,5 +1,5 @@
 import json
-from typing import Any, Dict
+from typing import Any

 import asyncclick as click
 from rich.console import Console
@@ -73,7 +73,7 @@ def _ensure_config_dir_exists() -> None:
     CONFIG_DIR.mkdir(parents=True, exist_ok=True)


-def save_config(config_data: Dict[str, Any]) -> None:
+def save_config(config_data: dict[str, Any]) -> None:
     """
     Persist the given config data to ~/.r2r/config.json.
     """

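Note: throughout this commit, `typing.Dict`/`typing.Union` annotations are replaced by the built-in `dict[...]` generics and `X | Y` unions (PEP 585/604, Python 3.9+/3.10+). A minimal sketch of the updated `save_config` signature; the body shown here is illustrative, only the signature and the `~/.r2r/config.json` target come from the diff:

```python
import json
from pathlib import Path
from typing import Any

CONFIG_DIR = Path.home() / ".r2r"  # assumed definition; the real module defines its own


def save_config(config_data: dict[str, Any]) -> None:
    """Persist the given config data to ~/.r2r/config.json (illustrative body)."""
    CONFIG_DIR.mkdir(parents=True, exist_ok=True)  # mirrors _ensure_config_dir_exists above
    (CONFIG_DIR / "config.json").write_text(json.dumps(config_data, indent=2))
```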
+ 0 - 1
compose.full.yaml

@@ -307,7 +307,6 @@ services:
       - R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
       - R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
       - R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-postgres}
-      - R2R_POSTGRES_PROJECT_NAME=${R2R_POSTGRES_PROJECT_NAME:-r2r_default}
       - R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
       - R2R_POSTGRES_STATEMENT_CACHE_SIZE=${R2R_POSTGRES_STATEMENT_CACHE_SIZE:-100}
 
 

+ 0 - 1
compose.full_with_replicas.yaml

@@ -305,7 +305,6 @@ services:
       - R2R_POSTGRES_HOST=${R2R_POSTGRES_HOST:-postgres}
       - R2R_POSTGRES_PORT=${R2R_POSTGRES_PORT:-5432}
       - R2R_POSTGRES_DBNAME=${R2R_POSTGRES_DBNAME:-postgres}
-      - R2R_POSTGRES_PROJECT_NAME=${R2R_POSTGRES_PROJECT_NAME:-r2r_default}
       - R2R_POSTGRES_MAX_CONNECTIONS=${R2R_POSTGRES_MAX_CONNECTIONS:-1024}
       - R2R_POSTGRES_STATEMENT_CACHE_SIZE=${R2R_POSTGRES_STATEMENT_CACHE_SIZE:-100}
 
 

+ 2 - 8
core/agent/rag.py

@@ -1,5 +1,3 @@
-from typing import Union
-
 from core.agent import R2RAgent, R2RStreamingAgent
 from core.base import (
     format_search_results_for_llm,
@@ -126,9 +124,7 @@ class R2RRAGAgent(RAGAgentMixin, R2RAgent):
     def __init__(
         self,
         database_provider: DatabaseProvider,
-        llm_provider: Union[
-            LiteLLMCompletionProvider, OpenAICompletionProvider
-        ],
+        llm_provider: LiteLLMCompletionProvider | OpenAICompletionProvider,
         search_pipeline: SearchPipeline,
         config: AgentConfig,
     ):
@@ -144,9 +140,7 @@ class R2RStreamingRAGAgent(RAGAgentMixin, R2RStreamingAgent):
     def __init__(
         self,
         database_provider: DatabaseProvider,
-        llm_provider: Union[
-            LiteLLMCompletionProvider, OpenAICompletionProvider
-        ],
+        llm_provider: LiteLLMCompletionProvider | OpenAICompletionProvider,
         search_pipeline: SearchPipeline,
         config: AgentConfig,
     ):

+ 2 - 12
core/base/abstractions/__init__.py

@@ -25,20 +25,16 @@ from shared.abstractions.graph import (
     Graph,
     KGExtraction,
     Relationship,
+    StoreType,
 )
 from shared.abstractions.ingestion import (
     ChunkEnrichmentSettings,
     ChunkEnrichmentStrategy,
 )
 from shared.abstractions.kg import (
-    GraphBuildSettings,
     GraphCommunitySettings,
-    GraphEntitySettings,
-    GraphRelationshipSettings,
     KGCreationSettings,
     KGEnrichmentSettings,
-    KGEntityDeduplicationSettings,
-    KGEntityDeduplicationType,
     KGRunType,
 )
 from shared.abstractions.llm import (
@@ -59,7 +55,6 @@ from shared.abstractions.search import (
     HybridSearchSettings,
     KGCommunityResult,
     KGEntityResult,
-    KGGlobalResult,
     KGRelationshipResult,
     KGSearchResultType,
     SearchMode,
@@ -110,6 +105,7 @@ __all__ = [
     # Graph abstractions
     "Entity",
     "Community",
+    "StoreType",
     "KGExtraction",
     "Relationship",
     # Index abstractions
@@ -130,7 +126,6 @@ __all__ = [
     "KGEntityResult",
     "KGRelationshipResult",
     "KGCommunityResult",
-    "KGGlobalResult",
     "GraphSearchSettings",
     "ChunkSearchSettings",
     "ChunkSearchResult",
@@ -141,12 +136,7 @@ __all__ = [
     # KG abstractions
     "KGCreationSettings",
     "KGEnrichmentSettings",
-    "KGEntityDeduplicationSettings",
-    "GraphBuildSettings",
-    "GraphEntitySettings",
-    "GraphRelationshipSettings",
     "GraphCommunitySettings",
-    "KGEntityDeduplicationType",
     "KGRunType",
     # User abstractions
     "Token",

+ 0 - 3
core/base/logger/__init__.py

@@ -1,9 +1,6 @@
-from .base import RunInfoLog
 from .run_manager import RunManager, manage_run

 __all__ = [
-    # Basic types
-    "RunInfoLog",
     # Run Manager
     "RunManager",
     "manage_run",

+ 2 - 4
core/base/providers/auth.py

@@ -66,10 +66,8 @@ class AuthProvider(Provider, ABC):
         self.database_provider = database_provider
         self.email_provider = email_provider
         super().__init__(config)
-        self.config: AuthConfig = config  # for type hinting
-        self.database_provider: "PostgresDatabaseProvider" = (
-            database_provider  # for type hinting
-        )
+        self.config: AuthConfig = config
+        self.database_provider: "PostgresDatabaseProvider" = database_provider

     async def _get_default_admin_user(self) -> User:
         return await self.database_provider.users_handler.get_user_by_email(

+ 1 - 1
core/base/providers/base.py

@@ -1,5 +1,5 @@
 from abc import ABC, abstractmethod
-from typing import Any, Optional, Sequence, Type
+from typing import Any, Optional, Type

 from pydantic import BaseModel
 
 

+ 1 - 1
core/base/providers/crypto.py

@@ -10,7 +10,7 @@ class CryptoConfig(ProviderConfig):
 
 
     @property
     def supported_providers(self) -> list[str]:
-        return ["bcrypt", "nacl"]  # Add other crypto providers as needed
+        return ["bcrypt", "nacl"]

     def validate_config(self) -> None:
         if self.provider not in self.supported_providers:

+ 6 - 45
core/base/providers/database.py

@@ -1,56 +1,20 @@
-import logging
-from abc import abstractmethod
-from datetime import datetime
-from io import BytesIO
-from typing import BinaryIO, Optional, Tuple
-from uuid import UUID
-
-from pydantic import BaseModel
-
-from core.base.abstractions import (
-    ChunkSearchResult,
-    Community,
-    DocumentResponse,
-    Entity,
-    IndexArgsHNSW,
-    IndexArgsIVFFlat,
-    IndexMeasure,
-    IndexMethod,
-    KGCreationSettings,
-    KGEnrichmentSettings,
-    KGEntityDeduplicationSettings,
-    Message,
-    Relationship,
-    SearchSettings,
-    User,
-    VectorEntry,
-    VectorTableName,
-)
-from core.base.api.models import CollectionResponse, GraphResponse
-
-from .base import Provider, ProviderConfig
-
-"""Base classes for knowledge graph providers."""
+"""Base classes for database providers."""
 
 
 import logging
 from abc import ABC, abstractmethod
-from typing import Any, Optional, Sequence, Tuple, Type
+from typing import Any, Optional, Sequence
 from uuid import UUID

 from pydantic import BaseModel

-from ..abstractions import (
-    Community,
-    Entity,
+from core.base.abstractions import (
     GraphSearchSettings,
     KGCreationSettings,
     KGEnrichmentSettings,
-    KGEntityDeduplicationSettings,
-    KGExtraction,
-    R2RSerializable,
-    Relationship,
 )

+from .base import Provider, ProviderConfig
+
 logger = logging.getLogger()


@@ -168,14 +132,11 @@ class DatabaseConfig(ProviderConfig):
     collection_summary_task_prompt: str = "default_collection_summary"
     enable_fts: bool = False

-    # KG settings
+    # Graph settings
     batch_size: Optional[int] = 1
     kg_store_path: Optional[str] = None
     graph_enrichment_settings: KGEnrichmentSettings = KGEnrichmentSettings()
     graph_creation_settings: KGCreationSettings = KGCreationSettings()
-    graph_entity_deduplication_settings: KGEntityDeduplicationSettings = (
-        KGEntityDeduplicationSettings()
-    )
     graph_search_settings: GraphSearchSettings = GraphSearchSettings()

     # Rate limits

+ 9 - 6
core/base/providers/email.py

@@ -29,11 +29,14 @@ class EmailConfig(ProviderConfig):
         ]  # Could add more providers like AWS SES, SendGrid etc.

     def validate_config(self) -> None:
-        if self.provider == "sendgrid":
-            if not (self.sendgrid_api_key or os.getenv("SENDGRID_API_KEY")):
-                raise ValueError(
-                    "SendGrid API key is required when using SendGrid provider"
-                )
+        if (
+            self.provider == "sendgrid"
+            and not self.sendgrid_api_key
+            and not os.getenv("SENDGRID_API_KEY")
+        ):
+            raise ValueError(
+                "SendGrid API key is required when using SendGrid provider"
+            )


 logger = logging.getLogger(__name__)
@@ -46,7 +49,7 @@ class EmailProvider(Provider, ABC):
                 "EmailProvider must be initialized with an EmailConfig"
             )
         super().__init__(config)
-        self.config: EmailConfig = config  # for type hinting
+        self.config: EmailConfig = config

     @abstractmethod
     async def send_email(

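Note: the `validate_config` change above flattens a nested `if` into a single guard. Behaviour should be unchanged, since `not (key or env)` is equivalent to `not key and not env`; the error is still raised only when the SendGrid provider is selected and no API key is found on the config or in the environment. A standalone sketch of the check (the function name is illustrative):

```python
import os
from typing import Optional


def require_sendgrid_key(provider: str, sendgrid_api_key: Optional[str]) -> None:
    # Raise only for the SendGrid provider when no key is configured anywhere.
    if (
        provider == "sendgrid"
        and not sendgrid_api_key
        and not os.getenv("SENDGRID_API_KEY")
    ):
        raise ValueError(
            "SendGrid API key is required when using SendGrid provider"
        )
```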
+ 14 - 39
core/base/providers/ingestion.py

@@ -16,6 +16,19 @@ if TYPE_CHECKING:
     from core.database import PostgresDatabaseProvider


+class ChunkingStrategy(str, Enum):
+    RECURSIVE = "recursive"
+    CHARACTER = "character"
+    BASIC = "basic"
+    BY_TITLE = "by_title"
+
+
+class IngestionMode(str, Enum):
+    hi_res = "hi-res"
+    fast = "fast"
+    custom = "custom"
+
+
 class IngestionConfig(ProviderConfig):
     _defaults: ClassVar[dict] = {
         "app": AppConfig(),
@@ -44,7 +57,7 @@ class IngestionConfig(ProviderConfig):
     excluded_parsers: list[str] = Field(
         default_factory=lambda: IngestionConfig._defaults["excluded_parsers"]
     )
-    chunking_strategy: str = Field(
+    chunking_strategy: str | ChunkingStrategy = Field(
         default_factory=lambda: IngestionConfig._defaults["chunking_strategy"]
     )
     chunk_enrichment_settings: ChunkEnrichmentSettings = Field(
@@ -131,31 +144,6 @@ class IngestionConfig(ProviderConfig):
         else:
             return cls(app=app)
 
 
-    @classmethod
-    def get_default(cls, mode: str, app) -> "IngestionConfig":
-        """Return default ingestion configuration for a given mode."""
-        if mode == "hi-res":
-            # More thorough parsing, no skipping summaries, possibly larger `chunks_for_document_summary`.
-            return cls(app=app, parser_overrides={"pdf": "zerox"})
-        # elif mode == "fast":
-        #     # Skip summaries and other enrichment steps for speed.
-        #     return cls(
-        #         app=app,
-        #     )
-        else:
-            # For `custom` or any unrecognized mode, return a base config
-            return cls(app=app)
-
-    @classmethod
-    def set_default(cls, **kwargs):
-        for key, value in kwargs.items():
-            if key in cls._defaults:
-                cls._defaults[key] = value
-            else:
-                raise AttributeError(
-                    f"No default attribute '{key}' in GenerationConfig"
-                )
-
     class Config:
         populate_by_name = True
         json_schema_extra = {
@@ -193,16 +181,3 @@ class IngestionProvider(Provider, ABC):
         self.config: IngestionConfig = config
         self.llm_provider = llm_provider
         self.database_provider: "PostgresDatabaseProvider" = database_provider
-
-
-class ChunkingStrategy(str, Enum):
-    RECURSIVE = "recursive"
-    CHARACTER = "character"
-    BASIC = "basic"
-    BY_TITLE = "by_title"
-
-
-class IngestionMode(str, Enum):
-    hi_res = "hi-res"
-    fast = "fast"
-    custom = "custom"

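Note: `ChunkingStrategy` and `IngestionMode` move from the bottom of the module to above `IngestionConfig` because the new annotation `chunking_strategy: str | ChunkingStrategy` is evaluated while the class body executes, so the enum must already be defined (the module does not appear to use `from __future__ import annotations`). A minimal sketch of that ordering constraint, using a stand-in model rather than the real `IngestionConfig`:

```python
from enum import Enum

from pydantic import BaseModel


class ChunkingStrategy(str, Enum):
    RECURSIVE = "recursive"
    CHARACTER = "character"
    BASIC = "basic"
    BY_TITLE = "by_title"


class ExampleIngestionConfig(BaseModel):
    # If ChunkingStrategy were defined below this class, evaluating the
    # annotation here would raise NameError at import time.
    chunking_strategy: str | ChunkingStrategy = ChunkingStrategy.RECURSIVE
```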
+ 0 - 3
core/configs/full_azure.toml

@@ -15,9 +15,6 @@ concurrent_request_limit = 128
     clustering_mode = "remote"
     generation_config = { model = "azure/gpt-4o-mini" }

-  [database.graph_entity_deduplication_settings]
-    generation_config = { model = "azure/gpt-4o-mini" }
-
   [database.graph_enrichment_settings]
     generation_config = { model = "azure/gpt-4o-mini" }
 
 

+ 0 - 6
core/configs/full_local_llm.toml

@@ -31,12 +31,6 @@ provider = "postgres"
     max_description_input_length = 65536
     generation_config = { model = "ollama/llama3.1" } # and other params, model used for relationshipt extraction

-  [database.graph_entity_deduplication_settings]
-    graph_entity_deduplication_type = "by_name"
-    graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
-    max_description_input_length = 65536
-    generation_config = { model = "ollama/llama3.1" } # and other params, model used for deduplication
-
   [database.graph_enrichment_settings]
     community_reports_prompt = "graphrag_community_reports"
     max_summary_input_length = 65536

+ 0 - 6
core/configs/local_llm.toml

@@ -37,12 +37,6 @@ provider = "postgres"
     max_description_input_length = 65536
     generation_config = { model = "ollama/llama3.1" } # and other params, model used for relationshipt extraction

-  [database.graph_entity_deduplication_settings]
-    graph_entity_deduplication_type = "by_name"
-    graph_entity_deduplication_prompt = "graphrag_entity_deduplication"
-    max_description_input_length = 65536
-    generation_config = { model = "ollama/llama3.1" } # and other params, model used for deduplication
-
   [database.graph_enrichment_settings]
     community_reports_prompt = "graphrag_community_reports"
     max_summary_input_length = 65536

+ 0 - 3
core/configs/r2r_azure.toml

@@ -13,9 +13,6 @@ batch_size = 256
   [database.graph_creation_settings]
     generation_config = { model = "azure/gpt-4o-mini" }

-  [database.graph_entity_deduplication_settings]
-    generation_config = { model = "azure/gpt-4o-mini" }
-
   [database.graph_enrichment_settings]
     generation_config = { model = "azure/gpt-4o-mini" }
 
 

+ 0 - 3
core/configs/r2r_azure_with_test_limits.toml

@@ -16,9 +16,6 @@ batch_size = 256
   [database.graph_creation_settings]
     generation_config = { model = "azure/gpt-4o-mini" }

-  [database.graph_entity_deduplication_settings]
-    generation_config = { model = "azure/gpt-4o-mini" }
-
   [database.graph_enrichment_settings]
     generation_config = { model = "azure/gpt-4o-mini" }
 
 

+ 3 - 4
core/database/chunks.py

@@ -612,10 +612,9 @@ class PostgresChunksHandler(Handler):
         SET collection_ids = array_append(collection_ids, $1)
         WHERE document_id = $2 AND NOT ($1 = ANY(collection_ids));
         """
-        result = await self.connection_manager.execute_query(
+        return await self.connection_manager.execute_query(
             query, (str(collection_id), str(document_id))
         )
-        return result

     async def remove_document_from_collection_vector(
         self, document_id: UUID, collection_id: UUID
@@ -883,7 +882,7 @@ class PostgresChunksHandler(Handler):

         where_clause = " AND ".join(where_clauses) if where_clauses else ""
         if where_clause:
-            where_clause = "AND " + where_clause
+            where_clause = f"AND {where_clause}"

         query = f"""
         WITH index_info AS (
@@ -1223,7 +1222,7 @@ class PostgresChunksHandler(Handler):
                     ) as body_rank
                 FROM {self._get_table_name(PostgresChunksHandler.TABLE_NAME)}
                 WHERE $1 != ''
-                {f"AND to_tsvector('english', text) @@ websearch_to_tsquery('english', $1)" if settings.search_over_body else ""}
+                {"AND to_tsvector('english', text) @@ websearch_to_tsquery('english', $1)" if settings.search_over_body else ""}
                 GROUP BY document_id
             ),
             -- Combined scores with document metadata

+ 120 - 4
core/database/collections.py

@@ -1,6 +1,8 @@
+import csv
 import json
 import logging
-from typing import Any, Optional
+import tempfile
+from typing import IO, Any, Optional
 from uuid import UUID, uuid4

 from asyncpg.exceptions import UniqueViolationError
@@ -117,6 +119,11 @@ class PostgresCollectionsHandler(Handler):
                 message="Collection with this ID already exists",
                 message="Collection with this ID already exists",
                 status_code=409,
                 status_code=409,
             )
             )
+        except Exception as e:
+            raise HTTPException(
+                status_code=500,
+                detail=f"An error occurred while creating the collection: {e}",
+            ) from e
 
 
     async def update_collection(
     async def update_collection(
         self,
         self,
@@ -189,7 +196,7 @@ class PostgresCollectionsHandler(Handler):
             raise HTTPException(
                 status_code=500,
                 detail=f"An error occurred while updating the collection: {e}",
-            )
+            ) from e

     async def delete_collection_relational(self, collection_id: UUID) -> None:
         # Remove collection_id from users
@@ -361,7 +368,7 @@ class PostgresCollectionsHandler(Handler):
             raise HTTPException(
                 status_code=500,
                 detail=f"An error occurred while fetching collections: {e}",
-            )
+            ) from e

     async def assign_document_to_collection_relational(
         self,
@@ -435,7 +442,7 @@ class PostgresCollectionsHandler(Handler):
             raise HTTPException(
                 status_code=500,
                 detail=f"An error '{e}' occurred while assigning the document to the collection",
-            )
+            ) from e

     async def remove_document_from_collection_relational(
         self, document_id: UUID, collection_id: UUID
@@ -468,3 +475,112 @@ class PostgresCollectionsHandler(Handler):
                 status_code=404,
                 message="Document not found in the specified collection",
             )
+
+    async def export_to_csv(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        """
+        Creates a CSV file from the PostgreSQL data and returns the path to the temp file.
+        """
+        valid_columns = {
+            "id",
+            "owner_id",
+            "name",
+            "description",
+            "graph_sync_status",
+            "graph_cluster_status",
+            "created_at",
+            "updated_at",
+            "user_count",
+            "document_count",
+        }
+
+        if not columns:
+            columns = list(valid_columns)
+        elif invalid_cols := set(columns) - valid_columns:
+            raise ValueError(f"Invalid columns: {invalid_cols}")
+
+        select_stmt = f"""
+            SELECT
+                id::text,
+                owner_id::text,
+                name,
+                description,
+                graph_sync_status,
+                graph_cluster_status,
+                to_char(created_at, 'YYYY-MM-DD HH24:MI:SS') AS created_at,
+                to_char(updated_at, 'YYYY-MM-DD HH24:MI:SS') AS updated_at,
+                user_count,
+                document_count
+            FROM {self._get_table_name(self.TABLE_NAME)}
+        """
+
+        params = []
+        if filters:
+            conditions = []
+            param_index = 1
+
+            for field, value in filters.items():
+                if field not in valid_columns:
+                    continue
+
+                if isinstance(value, dict):
+                    for op, val in value.items():
+                        if op == "$eq":
+                            conditions.append(f"{field} = ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$gt":
+                            conditions.append(f"{field} > ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$lt":
+                            conditions.append(f"{field} < ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                else:
+                    # Direct equality
+                    conditions.append(f"{field} = ${param_index}")
+                    params.append(value)
+                    param_index += 1
+
+            if conditions:
+                select_stmt = f"{select_stmt} WHERE {' AND '.join(conditions)}"
+
+        select_stmt = f"{select_stmt} ORDER BY created_at DESC"
+
+        temp_file = None
+        try:
+            temp_file = tempfile.NamedTemporaryFile(
+                mode="w", delete=True, suffix=".csv"
+            )
+            writer = csv.writer(temp_file, quoting=csv.QUOTE_ALL)
+
+            async with self.connection_manager.pool.get_connection() as conn:  # type: ignore
+                async with conn.transaction():
+                    cursor = await conn.cursor(select_stmt, *params)
+
+                    if include_header:
+                        writer.writerow(columns)
+
+                    chunk_size = 1000
+                    while True:
+                        rows = await cursor.fetch(chunk_size)
+                        if not rows:
+                            break
+                        for row in rows:
+                            writer.writerow(row)
+
+            temp_file.flush()
+            return temp_file.name, temp_file
+
+        except Exception as e:
+            if temp_file:
+                temp_file.close()
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to export data: {str(e)}",
+            ) from e

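Note: the new `export_to_csv` above (and its near-copies for conversations, messages, documents and graph entities later in this commit) whitelists column names, binds filter values as `$n` parameters, and streams rows through a server-side cursor into a `NamedTemporaryFile` in 1000-row batches. A hedged usage sketch from a hypothetical FastAPI caller; the route wiring is illustrative, only the `export_to_csv` signature and return tuple come from the diff:

```python
from fastapi import BackgroundTasks
from fastapi.responses import FileResponse


async def download_collections_csv(
    collections_handler,  # assumed: a wired-up PostgresCollectionsHandler
    background_tasks: BackgroundTasks,
) -> FileResponse:
    csv_path, csv_file = await collections_handler.export_to_csv(
        columns=["id", "name", "document_count"],
        filters={"user_count": {"$gt": 0}},
        include_header=True,
    )
    # The temp file is created with delete=True, so it must stay open until the
    # response has been sent; close it afterwards to release (and delete) it.
    background_tasks.add_task(csv_file.close)
    return FileResponse(csv_path, media_type="text/csv", filename="collections.csv")
```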
+ 201 - 1
core/database/conversations.py

@@ -1,5 +1,7 @@
+import csv
 import json
-from typing import Any, Optional
+import tempfile
+from typing import IO, Any, Optional
 from uuid import UUID, uuid4

 from fastapi import HTTPException
@@ -452,3 +454,201 @@ class PostgresConversationsHandler(Handler):
         await self.connection_manager.execute_query(
             del_conv_query, [conversation_id]
         )
+
+    async def export_conversations_to_csv(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        """
+        Creates a CSV file from the PostgreSQL data and returns the path to the temp file.
+        """
+        valid_columns = {
+            "id",
+            "user_id",
+            "created_at",
+            "name",
+        }
+
+        if not columns:
+            columns = list(valid_columns)
+        elif invalid_cols := set(columns) - valid_columns:
+            raise ValueError(f"Invalid columns: {invalid_cols}")
+
+        select_stmt = f"""
+            SELECT
+                id::text,
+                user_id::text,
+                to_char(created_at, 'YYYY-MM-DD HH24:MI:SS') AS created_at,
+                name
+            FROM {self._get_table_name("conversations")}
+        """
+
+        conditions = []
+        params: list[Any] = []
+        param_index = 1
+
+        if filters:
+            for field, value in filters.items():
+                if field not in valid_columns:
+                    continue
+
+                if isinstance(value, dict):
+                    for op, val in value.items():
+                        if op == "$eq":
+                            conditions.append(f"{field} = ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$gt":
+                            conditions.append(f"{field} > ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$lt":
+                            conditions.append(f"{field} < ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                else:
+                    # Direct equality
+                    conditions.append(f"{field} = ${param_index}")
+                    params.append(value)
+                    param_index += 1
+
+        if conditions:
+            select_stmt = f"{select_stmt} WHERE {' AND '.join(conditions)}"
+
+        select_stmt = f"{select_stmt} ORDER BY created_at DESC"
+
+        temp_file = None
+        try:
+            temp_file = tempfile.NamedTemporaryFile(
+                mode="w", delete=True, suffix=".csv"
+            )
+            writer = csv.writer(temp_file, quoting=csv.QUOTE_ALL)
+
+            async with self.connection_manager.pool.get_connection() as conn:  # type: ignore
+                async with conn.transaction():
+                    cursor = await conn.cursor(select_stmt, *params)
+
+                    if include_header:
+                        writer.writerow(columns)
+
+                    chunk_size = 1000
+                    while True:
+                        rows = await cursor.fetch(chunk_size)
+                        if not rows:
+                            break
+                        for row in rows:
+                            writer.writerow(row)
+
+            temp_file.flush()
+            return temp_file.name, temp_file
+
+        except Exception as e:
+            if temp_file:
+                temp_file.close()
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to export data: {str(e)}",
+            ) from e
+
+    async def export_messages_to_csv(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        """
+        Creates a CSV file from the PostgreSQL data and returns the path to the temp file.
+        """
+        valid_columns = {
+            "id",
+            "conversation_id",
+            "parent_id",
+            "content",
+            "metadata",
+            "created_at",
+        }
+
+        if not columns:
+            columns = list(valid_columns)
+        elif invalid_cols := set(columns) - valid_columns:
+            raise ValueError(f"Invalid columns: {invalid_cols}")
+
+        select_stmt = f"""
+            SELECT
+                id::text,
+                conversation_id::text,
+                parent_id::text,
+                content::text,
+                metadata::text,
+                to_char(created_at, 'YYYY-MM-DD HH24:MI:SS') AS created_at
+            FROM {self._get_table_name("messages")}
+        """
+
+        conditions = []
+        params: list[Any] = []
+        param_index = 1
+
+        if filters:
+            for field, value in filters.items():
+                if field not in valid_columns:
+                    continue
+
+                if isinstance(value, dict):
+                    for op, val in value.items():
+                        if op == "$eq":
+                            conditions.append(f"{field} = ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$gt":
+                            conditions.append(f"{field} > ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$lt":
+                            conditions.append(f"{field} < ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                else:
+                    # Direct equality
+                    conditions.append(f"{field} = ${param_index}")
+                    params.append(value)
+                    param_index += 1
+
+        if conditions:
+            select_stmt = f"{select_stmt} WHERE {' AND '.join(conditions)}"
+
+        select_stmt = f"{select_stmt} ORDER BY created_at DESC"
+
+        temp_file = None
+        try:
+            temp_file = tempfile.NamedTemporaryFile(
+                mode="w", delete=True, suffix=".csv"
+            )
+            writer = csv.writer(temp_file, quoting=csv.QUOTE_ALL)
+
+            async with self.connection_manager.pool.get_connection() as conn:  # type: ignore
+                async with conn.transaction():
+                    cursor = await conn.cursor(select_stmt, *params)
+
+                    if include_header:
+                        writer.writerow(columns)
+
+                    chunk_size = 1000
+                    while True:
+                        rows = await cursor.fetch(chunk_size)
+                        if not rows:
+                            break
+                        for row in rows:
+                            writer.writerow(row)
+
+            temp_file.flush()
+            return temp_file.name, temp_file
+
+        except Exception as e:
+            if temp_file:
+                temp_file.close()
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to export data: {str(e)}",
+            ) from e

+ 125 - 10
core/database/documents.py

@@ -1,8 +1,10 @@
 import asyncio
 import copy
+import csv
 import json
 import logging
-from typing import Any, Optional
+import tempfile
+from typing import IO, Any, Optional
 from uuid import UUID

 import asyncpg
@@ -20,7 +22,7 @@ from core.base import (
 )

 from .base import PostgresConnectionManager
-from .filters import apply_filters  # Add this near other imports
+from .filters import apply_filters

 logger = logging.getLogger()
 
 
@@ -247,7 +249,7 @@ class PostgresDocumentsHandler(Handler):
         Get the IDs from a given table.

         Args:
-            status (Union[str, list[str]]): The status or list of statuses to retrieve.
+            status (str | list[str]): The status or list of statuses to retrieve.
             table_name (str): The table name.
             status_type (str): The type of status to retrieve.
         """
@@ -299,9 +301,7 @@ class PostgresDocumentsHandler(Handler):
             return IngestionStatus
         elif status_type == "extraction_status":
             return KGExtractionStatus
-        elif status_type == "graph_cluster_status":
-            return KGEnrichmentStatus
-        elif status_type == "graph_sync_status":
+        elif status_type in {"graph_cluster_status", "graph_sync_status"}:
             return KGEnrichmentStatus
         else:
             raise R2RException(
@@ -315,7 +315,7 @@ class PostgresDocumentsHandler(Handler):
         Get the workflow status for a given document or list of documents.

         Args:
-            id (Union[UUID, list[UUID]]): The document ID or list of document IDs.
+            id (UUID | list[UUID]): The document ID or list of document IDs.
             status_type (str): The type of status to retrieve.

         Returns:
@@ -341,7 +341,7 @@ class PostgresDocumentsHandler(Handler):
         Set the workflow status for a given document or list of documents.

         Args:
-            id (Union[UUID, list[UUID]]): The document ID or list of document IDs.
+            id (UUID | list[UUID]): The document ID or list of document IDs.
             status_type (str): The type of status to set.
             status (str): The status to set.
         """
@@ -368,7 +368,7 @@ class PostgresDocumentsHandler(Handler):
         Args:
             ids_key (str): The key to retrieve the IDs.
             status_type (str): The type of status to retrieve.
-            status (Union[str, list[str]]): The status or list of statuses to retrieve.
+            status (str | list[str]): The status or list of statuses to retrieve.
         """

         if isinstance(status, str):
@@ -501,7 +501,7 @@ class PostgresDocumentsHandler(Handler):
             raise HTTPException(
                 status_code=500,
                 detail="Database query failed",
-            )
+            ) from e

     async def semantic_document_search(
         self, query_embedding: list[float], search_settings: SearchSettings
@@ -792,3 +792,118 @@ class PostgresDocumentsHandler(Handler):
             )
         else:
             return await self.full_text_document_search(query_text, settings)
+
+    async def export_to_csv(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        """
+        Creates a CSV file from the PostgreSQL data and returns the path to the temp file.
+        """
+        valid_columns = {
+            "id",
+            "collection_ids",
+            "owner_id",
+            "type",
+            "metadata",
+            "title",
+            "summary",
+            "version",
+            "size_in_bytes",
+            "ingestion_status",
+            "extraction_status",
+            "created_at",
+            "updated_at",
+        }
+
+        if not columns:
+            columns = list(valid_columns)
+        elif invalid_cols := set(columns) - valid_columns:
+            raise ValueError(f"Invalid columns: {invalid_cols}")
+
+        select_stmt = f"""
+            SELECT
+                id::text,
+                collection_ids::text,
+                owner_id::text,
+                type::text,
+                metadata::text AS metadata,
+                title,
+                summary,
+                version,
+                size_in_bytes,
+                ingestion_status,
+                extraction_status,
+                to_char(created_at, 'YYYY-MM-DD HH24:MI:SS') AS created_at,
+                to_char(updated_at, 'YYYY-MM-DD HH24:MI:SS') AS updated_at
+            FROM {self._get_table_name(self.TABLE_NAME)}
+        """
+
+        conditions = []
+        params: list[Any] = []
+        param_index = 1
+
+        if filters:
+            for field, value in filters.items():
+                if field not in valid_columns:
+                    continue
+
+                if isinstance(value, dict):
+                    for op, val in value.items():
+                        if op == "$eq":
+                            conditions.append(f"{field} = ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$gt":
+                            conditions.append(f"{field} > ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$lt":
+                            conditions.append(f"{field} < ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                else:
+                    # Direct equality
+                    conditions.append(f"{field} = ${param_index}")
+                    params.append(value)
+                    param_index += 1
+
+        if conditions:
+            select_stmt = f"{select_stmt} WHERE {' AND '.join(conditions)}"
+
+        select_stmt = f"{select_stmt} ORDER BY created_at DESC"
+
+        temp_file = None
+        try:
+            temp_file = tempfile.NamedTemporaryFile(
+                mode="w", delete=True, suffix=".csv"
+            )
+            writer = csv.writer(temp_file, quoting=csv.QUOTE_ALL)
+
+            async with self.connection_manager.pool.get_connection() as conn:  # type: ignore
+                async with conn.transaction():
+                    cursor = await conn.cursor(select_stmt, *params)
+
+                    if include_header:
+                        writer.writerow(columns)
+
+                    chunk_size = 1000
+                    while True:
+                        rows = await cursor.fetch(chunk_size)
+                        if not rows:
+                            break
+                        for row in rows:
+                            writer.writerow(row)
+
+            temp_file.flush()
+            return temp_file.name, temp_file
+
+        except Exception as e:
+            if temp_file:
+                temp_file.close()
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to export data: {str(e)}",
+            ) from e

+ 62 - 3
core/database/files.py

@@ -1,7 +1,10 @@
 import io
 import logging
-from typing import BinaryIO, Optional, Union
+from datetime import datetime
+from io import BytesIO
+from typing import BinaryIO, Optional
 from uuid import UUID
+from zipfile import ZipFile

 import asyncpg
 from fastapi import HTTPException
@@ -119,7 +122,7 @@ class PostgresFilesHandler(Handler):
             raise HTTPException(
                 status_code=500,
                 detail=f"Failed to write to large object: {e}",
-            )
+            ) from e

     async def retrieve_file(
         self, document_id: UUID
@@ -150,6 +153,62 @@ class PostgresFilesHandler(Handler):
             file_content = await self._read_lobject(conn, oid)
             return file_name, io.BytesIO(file_content), size
 
 
+    async def retrieve_files_as_zip(
+        self,
+        document_ids: Optional[list[UUID]] = None,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+    ) -> tuple[str, BinaryIO, int]:
+        """Retrieve multiple files and return them as a zip file."""
+
+        query = f"""
+        SELECT document_id, name, oid, size
+        FROM {self._get_table_name(PostgresFilesHandler.TABLE_NAME)}
+        WHERE 1=1
+        """
+        params: list = []
+
+        if document_ids:
+            query += f" AND document_id = ANY(${len(params) + 1})"
+            params.append([str(doc_id) for doc_id in document_ids])
+
+        if start_date:
+            query += f" AND created_at >= ${len(params) + 1}"
+            params.append(start_date)
+
+        if end_date:
+            query += f" AND created_at <= ${len(params) + 1}"
+            params.append(end_date)
+
+        query += " ORDER BY created_at DESC"
+
+        results = await self.connection_manager.fetch_query(query, params)
+
+        if not results:
+            raise R2RException(
+                status_code=404,
+                message="No files found matching the specified criteria",
+            )
+
+        zip_buffer = BytesIO()
+        total_size = 0
+
+        async with self.connection_manager.pool.get_connection() as conn:  # type: ignore
+            with ZipFile(zip_buffer, "w") as zip_file:
+                for record in results:
+                    file_content = await self._read_lobject(
+                        conn, record["oid"]
+                    )
+
+                    zip_file.writestr(record["name"], file_content)
+                    total_size += record["size"]
+
+        zip_buffer.seek(0)
+        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+        zip_filename = f"files_export_{timestamp}.zip"
+
+        return zip_filename, zip_buffer, zip_buffer.getbuffer().nbytes
+
     async def _read_lobject(self, conn, oid: int) -> bytes:
         """Read content from a large object."""
         file_data = io.BytesIO()
@@ -233,7 +292,7 @@ class PostgresFilesHandler(Handler):
     ) -> list[dict]:
         """Get an overview of stored files."""
         conditions = []
-        params: list[Union[str, list[str], int]] = []
+        params: list[str | list[str] | int] = []
         query = f"""
         SELECT document_id, name, oid, size, type, created_at, updated_at
         FROM {self._get_table_name(PostgresFilesHandler.TABLE_NAME)}

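Note: `retrieve_files_as_zip` assembles the archive fully in memory (`BytesIO` plus `ZipFile`), so exports are bounded by available RAM rather than streamed to disk. A hedged usage sketch; the route is hypothetical, only the handler method and its return tuple come from the diff:

```python
from datetime import datetime, timedelta

from fastapi import Response


async def download_recent_files(files_handler) -> Response:  # assumed: a PostgresFilesHandler
    zip_name, zip_buffer, zip_size = await files_handler.retrieve_files_as_zip(
        start_date=datetime.now() - timedelta(days=7),
    )
    return Response(
        content=zip_buffer.getvalue(),
        media_type="application/zip",
        headers={
            "Content-Disposition": f'attachment; filename="{zip_name}"',
            "Content-Length": str(zip_size),
        },
    )
```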
+ 4 - 5
core/database/filters.py

@@ -1,6 +1,5 @@
 import json
-from typing import Any, Optional, Tuple, Union
-from uuid import UUID
+from typing import Any, Optional, Tuple

 COLUMN_VARS = [
     "id",
@@ -46,7 +45,7 @@ class FilterCondition:
 class FilterExpression:
     def __init__(self, logical_op: Optional[str] = None):
         self.logical_op = logical_op
-        self.conditions: list[Union[FilterCondition, "FilterExpression"]] = []
+        self.conditions: list[FilterCondition | "FilterExpression"] = []


 class FilterParser:
@@ -410,13 +409,13 @@ class SQLFilterBuilder:

 def apply_filters(
     filters: dict, params: list[Any], mode: str = "where_clause"
-) -> str:
+) -> tuple[str, list[Any]]:
     """
     Apply filters with consistent WHERE clause handling
     """

     if not filters:
-        return ""
+        return "", params

     parser = FilterParser()
     expr = parser.parse(filters)

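Note: `apply_filters` now returns a `(where_clause, params)` tuple even when no filters are given, instead of a bare string, so every caller has to unpack both values. A minimal sketch of the calling convention under that assumption (the query itself is illustrative):

```python
from typing import Any

from core.database.filters import apply_filters  # imported as `.filters` inside the package


def build_document_query(filters: dict) -> tuple[str, list[Any]]:
    params: list[Any] = []
    where_clause, params = apply_filters(filters, params, mode="where_clause")
    query = "SELECT id FROM documents"
    if where_clause:
        # assuming mode="where_clause" yields a clause that already starts with WHERE
        query = f"{query} {where_clause}"
    return query, params
```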
+ 354 - 114
core/database/graphs.py

@@ -1,12 +1,13 @@
 import asyncio
 import contextlib
+import csv
 import datetime
 import json
 import logging
 import os
+import tempfile
 import time
-from enum import Enum
-from typing import Any, AsyncGenerator, Optional, Tuple
+from typing import IO, Any, AsyncGenerator, Optional, Tuple
 from uuid import UUID

 import asyncpg
@@ -20,10 +21,10 @@ from core.base.abstractions import (
     Graph,
     KGCreationSettings,
     KGEnrichmentSettings,
-    KGEntityDeduplicationSettings,
     KGExtractionStatus,
     R2RException,
     Relationship,
+    StoreType,
     VectorQuantizationType,
 )
 from core.base.api.models import GraphResponse
@@ -37,12 +38,6 @@ from core.base.utils import (
 from .base import PostgresConnectionManager
 from .collections import PostgresCollectionsHandler

-
-class StoreType(str, Enum):
-    GRAPHS = "graphs"
-    DOCUMENTS = "documents"
-
-
 logger = logging.getLogger()
 
 
 
 
@@ -59,9 +54,7 @@ class PostgresEntitiesHandler(Handler):
 
 
     def _get_entity_table_for_store(self, store_type: StoreType) -> str:
         """Get the appropriate table name for the store type."""
-        if isinstance(store_type, StoreType):
-            store_type = store_type.value
-        return f"{store_type}_entities"
+        return f"{store_type.value}_entities"

     def _get_parent_constraint(self, store_type: StoreType) -> str:
         """Get the appropriate foreign key constraint for the store type."""
@@ -376,6 +369,115 @@ class PostgresEntitiesHandler(Handler):
                     404,
                 )
 
 
+    async def export_to_csv(
+        self,
+        parent_id: UUID,
+        store_type: StoreType,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        """
+        Creates a CSV file from the PostgreSQL data and returns the path to the temp file.
+        """
+        valid_columns = {
+            "id",
+            "name",
+            "category",
+            "description",
+            "parent_id",
+            "chunk_ids",
+            "metadata",
+            "created_at",
+            "updated_at",
+        }
+
+        if not columns:
+            columns = list(valid_columns)
+        elif invalid_cols := set(columns) - valid_columns:
+            raise ValueError(f"Invalid columns: {invalid_cols}")
+
+        select_stmt = f"""
+            SELECT
+                id::text,
+                name,
+                category,
+                description,
+                parent_id::text,
+                chunk_ids::text,
+                metadata::text,
+                to_char(created_at, 'YYYY-MM-DD HH24:MI:SS') AS created_at,
+                to_char(updated_at, 'YYYY-MM-DD HH24:MI:SS') AS updated_at
+            FROM {self._get_table_name(self._get_entity_table_for_store(store_type))}
+        """
+
+        conditions = ["parent_id = $1"]
+        params: list[Any] = [parent_id]
+        param_index = 2
+
+        if filters:
+            for field, value in filters.items():
+                if field not in valid_columns:
+                    continue
+
+                if isinstance(value, dict):
+                    for op, val in value.items():
+                        if op == "$eq":
+                            conditions.append(f"{field} = ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$gt":
+                            conditions.append(f"{field} > ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$lt":
+                            conditions.append(f"{field} < ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                else:
+                    # Direct equality
+                    conditions.append(f"{field} = ${param_index}")
+                    params.append(value)
+                    param_index += 1
+
+        if conditions:
+            select_stmt = f"{select_stmt} WHERE {' AND '.join(conditions)}"
+
+        select_stmt = f"{select_stmt} ORDER BY created_at DESC"
+
+        temp_file = None
+        try:
+            temp_file = tempfile.NamedTemporaryFile(
+                mode="w", delete=True, suffix=".csv"
+            )
+            writer = csv.writer(temp_file, quoting=csv.QUOTE_ALL)
+
+            async with self.connection_manager.pool.get_connection() as conn:  # type: ignore
+                async with conn.transaction():
+                    cursor = await conn.cursor(select_stmt, *params)
+
+                    if include_header:
+                        writer.writerow(columns)
+
+                    chunk_size = 1000
+                    while True:
+                        rows = await cursor.fetch(chunk_size)
+                        if not rows:
+                            break
+                        for row in rows:
+                            writer.writerow(row)
+
+            temp_file.flush()
+            return temp_file.name, temp_file
+
+        except Exception as e:
+            if temp_file:
+                temp_file.close()
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to export data: {str(e)}",
+            ) from e
+
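
A hedged usage sketch for the new export helper (the handler variable and parent id are assumptions; only the method above is from the commit). Filters accept direct equality or $eq/$gt/$lt comparisons, and the caller owns closing the returned temp file:

    # Sketch only - `entities` stands in for an initialized PostgresEntitiesHandler.
    csv_path, temp_file = await entities.export_to_csv(
        parent_id=graph_id,                       # UUID of the owning graph or document
        store_type=StoreType.GRAPHS,
        columns=["id", "name", "category"],
        filters={"category": {"$eq": "PERSON"}},
        include_header=True,
    )
    try:
        print(open(csv_path).read())
    finally:
        temp_file.close()  # NamedTemporaryFile(delete=True) removes the file on close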

 class PostgresRelationshipsHandler(Handler):
     def __init__(self, *args: Any, **kwargs: Any) -> None:
@@ -390,9 +492,7 @@ class PostgresRelationshipsHandler(Handler):

     def _get_relationship_table_for_store(self, store_type: StoreType) -> str:
         """Get the appropriate table name for the store type."""
-        if isinstance(store_type, StoreType):
-            store_type = store_type.value
-        return f"{store_type}_relationships"
+        return f"{store_type.value}_relationships"

     def _get_parent_constraint(self, store_type: StoreType) -> str:
         """Get the appropriate foreign key constraint for the store type."""
@@ -774,6 +874,123 @@ class PostgresRelationshipsHandler(Handler):
                     404,
                 )

+    async def export_to_csv(
+        self,
+        parent_id: UUID,
+        store_type: StoreType,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        """
+        Creates a CSV file from the PostgreSQL data and returns the path to the temp file.
+        """
+        valid_columns = {
+            "id",
+            "subject",
+            "predicate",
+            "object",
+            "description",
+            "subject_id",
+            "object_id",
+            "weight",
+            "chunk_ids",
+            "parent_id",
+            "metadata",
+            "created_at",
+            "updated_at",
+        }
+
+        if not columns:
+            columns = list(valid_columns)
+        elif invalid_cols := set(columns) - valid_columns:
+            raise ValueError(f"Invalid columns: {invalid_cols}")
+
+        select_stmt = f"""
+            SELECT
+                id::text,
+                subject,
+                predicate,
+                object,
+                description,
+                subject_id::text,
+                object_id::text,
+                weight,
+                chunk_ids::text,
+                parent_id::text,
+                metadata::text,
+                to_char(created_at, 'YYYY-MM-DD HH24:MI:SS') AS created_at,
+                to_char(updated_at, 'YYYY-MM-DD HH24:MI:SS') AS updated_at
+            FROM {self._get_table_name(self._get_relationship_table_for_store(store_type))}
+        """
+
+        conditions = ["parent_id = $1"]
+        params: list[Any] = [parent_id]
+        param_index = 2
+
+        if filters:
+            for field, value in filters.items():
+                if field not in valid_columns:
+                    continue
+
+                if isinstance(value, dict):
+                    for op, val in value.items():
+                        if op == "$eq":
+                            conditions.append(f"{field} = ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$gt":
+                            conditions.append(f"{field} > ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$lt":
+                            conditions.append(f"{field} < ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                else:
+                    # Direct equality
+                    conditions.append(f"{field} = ${param_index}")
+                    params.append(value)
+                    param_index += 1
+
+        if conditions:
+            select_stmt = f"{select_stmt} WHERE {' AND '.join(conditions)}"
+
+        select_stmt = f"{select_stmt} ORDER BY created_at DESC"
+
+        temp_file = None
+        try:
+            temp_file = tempfile.NamedTemporaryFile(
+                mode="w", delete=True, suffix=".csv"
+            )
+            writer = csv.writer(temp_file, quoting=csv.QUOTE_ALL)
+
+            async with self.connection_manager.pool.get_connection() as conn:  # type: ignore
+                async with conn.transaction():
+                    cursor = await conn.cursor(select_stmt, *params)
+
+                    if include_header:
+                        writer.writerow(columns)
+
+                    chunk_size = 1000
+                    while True:
+                        rows = await cursor.fetch(chunk_size)
+                        if not rows:
+                            break
+                        for row in rows:
+                            writer.writerow(row)
+
+            temp_file.flush()
+            return temp_file.name, temp_file
+
+        except Exception as e:
+            if temp_file:
+                temp_file.close()
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to export data: {str(e)}",
+            ) from e
+

 class PostgresCommunitiesHandler(Handler):
     def __init__(self, *args: Any, **kwargs: Any) -> None:
@@ -946,7 +1163,7 @@ class PostgresCommunitiesHandler(Handler):
     async def delete(
         self,
         parent_id: UUID,
-        community_id: UUID = None,
+        community_id: UUID,
     ) -> None:
         table_name = "graphs_communities"

@@ -964,7 +1181,7 @@ class PostgresCommunitiesHandler(Handler):
             raise HTTPException(
                 status_code=500,
                 detail=f"An error occurred while deleting the community: {e}",
-            )
+            ) from e

     async def delete_all_communities(
         self,
@@ -986,7 +1203,7 @@ class PostgresCommunitiesHandler(Handler):
             raise HTTPException(
                 status_code=500,
                 detail=f"An error occurred while deleting communities: {e}",
-            )
+            ) from e

     async def get(
         self,
@@ -1059,6 +1276,123 @@ class PostgresCommunitiesHandler(Handler):

         return communities, count

+    async def export_to_csv(
+        self,
+        parent_id: UUID,
+        store_type: StoreType,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        """
+        Creates a CSV file from the PostgreSQL data and returns the path to the temp file.
+        """
+        valid_columns = {
+            "id",
+            "collection_id",
+            "community_id",
+            "level",
+            "name",
+            "summary",
+            "findings",
+            "rating",
+            "rating_explanation",
+            "created_at",
+            "updated_at",
+            "metadata",
+        }
+
+        if not columns:
+            columns = list(valid_columns)
+        elif invalid_cols := set(columns) - valid_columns:
+            raise ValueError(f"Invalid columns: {invalid_cols}")
+
+        table_name = "graphs_communities"
+
+        select_stmt = f"""
+            SELECT
+                id::text,
+                collection_id::text,
+                community_id::text,
+                level,
+                name,
+                summary,
+                findings::text,
+                rating,
+                rating_explanation,
+                to_char(created_at, 'YYYY-MM-DD HH24:MI:SS') AS created_at,
+                to_char(updated_at, 'YYYY-MM-DD HH24:MI:SS') AS updated_at,
+                metadata::text
+            FROM {self._get_table_name(table_name)}
+        """
+
+        conditions = ["collection_id = $1"]
+        params: list[Any] = [parent_id]
+        param_index = 2
+
+        if filters:
+            for field, value in filters.items():
+                if field not in valid_columns:
+                    continue
+
+                if isinstance(value, dict):
+                    for op, val in value.items():
+                        if op == "$eq":
+                            conditions.append(f"{field} = ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$gt":
+                            conditions.append(f"{field} > ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$lt":
+                            conditions.append(f"{field} < ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                else:
+                    # Direct equality
+                    conditions.append(f"{field} = ${param_index}")
+                    params.append(value)
+                    param_index += 1
+
+        if conditions:
+            select_stmt = f"{select_stmt} WHERE {' AND '.join(conditions)}"
+
+        select_stmt = f"{select_stmt} ORDER BY created_at DESC"
+
+        temp_file = None
+        try:
+            temp_file = tempfile.NamedTemporaryFile(
+                mode="w", delete=True, suffix=".csv"
+            )
+            writer = csv.writer(temp_file, quoting=csv.QUOTE_ALL)
+
+            async with self.connection_manager.pool.get_connection() as conn:  # type: ignore
+                async with conn.transaction():
+                    cursor = await conn.cursor(select_stmt, *params)
+
+                    if include_header:
+                        writer.writerow(columns)
+
+                    chunk_size = 1000
+                    while True:
+                        rows = await cursor.fetch(chunk_size)
+                        if not rows:
+                            break
+                        for row in rows:
+                            writer.writerow(row)
+
+            temp_file.flush()
+            return temp_file.name, temp_file
+
+        except Exception as e:
+            if temp_file:
+                temp_file.close()
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to export data: {str(e)}",
+            ) from e
+

 class PostgresGraphsHandler(Handler):
     """Handler for Knowledge Graph METHODS in PostgreSQL."""
@@ -1572,72 +1906,6 @@ class PostgresGraphsHandler(Handler):
             + _get_str_estimation_output(estimated_time),
         }

-    async def get_deduplication_estimate(
-        self,
-        collection_id: UUID,
-        kg_deduplication_settings: KGEntityDeduplicationSettings,
-    ):
-        """Get the estimated cost and time for deduplicating entities in a KG."""
-        try:
-            query = f"""
-                SELECT name, count(name)
-                FROM {self._get_table_name("entity")}
-                WHERE document_id = ANY(
-                    SELECT document_id FROM {self._get_table_name("documents")}
-                    WHERE $1 = ANY(collection_ids)
-                )
-                GROUP BY name
-                HAVING count(name) >= 5
-            """
-            entities = await self.connection_manager.fetch_query(
-                query, [collection_id]
-            )
-            num_entities = len(entities)
-
-            estimated_llm_calls = (num_entities, num_entities)
-            tokens_in_millions = (
-                estimated_llm_calls[0] * 1000 / 1000000,
-                estimated_llm_calls[1] * 5000 / 1000000,
-            )
-            cost_per_million = llm_cost_per_million_tokens(
-                kg_deduplication_settings.generation_config.model
-            )
-            estimated_cost = (
-                tokens_in_millions[0] * cost_per_million,
-                tokens_in_millions[1] * cost_per_million,
-            )
-            estimated_time = (
-                tokens_in_millions[0] * 10 / 60,
-                tokens_in_millions[1] * 10 / 60,
-            )
-
-            return {
-                "message": "Ran Deduplication Estimate (not the actual run). Note that these are estimated ranges.",
-                "num_entities": num_entities,
-                "estimated_llm_calls": _get_str_estimation_output(
-                    estimated_llm_calls
-                ),
-                "estimated_total_in_out_tokens_in_millions": _get_str_estimation_output(
-                    tokens_in_millions
-                ),
-                "estimated_cost_in_usd": _get_str_estimation_output(
-                    estimated_cost
-                ),
-                "estimated_total_time_in_minutes": _get_str_estimation_output(
-                    estimated_time
-                ),
-            }
-        except UndefinedTableError:
-            raise R2RException(
-                "Entity embedding table not found. Please run `extract-triples` first.",
-                404,
-            )
-        except Exception as e:
-            logger.error(f"Error in get_deduplication_estimate: {str(e)}")
-            raise HTTPException(
-                500, "Error fetching deduplication estimate."
-            ) from e
-
     async def get_entities(
         self,
         parent_id: UUID,
@@ -1987,7 +2255,6 @@ class PostgresGraphsHandler(Handler):
             QUERY, [tuple(non_null_attrs.values())]
         )

-    # async def delete(self, collection_id: UUID, cascade: bool = False) -> None:
     async def delete(self, collection_id: UUID) -> None:
         graphs = await self.get(graph_id=collection_id, offset=0, limit=-1)

@@ -2009,33 +2276,6 @@ class PostgresGraphsHandler(Handler):
             DELETE FROM {self._get_table_name("graphs")} WHERE collection_id = $1
         """

-        # if cascade:
-        #     documents = []
-        #     document_response = (
-        #         await self.collections_handler.documents_in_collection(
-        #             offset=0,
-        #             limit=100,
-        #             collection_id=collection_id,
-        #         )
-        #     )["results"]
-        #     documents.extend(document_response)
-        #     document_ids = [doc.id for doc in documents]
-        #     for document_id in document_ids:
-        #         self.entities.delete(
-        #             parent_id=document_id, store_type=StoreType.DOCUMENTS
-        #         )
-        #         self.relationships.delete(
-        #             parent_id=document_id, store_type=StoreType.DOCUMENTS
-        #         )
-
-        #     # setting the extraction status to PENDING for the documents in this collection.
-        #     QUERY = f"""
-        #         UPDATE {self._get_table_name("documents")} SET extraction_status = $1 WHERE $2::uuid = ANY(collection_ids)
-        #     """
-        #     await self.connection_manager.execute_query(
-        #         QUERY, [KGExtractionStatus.PENDING, collection_id]
-        #     )
-
     async def perform_graph_clustering(
         self,
         collection_id: UUID,
@@ -2224,13 +2464,13 @@ class PostgresGraphsHandler(Handler):
                 relationship_ids_cache.setdefault(relationship.subject, [])
                 if relationship.id is not None:
                     relationship_ids_cache[relationship.subject].append(
-                        relationship.id
+                        int(relationship.id)
                     )
             if relationship.object is not None:
                 relationship_ids_cache.setdefault(relationship.object, [])
                 if relationship.id is not None:
                     relationship_ids_cache[relationship.object].append(
-                        relationship.id
+                        int(relationship.id)
                     )

         return relationship_ids_cache
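
For readers skimming the hunk above, the cache being built maps entity names to the integer ids of relationships that mention them as subject or object; a sketch of the resulting shape (values invented):

    # Illustrative shape only.
    relationship_ids_cache: dict[str, list[int]] = {
        "Alice": [101, 102],
        "Acme Corp": [102],
    }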

+ 33 - 1
core/database/limits.py

@@ -4,7 +4,7 @@ from typing import Optional
 from uuid import UUID

 from core.base import Handler
-from shared.abstractions import User  # your domain user model
+from shared.abstractions import User

 from ..base.providers.database import DatabaseConfig, LimitSettings
 from .base import PostgresConnectionManager
@@ -87,6 +87,38 @@ class PostgresLimitsHandler(Handler):
         )
         return await self._count_requests(user_id, None, start_of_month)

+        return await self._count_requests(
+            user_id, route=None, since=start_of_month
+        )
+
+    def _determine_limits_for(
+        self, user_id: UUID, route: str
+    ) -> LimitSettings:
+        # Start with base limits
+        limits = self.config.limits
+
+        # Route-specific limits - directly override if present
+        if route_limits := self.config.route_limits.get(route):
+            # Only override non-None values from route_limits
+            if route_limits.global_per_min is not None:
+                limits.global_per_min = route_limits.global_per_min
+            if route_limits.route_per_min is not None:
+                limits.route_per_min = route_limits.route_per_min
+            if route_limits.monthly_limit is not None:
+                limits.monthly_limit = route_limits.monthly_limit
+
+        # User-specific limits - directly override if present
+        if user_limits := self.config.user_limits.get(user_id):
+            # Only override non-None values from user_limits
+            if user_limits.global_per_min is not None:
+                limits.global_per_min = user_limits.global_per_min
+            if user_limits.route_per_min is not None:
+                limits.route_per_min = user_limits.route_per_min
+            if user_limits.monthly_limit is not None:
+                limits.monthly_limit = user_limits.monthly_limit
+
+        return limits
+
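
The override order in _determine_limits_for is base config, then route-specific, then user-specific, with later non-None values winning. A minimal sketch of that precedence (field names come from the diff; the numbers and constructor usage are assumed):

    # Sketch only.
    base = LimitSettings(global_per_min=60, route_per_min=30, monthly_limit=10_000)
    route_override = LimitSettings(route_per_min=5)        # only non-None fields override
    user_override = LimitSettings(monthly_limit=100_000)   # applied last, wins over both
    # effective: global_per_min=60, route_per_min=5, monthly_limit=100_000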
     async def check_limits(self, user: User, route: str):
         """
         Perform rate limit checks for a user on a specific route.

+ 15 - 26
core/database/postgres.py

@@ -1,7 +1,6 @@
 # TODO: Clean this up and make it more congruent across the vector database and the relational database.
 import logging
 import os
-import warnings
 from typing import TYPE_CHECKING, Any, Optional

 from ..base.abstractions import VectorQuantizationType
@@ -28,18 +27,11 @@ from .tokens import PostgresTokensHandler
 from .users import PostgresUserHandler

 if TYPE_CHECKING:
-    from ..providers.crypto import NaClCryptoProvider
-
-logger = logging.getLogger()
+    from ..providers.crypto import BCryptCryptoProvider, NaClCryptoProvider

+    CryptoProviderType = BCryptCryptoProvider | NaClCryptoProvider

-def get_env_var(new_var, old_var, config_value):
-    value = config_value or os.getenv(new_var) or os.getenv(old_var)
-    if os.getenv(old_var) and not os.getenv(new_var):
-        warnings.warn(
-            f"{old_var} is deprecated and support for it will be removed in release 3.5.0. Use {new_var} instead."
-        )
-    return value
+logger = logging.getLogger()


 class PostgresDatabaseProvider(DatabaseProvider):
@@ -57,7 +49,7 @@ class PostgresDatabaseProvider(DatabaseProvider):
     dimension: int
     conn: Optional[Any]

-    crypto_provider: "NaClCryptoProvider"
+    crypto_provider: "CryptoProviderType"
     postgres_configuration_settings: PostgresConfigurationSettings
     default_collection_name: str
     default_collection_description: str
@@ -81,7 +73,7 @@ class PostgresDatabaseProvider(DatabaseProvider):
         self,
         config: DatabaseConfig,
         dimension: int,
-        crypto_provider: "NaClCryptoProvider",
+        crypto_provider: "BCryptCryptoProvider | NaClCryptoProvider",
         quantization_type: VectorQuantizationType = VectorQuantizationType.FP32,
         *args,
         **kwargs,
@@ -89,29 +81,26 @@ class PostgresDatabaseProvider(DatabaseProvider):
         super().__init__(config)

         env_vars = [
-            ("user", "R2R_POSTGRES_USER", "POSTGRES_USER"),
-            ("password", "R2R_POSTGRES_PASSWORD", "POSTGRES_PASSWORD"),
-            ("host", "R2R_POSTGRES_HOST", "POSTGRES_HOST"),
-            ("port", "R2R_POSTGRES_PORT", "POSTGRES_PORT"),
-            ("db_name", "R2R_POSTGRES_DBNAME", "POSTGRES_DBNAME"),
+            ("user", "R2R_POSTGRES_USER"),
+            ("password", "R2R_POSTGRES_PASSWORD"),
+            ("host", "R2R_POSTGRES_HOST"),
+            ("port", "R2R_POSTGRES_PORT"),
+            ("db_name", "R2R_POSTGRES_DBNAME"),
         ]

-        for attr, new_var, old_var in env_vars:
-            if value := get_env_var(new_var, old_var, getattr(config, attr)):
+        for attr, env_var in env_vars:
+            if value := (getattr(config, attr) or os.getenv(env_var)):
                 setattr(self, attr, value)
             else:
                 raise ValueError(
-                    f"Error, please set a valid {new_var} environment variable or set a '{attr}' in the 'database' settings of your `r2r.toml`."
+                    f"Error, please set a valid {env_var} environment variable or set a '{attr}' in the 'database' settings of your `r2r.toml`."
                 )

         self.port = int(self.port)

         self.project_name = (
-            get_env_var(
-                "R2R_PROJECT_NAME",
-                "R2R_POSTGRES_PROJECT_NAME",  # Remove this after deprecation
-                config.app.project_name,
-            )
+            config.app.project_name
+            or os.getenv("R2R_PROJECT_NAME")
             or "r2r_default"
         )
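
With the deprecated POSTGRES_* fallbacks gone, each connection setting now resolves as config value first, then the R2R_-prefixed environment variable, and raises if neither is set; project_name additionally falls back to "r2r_default". Roughly (a sketch, not the provider code):

    # Sketch only.
    user = config.user or os.getenv("R2R_POSTGRES_USER")
    if not user:
        raise ValueError("set R2R_POSTGRES_USER or database.user in r2r.toml")
    project_name = config.app.project_name or os.getenv("R2R_PROJECT_NAME") or "r2r_default"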

+ 112 - 2
core/database/users.py

@@ -1,6 +1,8 @@
+import csv
 import json
+import tempfile
 from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import IO, Optional
 from uuid import UUID

 from fastapi import HTTPException
@@ -372,7 +374,7 @@ class PostgresUserHandler(Handler):
             query, [new_hashed_password, id]
         )

-    async def get_all_users(self) -> List[User]:
+    async def get_all_users(self) -> list[User]:
         """Get all users with minimal information."""
         query, params = (
             QueryBuilder(self._get_table_name(self.TABLE_NAME))
@@ -897,3 +899,111 @@ class PostgresUserHandler(Handler):
         if result is None:
             raise R2RException(status_code=404, message="API key not found")
         return True
+
+    async def export_to_csv(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        """
+        Creates a CSV file from the PostgreSQL data and returns the path to the temp file.
+        """
+        valid_columns = {
+            "id",
+            "email",
+            "is_superuser",
+            "is_active",
+            "is_verified",
+            "name",
+            "bio",
+            "collection_ids",
+            "created_at",
+            "updated_at",
+        }
+
+        if not columns:
+            columns = list(valid_columns)
+        elif invalid_cols := set(columns) - valid_columns:
+            raise ValueError(f"Invalid columns: {invalid_cols}")
+
+        select_stmt = f"""
+            SELECT
+                id::text,
+                email,
+                is_superuser,
+                is_active,
+                is_verified,
+                name,
+                bio,
+                to_char(created_at, 'YYYY-MM-DD HH24:MI:SS') AS created_at,
+                to_char(updated_at, 'YYYY-MM-DD HH24:MI:SS') AS updated_at
+            FROM {self._get_table_name(self.TABLE_NAME)}
+        """
+
+        params = []
+        if filters:
+            conditions = []
+            param_index = 1
+
+            for field, value in filters.items():
+                if field not in valid_columns:
+                    continue
+
+                if isinstance(value, dict):
+                    for op, val in value.items():
+                        if op == "$eq":
+                            conditions.append(f"{field} = ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$gt":
+                            conditions.append(f"{field} > ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                        elif op == "$lt":
+                            conditions.append(f"{field} < ${param_index}")
+                            params.append(val)
+                            param_index += 1
+                else:
+                    # Direct equality
+                    conditions.append(f"{field} = ${param_index}")
+                    params.append(value)
+                    param_index += 1
+
+            if conditions:
+                select_stmt = f"{select_stmt} WHERE {' AND '.join(conditions)}"
+
+        select_stmt = f"{select_stmt} ORDER BY created_at DESC"
+
+        temp_file = None
+        try:
+            temp_file = tempfile.NamedTemporaryFile(
+                mode="w", delete=True, suffix=".csv"
+            )
+            writer = csv.writer(temp_file, quoting=csv.QUOTE_ALL)
+
+            async with self.connection_manager.pool.get_connection() as conn:  # type: ignore
+                async with conn.transaction():
+                    cursor = await conn.cursor(select_stmt, *params)
+
+                    if include_header:
+                        writer.writerow(columns)
+
+                    chunk_size = 1000
+                    while True:
+                        rows = await cursor.fetch(chunk_size)
+                        if not rows:
+                            break
+                        for row in rows:
+                            writer.writerow(row)
+
+            temp_file.flush()
+            return temp_file.name, temp_file
+
+        except Exception as e:
+            if temp_file:
+                temp_file.close()
+            raise HTTPException(
+                status_code=500,
+                detail=f"Failed to export data: {str(e)}",
+            ) from e
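
The same $gt/$lt comparison filters work here, which is convenient for date ranges; a hedged sketch (the handler variable is an assumption):

    # Sketch only - `users` stands in for an initialized PostgresUserHandler.
    csv_path, temp_file = await users.export_to_csv(
        columns=["id", "email", "created_at"],
        filters={"created_at": {"$gt": "2024-01-01", "$lt": "2025-01-01"}},
        include_header=True,
    )
    temp_file.close()  # caller owns cleanup; closing deletes the temporary file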

+ 3 - 3
core/database/vecs/adapter/base.py

@@ -8,14 +8,14 @@ All public classes, enums, and functions are re-exported by `vecs.adapters` modu

 from abc import ABC, abstractmethod
 from enum import Enum
-from typing import Any, Generator, Iterable, Optional, Tuple, Union
+from typing import Any, Generator, Iterable, Optional, Tuple
 from uuid import UUID

 from vecs.exc import ArgError

-MetadataValues = Union[str, int, float, bool, list[str]]
+MetadataValues = str | int | float | bool | list[str]
 Metadata = dict[str, MetadataValues]
-Numeric = Union[int, float, complex]
+Numeric = int | float | complex

 Record = Tuple[
     UUID,

+ 0 - 4
core/main/abstractions.py

@@ -10,8 +10,6 @@ from core.pipes import (
     EmbeddingPipe,
     GraphClusteringPipe,
     GraphCommunitySummaryPipe,
-    GraphDeduplicationPipe,
-    GraphDeduplicationSummaryPipe,
     GraphDescriptionPipe,
     GraphExtractionPipe,
     GraphSearchSearchPipe,
@@ -76,8 +74,6 @@ class R2RPipes(BaseModel):
     graph_storage_pipe: GraphStoragePipe
     graph_description_pipe: GraphDescriptionPipe
     graph_clustering_pipe: GraphClusteringPipe
-    graph_deduplication_pipe: GraphDeduplicationPipe
-    graph_deduplication_summary_pipe: GraphDeduplicationSummaryPipe
     graph_community_summary_pipe: GraphCommunitySummaryPipe
     rag_pipe: RAGPipe
     streaming_rag_pipe: StreamingRAGPipe

+ 2 - 2
core/main/api/v3/base_router.py

@@ -4,7 +4,7 @@ from abc import abstractmethod
 from typing import Callable, Optional

 from fastapi import APIRouter, Depends, HTTPException, Request, WebSocket
-from fastapi.responses import StreamingResponse
+from fastapi.responses import FileResponse, StreamingResponse

 from core.base import R2RException, manage_run

@@ -64,7 +64,7 @@ class BaseRouterV3:
                     else:
                         results, outer_kwargs = func_result, {}

-                    if isinstance(results, StreamingResponse):
+                    if isinstance(results, (StreamingResponse, FileResponse)):
                         return results
                     return {"results": results, **outer_kwargs}


+ 109 - 1
core/main/api/v3/collections_router.py

@@ -1,10 +1,12 @@
 import logging
 import textwrap
-import time
+from tempfile import NamedTemporaryFile
 from typing import Optional
 from uuid import UUID

 from fastapi import Body, Depends, Path, Query
+from fastapi.background import BackgroundTasks
+from fastapi.responses import FileResponse

 from core.base import KGCreationSettings, KGRunType, R2RException
 from core.base.api.models import (
@@ -177,6 +179,112 @@ class CollectionsRouter(BaseRouterV3):
             )
             return collection

+        @self.router.post(
+            "/collections/export",
+            summary="Export collections to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.collections.export(
+                                output_path="export.csv",
+                                columns=["id", "name", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.collections.export({
+                                    outputPath: "export.csv",
+                                    columns: ["id", "name", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/collections/export" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "name", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_collections(
+            background_tasks: BackgroundTasks,
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export collections as a CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_collections(
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="collections_export.csv",
+            )
+
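
A note on the cleanup pattern shared by these export endpoints: the service returns an open NamedTemporaryFile created with delete=True, FileResponse streams it by path, and closing is deferred to a background task so the file disappears only after the response is sent. In isolation (a sketch under those assumptions, not the project's code):

    # Sketch only.
    import tempfile
    from fastapi import BackgroundTasks
    from fastapi.responses import FileResponse

    def serve_csv(background_tasks: BackgroundTasks) -> FileResponse:
        tmp = tempfile.NamedTemporaryFile(mode="w", suffix=".csv", delete=True)
        tmp.write("id,name\n1,example\n")
        tmp.flush()
        background_tasks.add_task(tmp.close)  # runs after the response body is sent
        return FileResponse(tmp.name, media_type="text/csv", filename="export.csv")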
         @self.router.get(
             "/collections",
             summary="List collections",

+ 214 - 0
core/main/api/v3/conversations_router.py

@@ -4,6 +4,8 @@ from typing import Optional
 from uuid import UUID

 from fastapi import Body, Depends, Path, Query
+from fastapi.background import BackgroundTasks
+from fastapi.responses import FileResponse

 from core.base import Message, R2RException
 from core.base.api.models import (
@@ -206,6 +208,218 @@ class ConversationsRouter(BaseRouterV3):
                 "total_entries": conversations_response["total_entries"]
             }

+        @self.router.post(
+            "/conversations/export",
+            summary="Export conversations to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.conversations.export(
+                                output_path="export.csv",
+                                columns=["id", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.conversations.export({
+                                    outputPath: "export.csv",
+                                    columns: ["id", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/conversations/export" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_conversations(
+            background_tasks: BackgroundTasks,
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export conversations as a downloadable CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_conversations(
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="conversations_export.csv",
+            )
+
+        @self.router.post(
+            "/conversations/export_messages",
+            summary="Export messages to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.conversations.export_messages(
+                                output_path="export.csv",
+                                columns=["id", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.conversations.exportMessages({
+                                    outputPath: "export.csv",
+                                    columns: ["id", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/conversations/export_messages" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_messages(
+            background_tasks: BackgroundTasks,
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export conversation messages as a downloadable CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_messages(
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="messages_export.csv",
+            )
+
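
None of the embedded samples exercise the optional filters body; per the database handlers earlier in this commit, filters accept direct equality or $eq/$gt/$lt comparisons keyed by exportable column names. A hedged example payload (column names taken from the samples above):

    # Sketch only - request body for POST /v3/conversations/export_messages.
    payload = {
        "columns": ["id", "created_at"],
        "filters": {"created_at": {"$gt": "2024-01-01 00:00:00"}},
        "include_header": True,
    }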
         @self.router.get(
             "/conversations/{id}",
             summary="Get conversation details",

+ 439 - 6
core/main/api/v3/documents_router.py

@@ -3,12 +3,14 @@ import json
 import logging
 import mimetypes
 import textwrap
+from datetime import datetime
 from io import BytesIO
 from typing import Any, Optional
 from uuid import UUID

 from fastapi import Body, Depends, File, Form, Path, Query, UploadFile
-from fastapi.responses import StreamingResponse
+from fastapi.background import BackgroundTasks
+from fastapi.responses import FileResponse, StreamingResponse
 from pydantic import Json

 from core.base import (
@@ -23,7 +25,7 @@ from core.base import (
     generate_id,
     select_search_filters,
 )
-from core.base.abstractions import KGCreationSettings, KGRunType
+from core.base.abstractions import KGCreationSettings, KGRunType, StoreType
 from core.base.api.models import (
     GenericBooleanResponse,
     WrappedBooleanResponse,
@@ -444,7 +446,7 @@ class DocumentsRouter(BaseRouterV3):
                         )
                     )
                     raw_message["document_id"] = str(document_id)
-                    return raw_message
+                    return raw_message  # type: ignore

                 else:
                     logger.info(
@@ -546,6 +548,211 @@ class DocumentsRouter(BaseRouterV3):
                     "task_id": None,
                 }

+        @self.router.post(
+            "/documents/export",
+            summary="Export documents to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.documents.export(
+                                output_path="export.csv",
+                                columns=["id", "title", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.documents.export({
+                                    outputPath: "export.csv",
+                                    columns: ["id", "title", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/documents/export" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "title", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_documents(
+            background_tasks: BackgroundTasks,
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export documents as a downloadable CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_documents(
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="documents_export.csv",
+            )
+
+        @self.router.get(
+            "/documents/download_zip",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            response_class=StreamingResponse,
+            summary="Export multiple documents as zip",
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            client.documents.download_zip(
+                                document_ids=["uuid1", "uuid2"],
+                                start_date="2024-01-01",
+                                end_date="2024-12-31"
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X GET "https://api.example.com/v3/documents/download_zip?document_ids=uuid1,uuid2&start_date=2024-01-01&end_date=2024-12-31" \\
+                            -H "Authorization: Bearer YOUR_API_KEY"
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_files(
+            document_ids: Optional[list[UUID]] = Query(
+                None,
+                description="List of document IDs to include in the export. If not provided, all accessible documents will be included.",
+            ),
+            start_date: Optional[datetime] = Query(
+                None,
+                description="Filter documents created on or after this date.",
+            ),
+            end_date: Optional[datetime] = Query(
+                None,
+                description="Filter documents created before this date.",
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> StreamingResponse:
+            """
+            Export multiple documents as a zip file. Documents can be filtered by IDs and/or date range.
+
+            The endpoint allows downloading:
+            - Specific documents by providing their IDs
+            - Documents within a date range
+            - All accessible documents if no filters are provided
+
+            Files are streamed as a zip archive to handle potentially large downloads efficiently.
+            """
+            if not auth_user.is_superuser:
+                # For non-superusers, verify access to requested documents
+                if document_ids:
+                    documents_overview = (
+                        await self.services.management.documents_overview(
+                            user_ids=[auth_user.id],
+                            document_ids=document_ids,
+                            offset=0,
+                            limit=len(document_ids),
+                        )
+                    )
+                    if len(documents_overview["results"]) != len(document_ids):
+                        raise R2RException(
+                            status_code=403,
+                            message="You don't have access to one or more requested documents.",
+                        )
+                if not document_ids:
+                    raise R2RException(
+                        status_code=403,
+                        message="Non-superusers must provide document IDs to export.",
+                    )
+
+            zip_name, zip_content, zip_size = (
+                await self.services.management.export_files(
+                    document_ids=document_ids,
+                    start_date=start_date,
+                    end_date=end_date,
+                )
+            )
+
+            async def stream_file():
+                yield zip_content.getvalue()
+
+            return StreamingResponse(
+                stream_file(),
+                media_type="application/zip",
+                headers={
+                    "Content-Disposition": f'attachment; filename="{zip_name}"',
+                    "Content-Length": str(zip_size),
+                },
+            )
+
         @self.router.get(
             "/documents",
             dependencies=[Depends(self.rate_limit_dependency)],
@@ -1377,7 +1584,7 @@ class DocumentsRouter(BaseRouterV3):
                     "user": auth_user.json(),
                     "user": auth_user.json(),
                 }
                 }
 
 
-                return await self.providers.orchestration.run_workflow(
+                return await self.providers.orchestration.run_workflow(  # type: ignore
                     "extract-triples", {"request": workflow_input}, {}
                     "extract-triples", {"request": workflow_input}, {}
                 )
                 )
             else:
             else:
@@ -1482,7 +1689,7 @@ class DocumentsRouter(BaseRouterV3):
                 count,
             ) = await self.providers.database.graphs_handler.entities.get(
                 parent_id=id,
-                store_type="documents",
+                store_type=StoreType.DOCUMENTS,
                 offset=offset,
                 limit=limit,
                 include_embeddings=include_embeddings,
@@ -1490,6 +1697,119 @@ class DocumentsRouter(BaseRouterV3):

             return entities, {"total_entries": count}  # type: ignore

+        @self.router.post(
+            "/documents/{id}/entities/export",
+            summary="Export document entities to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.documents.export_entities(
+                                id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                output_path="export.csv",
+                                columns=["id", "title", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.documents.exportEntities({
+                                    id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                    outputPath: "export.csv",
+                                    columns: ["id", "title", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/documents/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa/entities/export" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "title", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_entities(
+            background_tasks: BackgroundTasks,
+            id: UUID = Path(
+                ...,
+                description="The ID of the document to export entities from.",
+            ),
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export document entities as a downloadable CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_document_entities(
+                    id=id,
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="documents_export.csv",
+            )
+
         @self.router.get(
             "/documents/{id}/relationships",
             dependencies=[Depends(self.rate_limit_dependency)],
@@ -1624,7 +1944,7 @@ class DocumentsRouter(BaseRouterV3):
                 count,
             ) = await self.providers.database.graphs_handler.relationships.get(
                 parent_id=id,
-                store_type="documents",
+                store_type=StoreType.DOCUMENTS,
                 entity_names=entity_names,
                 relationship_types=relationship_types,
                 offset=offset,
@@ -1633,6 +1953,119 @@ class DocumentsRouter(BaseRouterV3):

             return relationships, {"total_entries": count}  # type: ignore

+        @self.router.post(
+            "/documents/{id}/relationships/export",
+            summary="Export document relationships to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.documents.export_relationships(
+                                id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                output_path="export.csv",
+                                columns=["id", "title", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.documents.exportRelationships({
+                                    id: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                    outputPath: "export.csv",
+                                    columns: ["id", "title", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/documents/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa/relationships/export" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "title", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_relationships(
+            background_tasks: BackgroundTasks,
+            id: UUID = Path(
+                ...,
+                description="The ID of the document to export relationships from.",
+            ),
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export document relationships as a downloadable CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_document_relationships(
+                    id=id,
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="documents_export.csv",
+            )
+
         @self.router.post(
             "/documents/search",
             dependencies=[Depends(self.rate_limit_dependency)],

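The x-codeSamples above use the R2R SDKs; for reference, the two new document export routes can also be exercised over plain HTTP. A minimal sketch using requests, assuming a local deployment on port 7272, a superuser API key, and a placeholder document UUID:

    import requests

    BASE = "http://localhost:7272/v3"
    HEADERS = {"Authorization": "Bearer YOUR_API_KEY"}  # placeholder token

    # CSV export of the documents table (superuser only)
    resp = requests.post(
        f"{BASE}/documents/export",
        headers={**HEADERS, "Accept": "text/csv"},
        json={"columns": ["id", "title", "created_at"], "include_header": True},
    )
    with open("export.csv", "wb") as f:
        f.write(resp.content)

    # Streamed zip download of selected documents, filtered by ID and date range
    resp = requests.get(
        f"{BASE}/documents/download_zip",
        headers=HEADERS,
        params={
            "document_ids": ["b4ac4dd6-5f27-596e-a55b-7cf242ca30aa"],  # placeholder
            "start_date": "2024-01-01T00:00:00",
            "end_date": "2024-12-31T00:00:00",
        },
        stream=True,
    )
    with open("documents.zip", "wb") as f:
        for chunk in resp.iter_content(chunk_size=8192):
            f.write(chunk)
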
+ 347 - 86
core/main/api/v3/graph_router.py

@@ -4,9 +4,11 @@ from typing import Optional
 from uuid import UUID

 from fastapi import Body, Depends, Path, Query
+from fastapi.background import BackgroundTasks
+from fastapi.responses import FileResponse

 from core.base import KGEnrichmentStatus, R2RException, Workflow
-from core.base.abstractions import KGRunType
+from core.base.abstractions import KGRunType, StoreType
 from core.base.api.models import (
     GenericBooleanResponse,
     WrappedBooleanResponse,
@@ -48,9 +50,6 @@ class GraphRouter(BaseRouterV3):
             workflow_messages["build-communities"] = (
             workflow_messages["build-communities"] = (
                 "Graph enrichment task queued successfully."
                 "Graph enrichment task queued successfully."
             )
             )
-            workflow_messages["entity-deduplication"] = (
-                "KG Entity Deduplication task queued successfully."
-            )
         else:
         else:
             workflow_messages["extract-triples"] = (
             workflow_messages["extract-triples"] = (
                 "Document entities and relationships extracted successfully."
                 "Document entities and relationships extracted successfully."
@@ -58,9 +57,6 @@ class GraphRouter(BaseRouterV3):
             workflow_messages["build-communities"] = (
             workflow_messages["build-communities"] = (
                 "Graph communities created successfully."
                 "Graph communities created successfully."
             )
             )
-            workflow_messages["entity-deduplication"] = (
-                "KG Entity Deduplication completed successfully."
-            )
 
 
         self.providers.orchestration.register_workflows(
         self.providers.orchestration.register_workflows(
             Workflow.KG,
             Workflow.KG,
@@ -68,80 +64,6 @@ class GraphRouter(BaseRouterV3):
             workflow_messages,
         )

-    async def _deduplicate_entities(
-        self,
-        collection_id: UUID,
-        settings,
-        run_type: Optional[KGRunType] = KGRunType.ESTIMATE,
-        run_with_orchestration: bool = True,
-        auth_user=None,
-    ):
-        """Deduplicates entities in the knowledge graph using LLM-based analysis.
-
-        The deduplication process:
-        1. Groups potentially duplicate entities by name/type
-        2. Uses LLM analysis to determine if entities refer to same thing
-        3. Merges duplicate entities while preserving relationships
-        4. Updates all references to use canonical entity IDs
-
-        Args:
-            id (UUID): Graph containing the entities
-            settings (dict, optional): Deduplication settings including:
-                - graph_entity_deduplication_type (str): Deduplication method (e.g. "by_name")
-                - graph_entity_deduplication_prompt (str): Custom prompt for analysis
-                - max_description_input_length (int): Max chars for entity descriptions
-                - generation_config (dict): LLM generation parameters
-            run_type (KGRunType): Whether to estimate cost or run deduplication
-            run_with_orchestration (bool): Whether to run async with task queue
-            auth_user: Authenticated user making request
-
-        Returns:
-            Result containing:
-                message (str): Status message
-                task_id (UUID): Async task ID if run with orchestration
-
-        Raises:
-            R2RException: If user unauthorized or deduplication fails
-        """
-        if not auth_user.is_superuser:
-            raise R2RException(
-                "Only superusers can deduplicate a graphs entities", 403
-            )
-
-        server_settings = (
-            self.providers.database.config.graph_entity_deduplication_settings
-        )
-        if settings:
-            server_settings = update_settings_from_dict(
-                server_settings, settings
-            )
-
-        # Return cost estimate if requested
-        if run_type == KGRunType.ESTIMATE:
-            return await self.services.graph.get_deduplication_estimate(
-                collection_id, server_settings
-            )
-
-        workflow_input = {
-            "graph_id": str(collection_id),
-            "graph_entity_deduplication_settings": server_settings.model_dump_json(),
-            "user": auth_user.model_dump_json(),
-        }
-
-        if run_with_orchestration:
-            return await self.providers.orchestration.run_workflow(  # type: ignore
-                "entity-deduplication", {"request": workflow_input}, {}
-            )
-        else:
-            from core.main.orchestration import simple_kg_factory
-
-            simple_kg = simple_kg_factory(self.services.graph)
-            await simple_kg["entity-deduplication"](workflow_input)
-            return {  # type: ignore
-                "message": "Entity deduplication completed successfully.",
-                "task_id": None,
-            }
-
     async def _get_collection_id(
         self, collection_id: Optional[UUID], auth_user
     ) -> UUID:
@@ -307,7 +229,7 @@ class GraphRouter(BaseRouterV3):
                 offset=0,
                 limit=1,
             )
-            return list_graphs_response["results"][0]
+            return list_graphs_response["results"][0]  # type: ignore

         @self.router.post(
             "/graphs/{collection_id}/communities/build",
@@ -645,6 +567,119 @@ class GraphRouter(BaseRouterV3):
                 "total_entries": count,
                 "total_entries": count,
             }
             }
 
 
+        @self.router.post(
+            "/graphs/{collection_id}/entities/export",
+            summary="Export graph entities to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.graphs.export_entities(
+                                collection_id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                output_path="export.csv",
+                                columns=["id", "title", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.graphs.exportEntities({
+                                    collectionId: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                    outputPath: "export.csv",
+                                    columns: ["id", "title", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/graphs/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa/entities/export" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "title", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_entities(
+            background_tasks: BackgroundTasks,
+            collection_id: UUID = Path(
+                ...,
+                description="The ID of the collection to export entities from.",
+            ),
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export graph entities as a downloadable CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_graph_entities(
+                    id=collection_id,
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="documents_export.csv",
+            )
+
         @self.router.post(
             "/graphs/{collection_id}/entities",
             dependencies=[Depends(self.rate_limit_dependency)],
@@ -754,6 +789,119 @@ class GraphRouter(BaseRouterV3):
                 parent_id=collection_id,
             )

+        @self.router.post(
+            "/graphs/{collection_id}/relationships/export",
+            summary="Export graph relationships to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.graphs.export_relationships(
+                                collection_id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                output_path="export.csv",
+                                columns=["id", "title", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.graphs.exportRelationships({
+                                    collectionId: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                    outputPath: "export.csv",
+                                    columns: ["id", "title", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/graphs/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa/relationships/export" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "title", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_relationships(
+            background_tasks: BackgroundTasks,
+            collection_id: UUID = Path(
+                ...,
+                description="The ID of the collection to export relationships from.",
+            ),
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export graph relationships as a downloadable CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_graph_relationships(
+                    id=collection_id,
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="documents_export.csv",
+            )
+
         @self.router.get(
             "/graphs/{collection_id}/entities/{entity_id}",
             dependencies=[Depends(self.rate_limit_dependency)],
@@ -821,7 +969,7 @@ class GraphRouter(BaseRouterV3):

             result = await self.providers.database.graphs_handler.entities.get(
                 parent_id=collection_id,
-                store_type="graphs",
+                store_type=StoreType.GRAPHS,
                 offset=0,
                 limit=1,
                 entity_ids=[entity_id],
@@ -1110,7 +1258,7 @@ class GraphRouter(BaseRouterV3):
             results = (
                 await self.providers.database.graphs_handler.relationships.get(
                     parent_id=collection_id,
-                    store_type="graphs",
+                    store_type=StoreType.GRAPHS,
                     offset=0,
                     limit=1,
                     relationship_ids=[relationship_id],
@@ -1526,7 +1674,7 @@ class GraphRouter(BaseRouterV3):
                 await self.providers.database.graphs_handler.communities.get(
                     parent_id=collection_id,
                     community_ids=[community_id],
-                    store_type="graphs",
+                    store_type=StoreType.GRAPHS,
                     offset=0,
                     limit=1,
                 )
@@ -1615,6 +1763,119 @@ class GraphRouter(BaseRouterV3):
             )
             return GenericBooleanResponse(success=True)  # type: ignore

+        @self.router.post(
+            "/graphs/{collection_id}/communities/export",
+            summary="Export document communities to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.graphs.export_communities(
+                                collection_id="b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                output_path="export.csv",
+                                columns=["id", "title", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.graphs.exportCommunities({
+                                    collectionId: "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa",
+                                    outputPath: "export.csv",
+                                    columns: ["id", "title", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/graphs/b4ac4dd6-5f27-596e-a55b-7cf242ca30aa/communities/export" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "title", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_communities(
+            background_tasks: BackgroundTasks,
+            collection_id: UUID = Path(
+                ...,
+                description="The ID of the collection to export communities from.",
+            ),
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export graph communities as a downloadable CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_graph_communities(
+                    id=collection_id,
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="documents_export.csv",
+            )
+
         @self.router.post(
             "/graphs/{collection_id}/communities/{community_id}",
             dependencies=[Depends(self.rate_limit_dependency)],
@@ -1839,7 +2100,7 @@ class GraphRouter(BaseRouterV3):
                 entities = (
                     await self.providers.database.graphs_handler.entities.get(
                         parent_id=document.id,
-                        store_type="documents",
+                        store_type=StoreType.DOCUMENTS,
                         offset=0,
                         limit=100,
                     )

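Beyond the new export routes, the recurring change in this file and in documents_router.py is that graphs_handler calls now pass a StoreType member instead of a bare string. The enum itself is imported from core.base.abstractions and its definition is not shown in this diff; a plausible minimal shape, assuming a str-backed enum with the two members used at these call sites:

    from enum import Enum

    class StoreType(str, Enum):
        # assumed values, inferred from the strings they replace in this diff
        DOCUMENTS = "documents"
        GRAPHS = "graphs"

    # A str-backed enum keeps existing string comparisons and SQL parameter
    # binding working, while a typo like StoreType.DOCUMENT fails loudly with
    # an AttributeError instead of silently matching nothing.
    assert StoreType.DOCUMENTS == "documents"
    assert StoreType.GRAPHS.value == "graphs"
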
+ 2 - 2
core/main/api/v3/indices_router.py

@@ -230,7 +230,7 @@ class IndicesRouter(BaseRouterV3):
                 },
             )

-            return result
+            return result  # type: ignore

         @self.router.get(
             "/indices",
@@ -599,7 +599,7 @@ class IndicesRouter(BaseRouterV3):
                 f"Deleting vector index {index_name} from table {table_name}"
                 f"Deleting vector index {index_name} from table {table_name}"
             )
             )
 
 
-            return await self.providers.orchestration.run_workflow(
+            return await self.providers.orchestration.run_workflow(  # type: ignore
                 "delete-vector-index",
                 "delete-vector-index",
                 {
                 {
                     "request": {
                     "request": {

+ 109 - 1
core/main/api/v3/users_router.py

@@ -1,8 +1,10 @@
 import textwrap
-from typing import Optional, Union
+from typing import Optional
 from uuid import UUID

 from fastapi import Body, Depends, Path, Query
+from fastapi.background import BackgroundTasks
+from fastapi.responses import FileResponse
 from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm
 from pydantic import EmailStr

@@ -139,6 +141,112 @@ class UsersRouter(BaseRouterV3):

             return registration_response

+        @self.router.post(
+            "/users/export",
+            summary="Export users to CSV",
+            dependencies=[Depends(self.rate_limit_dependency)],
+            openapi_extra={
+                "x-codeSamples": [
+                    {
+                        "lang": "Python",
+                        "source": textwrap.dedent(
+                            """
+                            from r2r import R2RClient
+
+                            client = R2RClient("http://localhost:7272")
+                            # when using auth, do client.login(...)
+
+                            response = client.users.export(
+                                output_path="export.csv",
+                                columns=["id", "name", "created_at"],
+                                include_header=True,
+                            )
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "JavaScript",
+                        "source": textwrap.dedent(
+                            """
+                            const { r2rClient } = require("r2r-js");
+
+                            const client = new r2rClient("http://localhost:7272");
+
+                            async function main() {
+                                await client.users.export({
+                                    outputPath: "export.csv",
+                                    columns: ["id", "name", "created_at"],
+                                    includeHeader: true,
+                                });
+                            }
+
+                            main();
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "CLI",
+                        "source": textwrap.dedent(
+                            """
+                            """
+                        ),
+                    },
+                    {
+                        "lang": "cURL",
+                        "source": textwrap.dedent(
+                            """
+                            curl -X POST "http://127.0.0.1:7272/v3/users/export" \
+                            -H "Authorization: Bearer YOUR_API_KEY" \
+                            -H "Content-Type: application/json" \
+                            -H "Accept: text/csv" \
+                            -d '{ "columns": ["id", "name", "created_at"], "include_header": true }' \
+                            --output export.csv
+                            """
+                        ),
+                    },
+                ]
+            },
+        )
+        @self.base_endpoint
+        async def export_users(
+            background_tasks: BackgroundTasks,
+            columns: Optional[list[str]] = Body(
+                None, description="Specific columns to export"
+            ),
+            filters: Optional[dict] = Body(
+                None, description="Filters to apply to the export"
+            ),
+            include_header: Optional[bool] = Body(
+                True, description="Whether to include column headers"
+            ),
+            auth_user=Depends(self.providers.auth.auth_wrapper()),
+        ) -> FileResponse:
+            """
+            Export users as a CSV file.
+            """
+
+            if not auth_user.is_superuser:
+                raise R2RException(
+                    "Only a superuser can export data.",
+                    403,
+                )
+
+            csv_file_path, temp_file = (
+                await self.services.management.export_users(
+                    columns=columns,
+                    filters=filters,
+                    include_header=include_header,
+                )
+            )
+
+            background_tasks.add_task(temp_file.close)
+
+            return FileResponse(
+                path=csv_file_path,
+                media_type="text/csv",
+                filename="users_export.csv",
+            )
+
         # TODO: deprecated, remove in next release
         @self.router.post(
             "/users/register",

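The export handlers added across the documents, graph, and users routers all share one response shape: the management service writes the CSV to a temporary file, the endpoint returns a FileResponse, and the temp-file handle is closed by a BackgroundTasks callback once the response has been sent. A minimal standalone sketch of that pattern (illustrative names only, not the R2R implementation):

    import csv
    import tempfile

    from fastapi import BackgroundTasks, FastAPI
    from fastapi.responses import FileResponse

    app = FastAPI()

    @app.post("/example/export")
    async def example_export(background_tasks: BackgroundTasks) -> FileResponse:
        # delete=False keeps the file on disk after the handler returns; the
        # handle is closed only after the response body has been streamed.
        temp_file = tempfile.NamedTemporaryFile(
            mode="w", suffix=".csv", newline="", delete=False
        )
        writer = csv.writer(temp_file)
        writer.writerow(["id", "title", "created_at"])
        writer.writerow(["1", "example", "2024-01-01"])
        temp_file.flush()

        # runs after the response is sent, releasing the file handle
        background_tasks.add_task(temp_file.close)

        return FileResponse(
            path=temp_file.name,
            media_type="text/csv",
            filename="example_export.csv",
        )
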
+ 31 - 51
core/main/assembly/factory.py

@@ -17,7 +17,23 @@ from core.base import (
     OrchestrationConfig,
 )
 from core.pipelines import RAGPipeline, SearchPipeline
-from core.pipes import GeneratorPipe, MultiSearchPipe, SearchPipe
+from core.pipes import (
+    EmbeddingPipe,
+    GeneratorPipe,
+    GraphClusteringPipe,
+    GraphCommunitySummaryPipe,
+    GraphDescriptionPipe,
+    GraphExtractionPipe,
+    GraphSearchSearchPipe,
+    GraphStoragePipe,
+    MultiSearchPipe,
+    ParsingPipe,
+    RAGPipe,
+    SearchPipe,
+    StreamingRAGPipe,
+    VectorSearchPipe,
+    VectorStoragePipe,
+)
 from core.providers.email.sendgrid import SendGridEmailProvider

 from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders
@@ -366,20 +382,20 @@ class R2RPipeFactory:

     def create_pipes(
         self,
-        parsing_pipe_override: Optional[AsyncPipe] = None,
-        embedding_pipe_override: Optional[AsyncPipe] = None,
-        graph_extraction_pipe_override: Optional[AsyncPipe] = None,
-        graph_storage_pipe_override: Optional[AsyncPipe] = None,
-        graph_search_pipe_override: Optional[AsyncPipe] = None,
-        vector_storage_pipe_override: Optional[AsyncPipe] = None,
-        vector_search_pipe_override: Optional[AsyncPipe] = None,
-        rag_pipe_override: Optional[AsyncPipe] = None,
-        streaming_rag_pipe_override: Optional[AsyncPipe] = None,
-        graph_description_pipe: Optional[AsyncPipe] = None,
-        graph_clustering_pipe: Optional[AsyncPipe] = None,
-        graph_deduplication_pipe: Optional[AsyncPipe] = None,
-        graph_deduplication_summary_pipe: Optional[AsyncPipe] = None,
-        graph_community_summary_pipe: Optional[AsyncPipe] = None,
+        parsing_pipe_override: Optional[ParsingPipe] = None,
+        embedding_pipe_override: Optional[EmbeddingPipe] = None,
+        graph_extraction_pipe_override: Optional[GraphExtractionPipe] = None,
+        graph_storage_pipe_override: Optional[GraphStoragePipe] = None,
+        graph_search_pipe_override: Optional[GraphSearchSearchPipe] = None,
+        vector_storage_pipe_override: Optional[VectorStoragePipe] = None,
+        vector_search_pipe_override: Optional[VectorSearchPipe] = None,
+        rag_pipe_override: Optional[RAGPipe] = None,
+        streaming_rag_pipe_override: Optional[StreamingRAGPipe] = None,
+        graph_description_pipe: Optional[GraphDescriptionPipe] = None,
+        graph_clustering_pipe: Optional[GraphClusteringPipe] = None,
+        graph_community_summary_pipe: Optional[
+            GraphCommunitySummaryPipe
+        ] = None,
         *args,
         **kwargs,
     ) -> R2RPipes:
@@ -410,10 +426,6 @@ class R2RPipeFactory:
             or self.create_graph_description_pipe(*args, **kwargs),
             graph_clustering_pipe=graph_clustering_pipe
             or self.create_graph_clustering_pipe(*args, **kwargs),
-            graph_deduplication_pipe=graph_deduplication_pipe
-            or self.create_graph_deduplication_pipe(*args, **kwargs),
-            graph_deduplication_summary_pipe=graph_deduplication_summary_pipe
-            or self.create_graph_deduplication_summary_pipe(*args, **kwargs),
             graph_community_summary_pipe=graph_community_summary_pipe
             or self.create_graph_community_summary_pipe(*args, **kwargs),
         )
@@ -598,16 +610,6 @@ class R2RPipeFactory:
             config=AsyncPipe.PipeConfig(name="graph_clustering_pipe"),
             config=AsyncPipe.PipeConfig(name="graph_clustering_pipe"),
         )
         )
 
 
-    def create_kg_deduplication_summary_pipe(self, *args, **kwargs) -> Any:
-        from core.pipes import GraphDeduplicationSummaryPipe
-
-        return GraphDeduplicationSummaryPipe(
-            database_provider=self.providers.database,
-            llm_provider=self.providers.llm,
-            embedding_provider=self.providers.embedding,
-            config=AsyncPipe.PipeConfig(name="kg_deduplication_summary_pipe"),
-        )
-
     def create_graph_community_summary_pipe(self, *args, **kwargs) -> Any:
         from core.pipes import GraphCommunitySummaryPipe

@@ -618,28 +620,6 @@ class R2RPipeFactory:
             config=AsyncPipe.PipeConfig(name="graph_community_summary_pipe"),
             config=AsyncPipe.PipeConfig(name="graph_community_summary_pipe"),
         )
         )
 
 
-    def create_graph_deduplication_pipe(self, *args, **kwargs) -> Any:
-        from core.pipes import GraphDeduplicationPipe
-
-        return GraphDeduplicationPipe(
-            database_provider=self.providers.database,
-            llm_provider=self.providers.llm,
-            embedding_provider=self.providers.embedding,
-            config=AsyncPipe.PipeConfig(name="graph_deduplication_pipe"),
-        )
-
-    def create_graph_deduplication_summary_pipe(self, *args, **kwargs) -> Any:
-        from core.pipes import GraphDeduplicationSummaryPipe
-
-        return GraphDeduplicationSummaryPipe(
-            database_provider=self.providers.database,
-            llm_provider=self.providers.llm,
-            embedding_provider=self.providers.embedding,
-            config=AsyncPipe.PipeConfig(
-                name="graph_deduplication_summary_pipe"
-            ),
-        )
-

 class R2RPipelineFactory:
     def __init__(

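create_pipes now declares each override with its concrete pipe class instead of the generic AsyncPipe base type. A toy illustration of what the narrower annotations buy, using stub stand-ins rather than the real classes from core.pipes:

    from typing import Optional

    # Stub stand-ins for illustration only; the real classes live in core.pipes.
    class AsyncPipe: ...
    class RAGPipe(AsyncPipe): ...
    class EmbeddingPipe(AsyncPipe): ...

    def create_pipes_loose(rag_pipe_override: Optional[AsyncPipe] = None) -> None:
        """Old-style signature: any AsyncPipe is accepted, so a mismatched
        override passes the type checker and only fails (if at all) at runtime."""

    def create_pipes_strict(rag_pipe_override: Optional[RAGPipe] = None) -> None:
        """New-style signature: only a RAGPipe (or subclass) is accepted."""

    create_pipes_loose(EmbeddingPipe())   # type-checks, but is almost certainly wrong
    create_pipes_strict(EmbeddingPipe())  # flagged by mypy: expected Optional[RAGPipe]
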
+ 0 - 121
core/main/orchestration/hatchet/kg_workflow.py

@@ -97,25 +97,6 @@ def hatchet_kg_factory(
                 except:
                     pass

-            if key == "graph_entity_deduplication_settings":
-                try:
-                    input_data[key] = json.loads(value)
-                except:
-                    pass
-
-                if isinstance(input_data[key]["generation_config"], str):
-                    input_data[key]["generation_config"] = json.loads(
-                        input_data[key]["generation_config"]
-                    )
-
-                input_data[key]["generation_config"] = GenerationConfig(
-                    **input_data[key]["generation_config"]
-                )
-
-                logger.info(
-                    f"KG Entity Deduplication Settings: {input_data[key]}"
-                )
-
             if key == "generation_config":
             if key == "generation_config":
                 input_data[key] = GenerationConfig(**input_data[key])
                 input_data[key] = GenerationConfig(**input_data[key])
         return input_data
         return input_data
@@ -383,104 +364,6 @@ def hatchet_kg_factory(
                     f"Failed to update document status for {document_id}: {e}"
                     f"Failed to update document status for {document_id}: {e}"
                 )
                 )
 
 
-    @orchestration_provider.workflow(
-        name="entity-deduplication", timeout="360m"
-    )
-    class EntityDeduplicationWorkflow:
-        def __init__(self, kg_service: GraphService):
-            self.kg_service = kg_service
-
-        @orchestration_provider.step(retries=0, timeout="360m")
-        async def kg_entity_deduplication_setup(
-            self, context: Context
-        ) -> dict:
-            input_data = get_input_data_dict(
-                context.workflow_input()["request"]
-            )
-
-            graph_id = input_data["graph_id"]
-
-            logger.info(
-                f"Running KG Entity Deduplication for collection {graph_id}"
-            )
-            logger.info(f"Input data: {input_data}")
-            logger.info(
-                f"KG Entity Deduplication Settings: {input_data['graph_entity_deduplication_settings']}"
-            )
-
-            number_of_distinct_entities = (
-                await self.kg_service.kg_entity_deduplication(
-                    graph_id=graph_id,
-                    **input_data["graph_entity_deduplication_settings"],
-                )
-            )[0]["num_entities"]
-
-            input_data["graph_entity_deduplication_settings"][
-                "generation_config"
-            ] = input_data["graph_entity_deduplication_settings"][
-                "generation_config"
-            ].model_dump_json()
-
-            # run 100 entities in one workflow
-            total_workflows = math.ceil(number_of_distinct_entities / 100)
-            workflows = []
-            for i in range(total_workflows):
-                offset = i * 100
-                workflows.append(
-                    context.aio.spawn_workflow(
-                        "kg-entity-deduplication-summary",
-                        {
-                            "request": {
-                                "graph_id": graph_id,
-                                "offset": offset,
-                                "limit": 100,
-                                "graph_entity_deduplication_settings": json.dumps(
-                                    input_data[
-                                        "graph_entity_deduplication_settings"
-                                    ]
-                                ),
-                            }
-                        },
-                        key=f"{i}/{total_workflows}_entity_deduplication_part",
-                    )
-                )
-
-            await asyncio.gather(*workflows)
-            return {
-                "result": f"successfully queued kg entity deduplication for collection {graph_id} with {number_of_distinct_entities} distinct entities"
-            }
-
-    @orchestration_provider.workflow(
-        name="kg-entity-deduplication-summary", timeout="360m"
-    )
-    class EntityDeduplicationSummaryWorkflow:
-        def __init__(self, kg_service: GraphService):
-            self.kg_service = kg_service
-
-        @orchestration_provider.step(retries=0, timeout="360m")
-        async def kg_entity_deduplication_summary(
-            self, context: Context
-        ) -> dict:
-            logger.info(
-                f"Running KG Entity Deduplication Summary for input data: {context.workflow_input()['request']}"
-            )
-
-            input_data = get_input_data_dict(
-                context.workflow_input()["request"]
-            )
-            graph_id = input_data["graph_id"]
-
-            await self.kg_service.kg_entity_deduplication_summary(
-                graph_id=graph_id,
-                offset=input_data["offset"],
-                limit=input_data["limit"],
-                **input_data["graph_entity_deduplication_settings"],
-            )
-
-            return {
-                "result": f"successfully queued kg entity deduplication summary for collection {graph_id}"
-            }
-
     @orchestration_provider.workflow(name="build-communities", timeout="360m")
     class EnrichGraphWorkflow:
         def __init__(self, kg_service: GraphService):
@@ -676,8 +559,4 @@ def hatchet_kg_factory(
         "extract-triples": CreateGraphWorkflow(service),
         "extract-triples": CreateGraphWorkflow(service),
         "build-communities": EnrichGraphWorkflow(service),
         "build-communities": EnrichGraphWorkflow(service),
         "kg-community-summary": KGCommunitySummaryWorkflow(service),
         "kg-community-summary": KGCommunitySummaryWorkflow(service),
-        "kg-entity-deduplication": EntityDeduplicationWorkflow(service),
-        "kg-entity-deduplication-summary": EntityDeduplicationSummaryWorkflow(
-            service
-        ),
     }

+ 0 - 26
core/main/orchestration/simple/kg_workflow.py

@@ -174,34 +174,8 @@ def simple_kg_factory(service: GraphService):
             **input_data["graph_enrichment_settings"],
         )

-    async def entity_deduplication_workflow(input_data):
-        # TODO: We should determine how we want to handle the input here and syncronize it across all simple orchestration methods
-        if isinstance(input_data["graph_entity_deduplication_settings"], str):
-            input_data["graph_entity_deduplication_settings"] = json.loads(
-                input_data["graph_entity_deduplication_settings"]
-            )
-
-        collection_id = input_data.get("collection_id", None)
-        graph_id = input_data.get("graph_id", None)
-
-        number_of_distinct_entities = (
-            await service.kg_entity_deduplication(
-                collection_id=collection_id,
-                graph_id=graph_id,
-                **input_data["graph_entity_deduplication_settings"],
-            )
-        )[0]["num_entities"]
-
-        await service.kg_entity_deduplication_summary(
-            collection_id=collection_id,
-            offset=0,
-            limit=number_of_distinct_entities,
-            **input_data["graph_entity_deduplication_settings"],
-        )
-
     return {
         "extract-triples": extract_triples,
         "build-communities": enrich_graph,
         "kg-community-summary": kg_community_summary,
-        "entity-deduplication": entity_deduplication_workflow,
     }

+ 2 - 2
core/main/services/auth_service.py

@@ -283,7 +283,7 @@ class AuthService(Service):
         """
         """
         return await self.providers.auth.create_user_api_key(user_id)
         return await self.providers.auth.create_user_api_key(user_id)
 
 
-    async def delete_user_api_key(self, user_id: UUID, key_id: UUID) -> dict:
+    async def delete_user_api_key(self, user_id: UUID, key_id: UUID) -> bool:
         """
         """
         Delete the API key for the user.
         Delete the API key for the user.
 
 
@@ -292,7 +292,7 @@ class AuthService(Service):
             key_id (str): The ID of the API key

         Returns:
-            dict: Contains the message
+            bool: True if the API key was deleted successfully
         """
         """
         return await self.providers.auth.delete_user_api_key(
         return await self.providers.auth.delete_user_api_key(
             user_id=user_id, key_id=key_id
             user_id=user_id, key_id=key_id

+ 14 - 88
core/main/services/graph_service.py

@@ -21,10 +21,9 @@ from core.base.abstractions import (
     KGCreationSettings,
     KGEnrichmentSettings,
     KGEnrichmentStatus,
-    KGEntityDeduplicationSettings,
-    KGEntityDeduplicationType,
     R2RException,
     Relationship,
+    StoreType,
 )
 from core.base.api.models import GraphResponse
 from core.telemetry.telemetry_decorator import telemetry_event
@@ -144,7 +143,7 @@ class GraphService(Service):
         return await self.providers.database.graphs_handler.entities.create(
             name=name,
             parent_id=parent_id,
-            store_type="graphs",  # type: ignore
+            store_type=StoreType.GRAPHS,
             category=category,
             description=description,
             description_embedding=description_embedding,
@@ -168,7 +167,7 @@ class GraphService(Service):

         return await self.providers.database.graphs_handler.entities.update(
             entity_id=entity_id,
-            store_type="graphs",  # type: ignore
+            store_type=StoreType.GRAPHS,
             name=name,
             description=description,
             description_embedding=description_embedding,
@@ -185,7 +184,7 @@ class GraphService(Service):
         return await self.providers.database.graphs_handler.entities.delete(
             parent_id=parent_id,
             entity_ids=[entity_id],
-            store_type="graphs",  # type: ignore
+            store_type=StoreType.GRAPHS,
         )

     @telemetry_event("get_entities")
@@ -238,7 +237,7 @@ class GraphService(Service):
                 description_embedding=description_embedding,
                 weight=weight,
                 metadata=metadata,
-                store_type="graphs",  # type: ignore
+                store_type=StoreType.GRAPHS,
             )
         )

@@ -252,7 +251,7 @@ class GraphService(Service):
             await self.providers.database.graphs_handler.relationships.delete(
                 parent_id=parent_id,
                 relationship_ids=[relationship_id],
-                store_type="graphs",  # type: ignore
+                store_type=StoreType.GRAPHS,
             )
         )

@@ -287,7 +286,7 @@ class GraphService(Service):
                 description_embedding=description_embedding,
                 weight=weight,
                 metadata=metadata,
-                store_type="graphs",  # type: ignore
+                store_type=StoreType.GRAPHS,
             )
         )

@@ -302,7 +301,7 @@ class GraphService(Service):
     ):
         return await self.providers.database.graphs_handler.relationships.get(
             parent_id=parent_id,
-            store_type="graphs",  # type: ignore
+            store_type=StoreType.GRAPHS,
             offset=offset,
             limit=limit,
             relationship_ids=relationship_ids,
@@ -324,7 +323,7 @@ class GraphService(Service):
         )
         return await self.providers.database.graphs_handler.communities.create(
             parent_id=parent_id,
-            store_type="graphs",  # type: ignore
+            store_type=StoreType.GRAPHS,
             name=name,
             summary=summary,
             description_embedding=description_embedding,
@@ -351,7 +350,7 @@ class GraphService(Service):

         return await self.providers.database.graphs_handler.communities.update(
             community_id=community_id,
-            store_type="graphs",  # type: ignore
+            store_type=StoreType.GRAPHS,
             name=name,
             summary=summary,
             summary_embedding=summary_embedding,
@@ -380,7 +379,7 @@ class GraphService(Service):
     ):
         return await self.providers.database.graphs_handler.communities.get(
             parent_id=collection_id,
-            store_type="graphs",  # type: ignore
+            store_type=StoreType.GRAPHS,
             offset=offset,
             limit=limit,
         )
@@ -622,21 +621,17 @@ class GraphService(Service):
     async def delete_graph(
         self,
         collection_id: UUID,
-        cascade: bool,
-        **kwargs,
     ):
-        return await self.delete(collection_id=collection_id, cascade=cascade)
+        return await self.delete(collection_id=collection_id)

     @telemetry_event("delete")
     async def delete(
         self,
         collection_id: UUID,
-        cascade: bool,
         **kwargs,
     ):
         return await self.providers.database.graphs_handler.delete(
             collection_id=collection_id,
-            cascade=cascade,
         )

     @telemetry_event("get_creation_estimate")
@@ -674,75 +669,6 @@ class GraphService(Service):
             graph_enrichment_settings=graph_enrichment_settings,
         )

-    @telemetry_event("get_deduplication_estimate")
-    async def get_deduplication_estimate(
-        self,
-        collection_id: UUID,
-        kg_deduplication_settings: KGEntityDeduplicationSettings,
-        **kwargs,
-    ):
-        return await self.providers.database.graphs_handler.get_deduplication_estimate(
-            collection_id=collection_id,
-            kg_deduplication_settings=kg_deduplication_settings,
-        )
-
-    @telemetry_event("kg_entity_deduplication")
-    async def kg_entity_deduplication(
-        self,
-        collection_id: UUID,
-        graph_id: UUID,
-        graph_entity_deduplication_type: KGEntityDeduplicationType,
-        graph_entity_deduplication_prompt: str,
-        generation_config: GenerationConfig,
-        **kwargs,
-    ):
-        deduplication_results = await self.pipes.graph_deduplication_pipe.run(
-            input=self.pipes.graph_deduplication_pipe.Input(
-                message={
-                    "collection_id": collection_id,
-                    "graph_id": graph_id,
-                    "graph_entity_deduplication_type": graph_entity_deduplication_type,
-                    "graph_entity_deduplication_prompt": graph_entity_deduplication_prompt,
-                    "generation_config": generation_config,
-                    **kwargs,
-                }
-            ),
-            state=None,
-            run_manager=self.run_manager,
-        )
-        return await _collect_results(deduplication_results)
-
-    @telemetry_event("kg_entity_deduplication_summary")
-    async def kg_entity_deduplication_summary(
-        self,
-        collection_id: UUID,
-        offset: int,
-        limit: int,
-        graph_entity_deduplication_type: KGEntityDeduplicationType,
-        graph_entity_deduplication_prompt: str,
-        generation_config: GenerationConfig,
-        **kwargs,
-    ):
-        logger.info(
-            f"Running kg_entity_deduplication_summary for collection {collection_id} with settings {kwargs}"
-        )
-        deduplication_summary_results = await self.pipes.graph_deduplication_summary_pipe.run(
-            input=self.pipes.graph_deduplication_summary_pipe.Input(
-                message={
-                    "collection_id": collection_id,
-                    "offset": offset,
-                    "limit": limit,
-                    "graph_entity_deduplication_type": graph_entity_deduplication_type,
-                    "graph_entity_deduplication_prompt": graph_entity_deduplication_prompt,
-                    "generation_config": generation_config,
-                }
-            ),
-            state=None,
-            run_manager=self.run_manager,
-        )
-
-        return await _collect_results(deduplication_summary_results)
-
     async def kg_extraction(  # type: ignore
         self,
         document_id: UUID,
@@ -1045,7 +971,7 @@ class GraphService(Service):
                 result = await self.providers.database.graphs_handler.entities.create(
                     name=entity.name,
                     parent_id=entity.parent_id,
-                    store_type="documents",  # type: ignore
+                    store_type=StoreType.DOCUMENTS,
                     category=entity.category,
                     description=entity.description,
                     description_embedding=entity.description_embedding,
@@ -1067,5 +993,5 @@ class GraphService(Service):
                         description_embedding=relationship.description_embedding,
                         weight=relationship.weight,
                         metadata=relationship.metadata,
-                        store_type="documents",  # type: ignore
+                        store_type=StoreType.DOCUMENTS,
                     )
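
Note on the store_type changes throughout this file: the raw string literals (previously silenced with `# type: ignore`) are replaced by an enum. A minimal sketch of the assumed shape of StoreType follows; the real definition is imported from core.base.abstractions and may differ.

from enum import Enum

class StoreType(str, Enum):
    # Illustrative stand-in; the actual enum ships with core.base.abstractions
    GRAPHS = "graphs"
    DOCUMENTS = "documents"

# Because the enum derives from str with the same values, handlers that
# previously compared against the raw strings keep working unchanged.
assert StoreType.GRAPHS == "graphs" and StoreType.DOCUMENTS == "documents"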

+ 173 - 7
core/main/services/management_service.py

@@ -1,7 +1,8 @@
 import logging
 import os
 from collections import defaultdict
-from typing import Any, BinaryIO, Optional, Tuple
+from datetime import datetime
+from typing import IO, Any, BinaryIO, Optional, Tuple
 from uuid import UUID

 import toml
@@ -16,6 +17,7 @@ from core.base import (
     Prompt,
     R2RException,
     RunManager,
+    StoreType,
     User,
 )
 from core.telemetry.telemetry_decorator import telemetry_event
@@ -189,10 +191,12 @@ class ManagementService(Service):
         for doc_id in docs_to_delete:
             # Delete related entities & relationships if needed:
             await self.providers.database.graphs_handler.entities.delete(
-                parent_id=doc_id, store_type="documents"
+                parent_id=doc_id,
+                store_type=StoreType.DOCUMENTS,
             )
             await self.providers.database.graphs_handler.relationships.delete(
-                parent_id=doc_id, store_type="documents"
+                parent_id=doc_id,
+                store_type=StoreType.DOCUMENTS,
             )

             # Finally, delete the document from documents_overview:
@@ -218,6 +222,166 @@ class ManagementService(Service):
             return result
         return None

+    @telemetry_event("ExportFiles")
+    async def export_files(
+        self,
+        document_ids: Optional[list[UUID]] = None,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+    ) -> tuple[str, BinaryIO, int]:
+        return (
+            await self.providers.database.files_handler.retrieve_files_as_zip(
+                document_ids=document_ids,
+                start_date=start_date,
+                end_date=end_date,
+            )
+        )
+
+    @telemetry_event("ExportCollections")
+    async def export_collections(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.collections_handler.export_to_csv(
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
+    @telemetry_event("ExportDocuments")
+    async def export_documents(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.documents_handler.export_to_csv(
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
+    @telemetry_event("ExportDocumentEntities")
+    async def export_document_entities(
+        self,
+        id: UUID,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.graphs_handler.entities.export_to_csv(
+            parent_id=id,
+            store_type=StoreType.DOCUMENTS,
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
+    @telemetry_event("ExportDocumentRelationships")
+    async def export_document_relationships(
+        self,
+        id: UUID,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.graphs_handler.relationships.export_to_csv(
+            parent_id=id,
+            store_type=StoreType.DOCUMENTS,
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
+    @telemetry_event("ExportConversations")
+    async def export_conversations(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.conversations_handler.export_conversations_to_csv(
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
+    @telemetry_event("ExportGraphEntities")
+    async def export_graph_entities(
+        self,
+        id: UUID,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.graphs_handler.entities.export_to_csv(
+            parent_id=id,
+            store_type=StoreType.GRAPHS,
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
+    @telemetry_event("ExportGraphRelationships")
+    async def export_graph_relationships(
+        self,
+        id: UUID,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.graphs_handler.relationships.export_to_csv(
+            parent_id=id,
+            store_type=StoreType.GRAPHS,
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
+    @telemetry_event("ExportGraphCommunities")
+    async def export_graph_communities(
+        self,
+        id: UUID,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.graphs_handler.communities.export_to_csv(
+            parent_id=id,
+            store_type=StoreType.GRAPHS,
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
+    @telemetry_event("ExportMessages")
+    async def export_messages(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.conversations_handler.export_messages_to_csv(
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
+    @telemetry_event("ExportUsers")
+    async def export_users(
+        self,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> tuple[str, IO]:
+        return await self.providers.database.users_handler.export_to_csv(
+            columns=columns,
+            filters=filters,
+            include_header=include_header,
+        )
+
     @telemetry_event("DocumentsOverview")
     @telemetry_event("DocumentsOverview")
     async def documents_overview(
     async def documents_overview(
         self,
         self,
@@ -538,7 +702,9 @@ class ManagementService(Service):
             return {
                 "message": (
                     await self.providers.database.prompts_handler.get_cached_prompt(
-                        prompt_name, inputs, prompt_override
+                        prompt_name=prompt_name,
+                        inputs=inputs,
+                        prompt_override=prompt_override,
                     )
                 )
             }
@@ -674,11 +840,11 @@ class ManagementService(Service):
             filter_user_ids=user_ids,
         )

-    async def get_user_max_documents(self, user_id: UUID) -> int:
+    async def get_user_max_documents(self, user_id: UUID) -> int | None:
         return self.config.app.default_max_documents_per_user

-    async def get_user_max_chunks(self, user_id: UUID) -> int:
+    async def get_user_max_chunks(self, user_id: UUID) -> int | None:
         return self.config.app.default_max_chunks_per_user

-    async def get_user_max_collections(self, user_id: UUID) -> int:
+    async def get_user_max_collections(self, user_id: UUID) -> int | None:
         return self.config.app.default_max_collections_per_user
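
A brief usage sketch for the new export helpers added above. It assumes an already-initialized ManagementService instance named `service`; the column names and file path are illustrative, not taken from this commit.

import shutil

async def dump_documents_csv(service, path: str = "documents.csv") -> None:
    # export_documents returns a (suggested_filename, csv_file_object) pair
    _, csv_io = await service.export_documents(
        columns=["id", "title", "created_at"],  # illustrative column names
        include_header=True,
    )
    csv_io.seek(0)  # rewind in case the handler leaves the cursor at the end
    with open(path, "w", newline="") as out:
        shutil.copyfileobj(csv_io, out)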

+ 3 - 3
core/parsers/structured/csv_parser.py

@@ -1,5 +1,5 @@
 # type: ignore
-from typing import IO, AsyncGenerator, Optional, Union
+from typing import IO, AsyncGenerator, Optional

 from core.base.parsers.base_parser import AsyncParser
 from core.base.providers import (
@@ -29,7 +29,7 @@ class CSVParser(AsyncParser[str | bytes]):
         self.StringIO = StringIO

     async def ingest(
-        self, data: Union[str, bytes], *args, **kwargs
+        self, data: str | bytes, *args, **kwargs
     ) -> AsyncGenerator[str, None]:
         """Ingest CSV data and yield text from each row."""
         if isinstance(data, bytes):
@@ -72,7 +72,7 @@ class CSVParserAdvanced(AsyncParser[str | bytes]):

     async def ingest(
         self,
-        data: Union[str, bytes],
+        data: str | bytes,
         num_col_times_num_rows: int = 100,
         *args,
         **kwargs,

+ 0 - 4
core/pipes/__init__.py

@@ -5,8 +5,6 @@ from .ingestion.parsing_pipe import ParsingPipe
 from .ingestion.vector_storage_pipe import VectorStoragePipe
 from .kg.clustering import GraphClusteringPipe
 from .kg.community_summary import GraphCommunitySummaryPipe
-from .kg.deduplication import GraphDeduplicationPipe
-from .kg.deduplication_summary import GraphDeduplicationSummaryPipe
 from .kg.description import GraphDescriptionPipe
 from .kg.extraction import GraphExtractionPipe
 from .kg.storage import GraphStoragePipe
@@ -36,6 +34,4 @@ __all__ = [
     "MultiSearchPipe",
     "MultiSearchPipe",
     "GraphCommunitySummaryPipe",
     "GraphCommunitySummaryPipe",
     "RoutingSearchPipe",
     "RoutingSearchPipe",
-    "GraphDeduplicationPipe",
-    "GraphDeduplicationSummaryPipe",
 ]

+ 2 - 2
core/pipes/abstractions/search_pipe.py

@@ -1,6 +1,6 @@
 import logging
 from abc import abstractmethod
-from typing import Any, AsyncGenerator, Optional, Union
+from typing import Any, AsyncGenerator
 from uuid import UUID

 from core.base import AsyncPipe, AsyncState, ChunkSearchResult
@@ -15,7 +15,7 @@ class SearchPipe(AsyncPipe[ChunkSearchResult]):
         limit: int = 10

     class Input(AsyncPipe.Input):
-        message: Union[AsyncGenerator[str, None], str]
+        message: AsyncGenerator[str, None] | str

     def __init__(
         self,

+ 2 - 2
core/pipes/ingestion/embedding_pipe.py

@@ -1,6 +1,6 @@
 import asyncio
 import logging
-from typing import Any, AsyncGenerator, Optional, Union
+from typing import Any, AsyncGenerator

 from core.base import (
     AsyncState,
@@ -113,7 +113,7 @@ class EmbeddingPipe(AsyncPipe[VectorEntry]):

     async def _process_extraction(
         self, extraction: DocumentChunk
-    ) -> Union[VectorEntry, R2RDocumentProcessingError]:
+    ) -> VectorEntry | R2RDocumentProcessingError:
         try:
             if isinstance(extraction.data, bytes):
                 raise ValueError(

+ 1 - 1
core/pipes/kg/community_summary.py

@@ -300,7 +300,7 @@ class GraphCommunitySummaryPipe(AsyncPipe):
         )

         # Organize clusters
-        clusters: dict[Any] = {}
+        clusters: dict[Any, Any] = {}
         for item in community_clusters:
             cluster_id = (
                 item["cluster"]

+ 2 - 2
core/pipes/kg/extraction.py

@@ -3,7 +3,7 @@ import json
 import logging
 import re
 import time
-from typing import Any, AsyncGenerator, Optional, Union
+from typing import Any, AsyncGenerator, Optional

 from core.base import (
     AsyncState,
@@ -211,7 +211,7 @@ class GraphExtractionPipe(AsyncPipe[dict]):
         run_id: Any,
         *args: Any,
         **kwargs: Any,
-    ) -> AsyncGenerator[Union[KGExtraction, R2RDocumentProcessingError], None]:
+    ) -> AsyncGenerator[KGExtraction | R2RDocumentProcessingError, None]:
         start_time = time.time()

         document_id = input.message["document_id"]

+ 13 - 6
core/providers/auth/r2r_auth.py

@@ -106,17 +106,24 @@ class R2RAuthProvider(AuthProvider):
                 status_code=401, message="Invalid or expired token"
                 status_code=401, message="Invalid or expired token"
             )
             )
 
 
-        email: str = payload.get("sub")
-        token_type: str = payload.get("token_type")
-        exp: float = payload.get("exp")
+        email = payload.get("sub")
+        token_type = payload.get("token_type")
+        exp = payload.get("exp")
+
         if email is None or token_type is None or exp is None:
             raise R2RException(status_code=401, message="Invalid token claims")

-        exp_datetime = datetime.fromtimestamp(exp, tz=timezone.utc)
+        email_str: str = email
+        token_type_str: str = token_type
+        exp_float: float = exp
+
+        exp_datetime = datetime.fromtimestamp(exp_float, tz=timezone.utc)
         if exp_datetime < datetime.now(timezone.utc):
             raise R2RException(status_code=401, message="Token has expired")

-        return TokenData(email=email, token_type=token_type, exp=exp_datetime)
+        return TokenData(
+            email=email_str, token_type=token_type_str, exp=exp_datetime
+        )

     async def authenticate_api_key(self, api_key: str) -> Optional[User]:
         """
@@ -479,7 +486,7 @@ class R2RAuthProvider(AuthProvider):
             user_id=user_id
         )

-    async def delete_user_api_key(self, user_id: UUID, key_id: UUID) -> bool:
+    async def delete_user_api_key(self, user_id: UUID, key_id: UUID) -> dict:
         return await self.database_provider.users_handler.delete_api_key(
             user_id=user_id,
             key_id=key_id,

+ 19 - 0
core/providers/auth/supabase.py

@@ -1,5 +1,7 @@
 import logging
 import os
+from typing import Optional
+from uuid import UUID

 from fastapi import Depends, HTTPException
 from fastapi.security import OAuth2PasswordBearer
@@ -216,3 +218,20 @@ class SupabaseAuthProvider(AuthProvider):

     async def send_reset_email(self, email: str) -> dict[str, str]:
         raise NotImplementedError("send_reset_email is not used with Supabase")
+
+    async def create_user_api_key(
+        self, user_id: UUID, name: Optional[str] = None
+    ) -> dict[str, str]:
+        raise NotImplementedError(
+            "API key management is not supported with Supabase authentication"
+        )
+
+    async def list_user_api_keys(self, user_id: UUID) -> list[dict]:
+        raise NotImplementedError(
+            "API key management is not supported with Supabase authentication"
+        )
+
+    async def delete_user_api_key(self, user_id: UUID, key_id: UUID) -> dict:
+        raise NotImplementedError(
+            "API key management is not supported with Supabase authentication"
+        )

+ 3 - 3
core/providers/ingestion/unstructured/base.py

@@ -63,7 +63,7 @@ class UnstructuredIngestionConfig(IngestionConfig):
     split_pdf_page: Optional[bool] = None
     starting_page_number: Optional[int] = None
     strategy: Optional[str] = None
-    chunking_strategy: Optional[ChunkingStrategy] = None
+    chunking_strategy: Optional[str | ChunkingStrategy] = None
     unique_element_ids: Optional[bool] = None
     xml_keep_tags: Optional[bool] = None

@@ -99,8 +99,8 @@ class UnstructuredIngestionProvider(IngestionProvider):
     EXTRA_PARSERS = {
         DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced},  # type: ignore
         DocumentType.PDF: {
-            "unstructured": parsers.PDFParserUnstructured,
-            "zerox": parsers.VLMPDFParser,
+            "unstructured": parsers.PDFParserUnstructured,  # type: ignore
+            "zerox": parsers.VLMPDFParser,  # type: ignore
         },
         DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced},  # type: ignore
     }

+ 0 - 69
core/telemetry/events.py

@@ -9,11 +9,6 @@ class BaseTelemetryEvent:
         self.event_id = str(uuid.uuid4())


-class DailyActiveUserEvent(BaseTelemetryEvent):
-    def __init__(self, user_id: str):
-        super().__init__("DailyActiveUser", {"user_id": user_id})
-
-
 class FeatureUsageEvent(BaseTelemetryEvent):
     def __init__(
         self,
@@ -48,67 +43,3 @@ class ErrorEvent(BaseTelemetryEvent):
                 "properties": properties or {},
                 "properties": properties or {},
             },
             },
         )
         )
-
-
-class RequestLatencyEvent(BaseTelemetryEvent):
-    def __init__(
-        self,
-        endpoint: str,
-        latency: float,
-        properties: Optional[dict[str, Any]] = None,
-    ):
-        super().__init__(
-            "RequestLatency",
-            {
-                "endpoint": endpoint,
-                "latency": latency,
-                "properties": properties or {},
-            },
-        )
-
-
-class GeographicDistributionEvent(BaseTelemetryEvent):
-    def __init__(
-        self,
-        user_id: str,
-        country: str,
-        properties: Optional[dict[str, Any]] = None,
-    ):
-        super().__init__(
-            "GeographicDistribution",
-            {
-                "user_id": user_id,
-                "country": country,
-                "properties": properties or {},
-            },
-        )
-
-
-class SessionDurationEvent(BaseTelemetryEvent):
-    def __init__(
-        self,
-        user_id: str,
-        duration: float,
-        properties: Optional[dict[str, Any]] = None,
-    ):
-        super().__init__(
-            "SessionDuration",
-            {
-                "user_id": user_id,
-                "duration": duration,
-                "properties": properties or {},
-            },
-        )
-
-
-class UserPathEvent(BaseTelemetryEvent):
-    def __init__(
-        self,
-        user_id: str,
-        path: str,
-        properties: Optional[dict[str, Any]] = None,
-    ):
-        super().__init__(
-            "UserPath",
-            {"user_id": user_id, "path": path, "properties": properties or {}},
-        )

+ 1 - 17
sdk/async_client.py

@@ -7,14 +7,6 @@ import httpx
 from shared.abstractions import R2RException

 from .base.base_client import BaseClient
-from .v2 import (
-    AuthMixins,
-    IngestionMixins,
-    KGMixins,
-    ManagementMixins,
-    RetrievalMixins,
-    ServerMixins,
-)
 from .v3 import (
     ChunksSDK,
     CollectionsSDK,
@@ -29,15 +21,7 @@ from .v3 import (
 )


-class R2RAsyncClient(
-    BaseClient,
-    AuthMixins,
-    IngestionMixins,
-    KGMixins,
-    ManagementMixins,
-    RetrievalMixins,
-    ServerMixins,
-):
+class R2RAsyncClient(BaseClient):
     """
     """
     Asynchronous client for interacting with the R2R API.
     Asynchronous client for interacting with the R2R API.
     """
     """

+ 0 - 4
sdk/models.py

@@ -8,9 +8,7 @@ from shared.abstractions import (
     KGCommunityResult,
     KGCreationSettings,
     KGEnrichmentSettings,
-    KGEntityDeduplicationSettings,
     KGEntityResult,
-    KGGlobalResult,
     KGRelationshipResult,
     KGRunType,
     KGSearchResultType,
@@ -33,7 +31,6 @@ __all__ = [
     "KGCreationSettings",
     "KGCreationSettings",
     "KGEnrichmentSettings",
     "KGEnrichmentSettings",
     "KGEntityResult",
     "KGEntityResult",
-    "KGGlobalResult",
     "KGRelationshipResult",
     "KGRelationshipResult",
     "KGRunType",
     "KGRunType",
     "GraphSearchResult",
     "GraphSearchResult",
@@ -48,7 +45,6 @@ __all__ = [
     "SearchSettings",
     "SearchSettings",
     "select_search_filters",
     "select_search_filters",
     "SearchMode",
     "SearchMode",
-    "KGEntityDeduplicationSettings",
     "RAGResponse",
     "RAGResponse",
     "CombinedSearchResponse",
     "CombinedSearchResponse",
     "User",
     "User",

+ 0 - 48
sdk/sync_client.py

@@ -5,14 +5,6 @@ import inspect
 from typing import Any, Callable, Coroutine, TypeVar

 from .async_client import R2RAsyncClient
-from .v2 import (
-    SyncAuthMixins,
-    SyncIngestionMixins,
-    SyncKGMixins,
-    SyncManagementMixins,
-    SyncRetrievalMixins,
-    SyncServerMixins,
-)

 T = TypeVar("T")

@@ -29,52 +21,12 @@ class R2RClient(R2RAsyncClient):
         # Only wrap v3 methods since they're already working
         self._wrap_v3_methods()

-        # Override v2 methods with sync versions
-        self._override_v2_methods()
-
     def _make_sync_request(self, *args, **kwargs):
         """Sync version of _make_request for v2 methods"""
         return self._loop.run_until_complete(
             self._async_make_request(*args, **kwargs)
         )

-    def _override_v2_methods(self):
-        """
-        Replace async v2 methods with sync versions
-        This is really ugly, but it's the only way to make it work once we
-        remove v2, we can just resort to the metaclass approach that is in utils
-        """
-        sync_mixins = {
-            SyncAuthMixins: ["auth_methods"],
-            SyncIngestionMixins: ["ingestion_methods"],
-            SyncKGMixins: ["kg_methods"],
-            SyncManagementMixins: ["management_methods"],
-            SyncRetrievalMixins: ["retrieval_methods"],
-            SyncServerMixins: ["server_methods"],
-        }
-
-        for sync_class in sync_mixins:
-            for name, method in sync_class.__dict__.items():
-                if not name.startswith("_") and inspect.isfunction(method):
-                    # Create a wrapper that uses sync _make_request
-                    def wrap_method(m):
-                        def wrapped(self, *args, **kwargs):
-                            # Temporarily swap _make_request
-                            original_make_request = self._make_request
-                            self._make_request = self._make_sync_request
-                            try:
-                                return m(self, *args, **kwargs)
-                            finally:
-                                # Restore original _make_request
-                                self._make_request = original_make_request
-
-                        return wrapped
-
-                    bound_method = wrap_method(method).__get__(
-                        self, self.__class__
-                    )
-                    setattr(self, name, bound_method)
-
     def _wrap_v3_methods(self) -> None:
         """Wraps only v3 SDK object methods"""
         sdk_objects = [

+ 3 - 3
sdk/v3/chunks.py

@@ -1,5 +1,5 @@
 import json
-from typing import Optional
+from typing import Any, Optional
 from uuid import UUID

 from shared.api.models.base import WrappedBooleanResponse
@@ -102,7 +102,7 @@ class ChunksSDK:
         Delete a specific chunk.

         Args:
-            id (Union[str, UUID]): ID of chunk to delete
+            id (str | UUID): ID of chunk to delete
         """
         """
         return await self.client._make_request(
         return await self.client._make_request(
             "DELETE",
             "DELETE",
@@ -168,7 +168,7 @@ class ChunksSDK:
         if search_settings and not isinstance(search_settings, dict):
             search_settings = search_settings.model_dump()

-        data = {
+        data: dict[str, Any] = {
             "query": query,
             "query": query,
             "search_settings": search_settings,
             "search_settings": search_settings,
         }
         }

+ 5 - 5
sdk/v3/collections.py

@@ -1,4 +1,4 @@
-from typing import Optional
+from typing import Any, Optional
 from uuid import UUID

 from shared.api.models.base import (
@@ -32,7 +32,7 @@ class CollectionsSDK:
         Returns:
             dict: Created collection information
         """
-        data = {"name": name, "description": description}
+        data: dict[str, Any] = {"name": name, "description": description}
         return await self.client._make_request(
             "POST",
             "collections",
@@ -104,7 +104,7 @@ class CollectionsSDK:
         Returns:
             dict: Updated collection information
         """
-        data = {}
+        data: dict[str, Any] = {}
         if name is not None:
             data["name"] = name
         if description is not None:
@@ -304,7 +304,7 @@ class CollectionsSDK:
             "run_with_orchestration": run_with_orchestration
             "run_with_orchestration": run_with_orchestration
         }
         }
 
 
-        data = {}
+        data: dict[str, Any] = {}
         if settings is not None:
             data["settings"] = settings

@@ -312,6 +312,6 @@ class CollectionsSDK:
             "POST",
             "POST",
             f"collections/{str(id)}/extract",
             f"collections/{str(id)}/extract",
             params=params,
             params=params,
-            json=data if data else None,
+            json=data or None,
             version="v3",
             version="v3",
         )
         )

+ 108 - 6
sdk/v3/conversations.py

@@ -1,6 +1,10 @@
+from builtins import list as _list
+from pathlib import Path
 from typing import Any, Optional
 from uuid import UUID

+import aiofiles
+
 from shared.api.models.base import WrappedBooleanResponse
 from shared.api.models.management.responses import (
     WrappedConversationMessagesResponse,
@@ -24,7 +28,9 @@ class ConversationsSDK:
         Returns:
             dict: Created conversation information
         """
-        data = {"name": name} if name else None
+        data: dict[str, Any] = {}
+        if name:
+            data["name"] = name

         return await self.client._make_request(
             "POST",
@@ -43,7 +49,7 @@ class ConversationsSDK:
         List conversations with pagination and sorting options.

         Args:
-            ids (Optional[list[Union[str, UUID]]]): List of conversation IDs to retrieve
+            ids (Optional[list[str | UUID]]): List of conversation IDs to retrieve
             offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
             limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.

@@ -72,7 +78,7 @@ class ConversationsSDK:
         Get detailed information about a specific conversation.

         Args:
-            id (Union[str, UUID]): The ID of the conversation to retrieve
+            id (str | UUID): The ID of the conversation to retrieve

         Returns:
             dict: Detailed conversation information
@@ -92,7 +98,7 @@ class ConversationsSDK:
         Update an existing conversation.

         Args:
-            id (Union[str, UUID]): The ID of the conversation to update
+            id (str | UUID): The ID of the conversation to update
             name (str): The new name of the conversation

         Returns:
@@ -117,7 +123,7 @@ class ConversationsSDK:
         Delete a conversation.

         Args:
-            id (Union[str, UUID]): The ID of the conversation to delete
+            id (str | UUID): The ID of the conversation to delete

         Returns:
             bool: True if deletion was successful
@@ -140,7 +146,7 @@ class ConversationsSDK:
         Add a new message to a conversation.

         Args:
-            id (Union[str, UUID]): The ID of the conversation to add the message to
+            id (str | UUID): The ID of the conversation to add the message to
             content (str): The content of the message
             role (str): The role of the message (e.g., "user" or "assistant")
             parent_id (Optional[str]): The ID of the parent message
@@ -193,3 +199,99 @@ class ConversationsSDK:
             json=data,
             version="v3",
         )
+
+    async def export(
+        self,
+        output_path: str | Path,
+        columns: Optional[_list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> None:
+        """
+        Export conversations to a CSV file, streaming the results directly to disk.
+
+        Args:
+            output_path (str | Path): Local path where the CSV file should be saved
+            columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
+            filters (Optional[dict]): Optional filters to apply when selecting conversations
+            include_header (bool): Whether to include column headers in the CSV (default: True)
+        """
+        # Convert path to string if it's a Path object
+        output_path = (
+            str(output_path) if isinstance(output_path, Path) else output_path
+        )
+
+        # Prepare request data
+        data: dict[str, Any] = {"include_header": include_header}
+        if columns:
+            data["columns"] = columns
+        if filters:
+            data["filters"] = filters
+
+        # Stream response directly to file
+        async with aiofiles.open(output_path, "wb") as f:
+            async with self.client.session.post(
+                f"{self.client.base_url}/v3/conversations/export",
+                json=data,
+                headers={
+                    "Accept": "text/csv",
+                    **self.client._get_auth_headers(),
+                },
+            ) as response:
+                if response.status != 200:
+                    raise ValueError(
+                        f"Export failed with status {response.status}",
+                        response,
+                    )
+
+                async for chunk in response.content.iter_chunks():
+                    if chunk:
+                        await f.write(chunk[0])
+
+    async def export_messages(
+        self,
+        output_path: str | Path,
+        columns: Optional[_list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> None:
+        """
+        Export messages to a CSV file, streaming the results directly to disk.
+
+        Args:
+            output_path (str | Path): Local path where the CSV file should be saved
+            columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
+            filters (Optional[dict]): Optional filters to apply when selecting messages
+            include_header (bool): Whether to include column headers in the CSV (default: True)
+        """
+        # Convert path to string if it's a Path object
+        output_path = (
+            str(output_path) if isinstance(output_path, Path) else output_path
+        )
+
+        # Prepare request data
+        data: dict[str, Any] = {"include_header": include_header}
+        if columns:
+            data["columns"] = columns
+        if filters:
+            data["filters"] = filters
+
+        # Stream response directly to file
+        async with aiofiles.open(output_path, "wb") as f:
+            async with self.client.session.post(
+                f"{self.client.base_url}/v3/conversations/export_messages",
+                json=data,
+                headers={
+                    "Accept": "text/csv",
+                    **self.client._get_auth_headers(),
+                },
+            ) as response:
+                if response.status != 200:
+                    raise ValueError(
+                        f"Export failed with status {response.status}",
+                        response,
+                    )
+
+                async for chunk in response.content.iter_chunks():
+                    if chunk:
+                        await f.write(chunk[0])
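
A short usage sketch for the streaming export methods added above. The import path, base URL, column names, and filter shape are assumptions, not taken from this commit.

import asyncio

from r2r import R2RAsyncClient  # import path assumed

async def main() -> None:
    client = R2RAsyncClient("http://localhost:7272")  # base URL assumed
    # Stream every conversation to a local CSV, keeping the header row
    await client.conversations.export("conversations.csv", include_header=True)
    # Export only selected message columns (names are illustrative)
    await client.conversations.export_messages(
        "messages.csv",
        columns=["id", "conversation_id", "content"],
    )

asyncio.run(main())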

+ 212 - 41
sdk/v3/documents.py

@@ -1,8 +1,12 @@
 import json
+from datetime import datetime
 from io import BytesIO
+from pathlib import Path
 from typing import Any, Optional
 from uuid import UUID

+import aiofiles
+
 from shared.api.models.base import WrappedBooleanResponse
 from shared.api.models.ingestion.responses import WrappedIngestionResponse
 from shared.api.models.management.responses import (
@@ -41,8 +45,8 @@ class DocumentsSDK:
         Args:
             file_path (Optional[str]): The file to upload, if any
             content (Optional[str]): Optional text content to upload, if no file path is provided
-            id (Optional[Union[str, UUID]]): Optional ID to assign to the document
-            collection_ids (Optional[list[Union[str, UUID]]]): Collection IDs to associate with the document. If none are provided, the document will be assigned to the user's default collection.
+            id (Optional[str | UUID]): Optional ID to assign to the document
+            collection_ids (Optional[list[str | UUID]]): Collection IDs to associate with the document. If none are provided, the document will be assigned to the user's default collection.
             metadata (Optional[dict]): Optional metadata to assign to the document
             metadata (Optional[dict]): Optional metadata to assign to the document
             ingestion_config (Optional[dict]): Optional ingestion configuration to use
             ingestion_config (Optional[dict]): Optional ingestion configuration to use
             run_with_orchestration (Optional[bool]): Whether to run with orchestration
             run_with_orchestration (Optional[bool]): Whether to run with orchestration
@@ -60,17 +64,23 @@ class DocumentsSDK:
                 "Only one of `file_path`, `raw_text` or `chunks` may be provided"
                 "Only one of `file_path`, `raw_text` or `chunks` may be provided"
             )
             )
 
 
-        data = {}
+        data: dict[str, Any] = {}
         files = None
         files = None
 
 
         if id:
         if id:
-            data["id"] = str(id)  # json.dumps(str(id))
+            data["id"] = str(id)
         if metadata:
         if metadata:
             data["metadata"] = json.dumps(metadata)
             data["metadata"] = json.dumps(metadata)
         if ingestion_config:
         if ingestion_config:
-            if not isinstance(ingestion_config, dict):
-                ingestion_config = ingestion_config.model_dump()
-            ingestion_config["app"] = {}
+            if isinstance(ingestion_config, IngestionMode):
+                ingestion_config = {"mode": ingestion_config.value}
+            app_config: dict[str, Any] = (
+                {}
+                if isinstance(ingestion_config, dict)
+                else ingestion_config["app"]
+            )
+            ingestion_config = dict(ingestion_config)
+            ingestion_config["app"] = app_config
             data["ingestion_config"] = json.dumps(ingestion_config)
             data["ingestion_config"] = json.dumps(ingestion_config)
         if collection_ids:
         if collection_ids:
             collection_ids = [str(collection_id) for collection_id in collection_ids]  # type: ignore
             collection_ids = [str(collection_id) for collection_id in collection_ids]  # type: ignore
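To illustrate the two shapes the rewritten ingestion_config branch accepts, a hedged sketch; the enum member, dict key, and import path are assumptions rather than values taken from this diff, and both calls assume an async context with client already constructed.

    from r2r import IngestionMode  # assumed import path

    async def ingest_examples(client) -> None:
        # Enum form: converted to {"mode": "<value>"} and then given an empty "app" section.
        await client.documents.create(
            file_path="report.pdf",
            ingestion_config=IngestionMode.fast,  # member name is illustrative
        )
        # Dict form: forwarded as-is, with an empty "app" section injected before serialization.
        await client.documents.create(
            file_path="report.pdf",
            ingestion_config={"chunk_size": 512},  # key is illustrative
        )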
@@ -125,7 +135,7 @@ class DocumentsSDK:
         Get a specific document by ID.
         Get a specific document by ID.
 
 
         Args:
         Args:
-            id (Union[str, UUID]): ID of document to retrieve
+            id (str | UUID): ID of document to retrieve
 
 
         Returns:
         Returns:
             dict: Document information
             dict: Document information
@@ -136,7 +146,6 @@ class DocumentsSDK:
             version="v3",
             version="v3",
         )
         )
 
 
-    # you could do something like:
     async def download(
     async def download(
         self,
         self,
         id: str | UUID,
         id: str | UUID,
@@ -145,12 +154,196 @@ class DocumentsSDK:
             "GET",
             "GET",
             f"documents/{str(id)}/download",
             f"documents/{str(id)}/download",
             version="v3",
             version="v3",
-            # No json parsing here, if possible
         )
         )
         if not isinstance(response, BytesIO):
         if not isinstance(response, BytesIO):
             raise ValueError("Expected BytesIO response")
             raise ValueError("Expected BytesIO response")
         return response
         return response
 
 
+    async def download_zip(
+        self,
+        document_ids: Optional[list[str | UUID]] = None,
+        start_date: Optional[datetime] = None,
+        end_date: Optional[datetime] = None,
+        output_path: Optional[str | Path] = None,
+    ) -> BytesIO | None:
+        """
+        Download multiple documents as a zip file.
+        """
+        params: dict[str, Any] = {}
+        if document_ids:
+            params["document_ids"] = [str(doc_id) for doc_id in document_ids]
+        if start_date:
+            params["start_date"] = start_date.isoformat()
+        if end_date:
+            params["end_date"] = end_date.isoformat()
+
+        response = await self.client._make_request(
+            "GET",
+            "documents/download_zip",
+            params=params,
+            version="v3",
+        )
+
+        if not isinstance(response, BytesIO):
+            raise ValueError("Expected BytesIO response")
+
+        if output_path:
+            output_path = (
+                Path(output_path)
+                if isinstance(output_path, str)
+                else output_path
+            )
+            async with aiofiles.open(output_path, "wb") as f:
+                await f.write(response.getvalue())
+            return None
+
+        return response
+
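A short sketch of the new bulk download, assuming an async context with client already available; the date range and document ID are placeholders.

    from datetime import datetime

    async def download_examples(client) -> None:
        # Write every matching document into a zip archive on disk (returns None).
        await client.documents.download_zip(
            start_date=datetime(2024, 1, 1),
            end_date=datetime(2024, 12, 31),
            output_path="documents_2024.zip",
        )
        # Without output_path the archive comes back as an in-memory BytesIO.
        archive = await client.documents.download_zip(
            document_ids=["00000000-0000-0000-0000-000000000000"],  # placeholder ID
        )
        print(len(archive.getvalue()), "bytes")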
+    async def export(
+        self,
+        output_path: str | Path,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> None:
+        """
+        Export documents to a CSV file, streaming the results directly to disk.
+
+        Args:
+            output_path (str | Path): Local path where the CSV file should be saved
+            columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
+            filters (Optional[dict]): Optional filters to apply when selecting documents
+            include_header (bool): Whether to include column headers in the CSV (default: True)
+        """
+        # Convert path to string if it's a Path object
+        output_path = (
+            str(output_path) if isinstance(output_path, Path) else output_path
+        )
+
+        data: dict[str, Any] = {"include_header": include_header}
+        if columns:
+            data["columns"] = columns
+        if filters:
+            data["filters"] = filters
+
+        # Stream response directly to file
+        async with aiofiles.open(output_path, "wb") as f:
+            async with self.client.session.post(
+                f"{self.client.base_url}/v3/documents/export",
+                json=data,
+                headers={
+                    "Accept": "text/csv",
+                    **self.client._get_auth_headers(),
+                },
+            ) as response:
+                if response.status != 200:
+                    raise ValueError(
+                        f"Export failed with status {response.status}",
+                        response,
+                    )
+
+                async for chunk in response.content.iter_chunks():
+                    if chunk:
+                        await f.write(chunk[0])
+
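Usage sketch for the document-level CSV export, assuming an async context; the columns and filter key are illustrative.

    async def export_documents_csv(client) -> None:
        # Stream selected document rows into a local CSV file.
        await client.documents.export(
            output_path="documents.csv",
            columns=["id", "title", "created_at"],  # illustrative column names
            filters={"document_type": "pdf"},       # illustrative filter key
        )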
+    async def export_entities(
+        self,
+        id: str | UUID,
+        output_path: str | Path,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> None:
+        """
+        Export a document's entities to a CSV file, streaming the results directly to disk.
+
+        Args:
+            id (str | UUID): ID of the document whose entities should be exported
+            output_path (str | Path): Local path where the CSV file should be saved
+            columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
+            filters (Optional[dict]): Optional filters to apply when selecting entities
+            include_header (bool): Whether to include column headers in the CSV (default: True)
+        """
+        # Convert path to string if it's a Path object
+        output_path = (
+            str(output_path) if isinstance(output_path, Path) else output_path
+        )
+
+        # Prepare request data
+        data: dict[str, Any] = {"include_header": include_header}
+        if columns:
+            data["columns"] = columns
+        if filters:
+            data["filters"] = filters
+
+        # Stream response directly to file
+        async with aiofiles.open(output_path, "wb") as f:
+            async with self.client.session.post(
+                f"{self.client.base_url}/v3/documents/{str(id)}/entities/export",
+                json=data,
+                headers={
+                    "Accept": "text/csv",
+                    **self.client._get_auth_headers(),
+                },
+            ) as response:
+                if response.status != 200:
+                    raise ValueError(
+                        f"Export failed with status {response.status}",
+                        response,
+                    )
+
+                async for chunk in response.content.iter_chunks():
+                    if chunk:
+                        await f.write(chunk[0])
+
+    async def export_relationships(
+        self,
+        id: str | UUID,
+        output_path: str | Path,
+        columns: Optional[list[str]] = None,
+        filters: Optional[dict] = None,
+        include_header: bool = True,
+    ) -> None:
+        """
+        Export document relationships to a CSV file, streaming the results directly to disk.
+
+        Args:
+            id (str | UUID): ID of the document whose relationships should be exported
+            output_path (str | Path): Local path where the CSV file should be saved
+            columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
+            filters (Optional[dict]): Optional filters to apply when selecting relationships
+            include_header (bool): Whether to include column headers in the CSV (default: True)
+        """
+        # Convert path to string if it's a Path object
+        output_path = (
+            str(output_path) if isinstance(output_path, Path) else output_path
+        )
+
+        # Prepare request data
+        data: dict[str, Any] = {"include_header": include_header}
+        if columns:
+            data["columns"] = columns
+        if filters:
+            data["filters"] = filters
+
+        # Stream response directly to file
+        async with aiofiles.open(output_path, "wb") as f:
+            async with self.client.session.post(
+                f"{self.client.base_url}/v3/documents/{str(id)}/relationships/export",
+                json=data,
+                headers={
+                    "Accept": "text/csv",
+                    **self.client._get_auth_headers(),
+                },
+            ) as response:
+                if response.status != 200:
+                    raise ValueError(
+                        f"Export failed with status {response.status}",
+                        response,
+                    )
+
+                async for chunk in response.content.iter_chunks():
+                    if chunk:
+                        await f.write(chunk[0])
+
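The two graph exports are used the same way; a brief sketch assuming an async context, with illustrative column names.

    async def export_graph_csv(client, document_id) -> None:
        # Entities and relationships extracted from one document, each written to CSV.
        await client.documents.export_entities(
            id=document_id,
            output_path="entities.csv",
            columns=["name", "category", "description"],  # illustrative column names
        )
        await client.documents.export_relationships(
            id=document_id,
            output_path="relationships.csv",
        )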
     async def delete(
     async def delete(
         self,
         self,
         id: str | UUID,
         id: str | UUID,
@@ -159,7 +352,7 @@ class DocumentsSDK:
         Delete a specific document.
         Delete a specific document.
 
 
         Args:
         Args:
-            id (Union[str, UUID]): ID of document to delete
+            id (str | UUID): ID of document to delete
         """
         """
         return await self.client._make_request(
         return await self.client._make_request(
             "DELETE",
             "DELETE",
@@ -178,7 +371,7 @@ class DocumentsSDK:
         Get chunks for a specific document.
         Get chunks for a specific document.
 
 
         Args:
         Args:
-            id (Union[str, UUID]): ID of document to retrieve chunks for
+            id (str | UUID): ID of document to retrieve chunks for
             include_vectors (Optional[bool]): Whether to include vector embeddings in the response
             include_vectors (Optional[bool]): Whether to include vector embeddings in the response
             offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
             offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
             limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
             limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
@@ -209,7 +402,7 @@ class DocumentsSDK:
         List collections for a specific document.
         List collections for a specific document.
 
 
         Args:
         Args:
-            id (Union[str, UUID]): ID of document to retrieve collections for
+            id (str | UUID): ID of document to retrieve collections for
             offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
             offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
             limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
             limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
 
 
@@ -259,7 +452,7 @@ class DocumentsSDK:
         Extract entities and relationships from a document.
         Extract entities and relationships from a document.
 
 
         Args:
         Args:
-            id (Union[str, UUID]): ID of document to extract from
+            id (str | UUID): ID of document to extract from
             run_type (Optional[str]): Whether to return an estimate or run extraction
             run_type (Optional[str]): Whether to return an estimate or run extraction
             settings (Optional[dict]): Settings for extraction process
             settings (Optional[dict]): Settings for extraction process
             run_with_orchestration (Optional[bool]): Whether to run with orchestration
             run_with_orchestration (Optional[bool]): Whether to run with orchestration
@@ -267,7 +460,7 @@ class DocumentsSDK:
         Returns:
         Returns:
             dict: Extraction results or cost estimate
             dict: Extraction results or cost estimate
         """
         """
-        data = {}
+        data: dict[str, Any] = {}
         if run_type:
         if run_type:
             data["run_type"] = run_type
             data["run_type"] = run_type
         if settings:
         if settings:
@@ -293,7 +486,7 @@ class DocumentsSDK:
         List entities extracted from a document.
         List entities extracted from a document.
 
 
         Args:
         Args:
-            id (Union[str, UUID]): ID of document to get entities from
+            id (str | UUID): ID of document to get entities from
             offset (Optional[int]): Number of items to skip
             offset (Optional[int]): Number of items to skip
             limit (Optional[int]): Max number of items to return
             limit (Optional[int]): Max number of items to return
             include_embeddings (Optional[bool]): Whether to include embeddings
             include_embeddings (Optional[bool]): Whether to include embeddings
@@ -325,7 +518,7 @@ class DocumentsSDK:
         List relationships extracted from a document.
         List relationships extracted from a document.
 
 
         Args:
         Args:
-            id (Union[str, UUID]): ID of document to get relationships from
+            id (str | UUID): ID of document to get relationships from
             offset (Optional[int]): Number of items to skip
             offset (Optional[int]): Number of items to skip
             limit (Optional[int]): Max number of items to return
             limit (Optional[int]): Max number of items to return
             entity_names (Optional[list[str]]): Filter by entity names
             entity_names (Optional[list[str]]): Filter by entity names
@@ -350,28 +543,6 @@ class DocumentsSDK:
             version="v3",
             version="v3",
         )
         )
 
 
-    # async def extract(
-    #     self,
-    #     id: str | UUID,
-    #     run_type: Optional[str] = None,
-    #     run_with_orchestration: Optional[bool] = True,
-    # ):
-    #     data = {}
-
-    #     if run_type:
-    #         data["run_type"] = run_type
-    #     if run_with_orchestration is not None:
-    #         data["run_with_orchestration"] = str(run_with_orchestration)
-
-    #     return await self.client._make_request(
-    #         "POST",
-    #         f"documents/{str(id)}/extract",
-    #         params=data,
-    #         version="v3",
-    #     )
-
-    # Be sure to put at bottom of the page...
-
     async def list(
     async def list(
         self,
         self,
         ids: Optional[list[str | UUID]] = None,
         ids: Optional[list[str | UUID]] = None,
@@ -382,7 +553,7 @@ class DocumentsSDK:
         List documents with pagination.
         List documents with pagination.
 
 
         Args:
         Args:
-            ids (Optional[list[Union[str, UUID]]]): Optional list of document IDs to filter by
+            ids (Optional[list[str | UUID]]): Optional list of document IDs to filter by
             offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
             offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
             limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
             limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
 
 
@@ -424,7 +595,7 @@ class DocumentsSDK:
 
 
         if search_settings and not isinstance(search_settings, dict):
         if search_settings and not isinstance(search_settings, dict):
             search_settings = search_settings.model_dump()
             search_settings = search_settings.model_dump()
-        data = {
+        data: dict[str, Any] = {
             "query": query,
             "query": query,
             "search_settings": search_settings,
             "search_settings": search_settings,
         }
         }

+ 2 - 2
sdk/v3/graphs.py

@@ -108,7 +108,7 @@ class GraphsSDK:
         Returns:
         Returns:
             dict: Updated graph information
             dict: Updated graph information
         """
         """
-        data = {}
+        data: dict[str, Any] = {}
         if name is not None:
         if name is not None:
             data["name"] = name
             data["name"] = name
         if description is not None:
         if description is not None:
@@ -290,7 +290,7 @@ class GraphsSDK:
         Returns:
         Returns:
             dict: Success message
             dict: Success message
         """
         """
-        data = {
+        data: dict[str, Any] = {
             "run_type": run_type,
             "run_type": run_type,
             "run_with_orchestration": run_with_orchestration,
             "run_with_orchestration": run_with_orchestration,
         }
         }

+ 3 - 29
sdk/v3/indices.py

@@ -1,5 +1,5 @@
 import json
 import json
-from typing import Optional
+from typing import Any, Optional
 
 
 from shared.api.models.base import WrappedGenericMessageResponse
 from shared.api.models.base import WrappedGenericMessageResponse
 from shared.api.models.ingestion.responses import (
 from shared.api.models.ingestion.responses import (
@@ -20,13 +20,13 @@ class IndicesSDK:
         Create a new vector similarity search index in the database.
         Create a new vector similarity search index in the database.
 
 
         Args:
         Args:
-            config (Union[dict, IndexConfig]): Configuration for the vector index.
+            config (dict | IndexConfig): Configuration for the vector index.
             run_with_orchestration (Optional[bool]): Whether to run index creation as an orchestrated task.
             run_with_orchestration (Optional[bool]): Whether to run index creation as an orchestrated task.
         """
         """
         if not isinstance(config, dict):
         if not isinstance(config, dict):
             config = config.model_dump()
             config = config.model_dump()
 
 
-        data = {
+        data: dict[str, Any] = {
             "config": config,
             "config": config,
             "run_with_orchestration": run_with_orchestration,
             "run_with_orchestration": run_with_orchestration,
         }
         }
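A hedged sketch of index creation with a plain dict config; the keys and values below are assumptions about what IndexConfig accepts and are not taken from this diff.

    async def create_index_example(client) -> None:
        await client.indices.create(
            config={
                "index_method": "hnsw",              # assumed key/value
                "index_measure": "cosine_distance",  # assumed key/value
                "index_name": "chunks_hnsw",         # assumed key
            },
            run_with_orchestration=False,
        )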
@@ -88,32 +88,6 @@ class IndicesSDK:
             version="v3",
             version="v3",
         )
         )
 
 
-    # async def update_index(
-    #     self,
-    #     id: Union[str, UUID],
-    #     config: dict,  # Union[dict, IndexConfig],
-    #     run_with_orchestration: Optional[bool] = True,
-    # ) -> dict:
-    #     """
-    #     Update an existing index's configuration.
-
-    #     Args:
-    #         id (Union[str, UUID]): The ID of the index to update.
-    #         config (Union[dict, IndexConfig]): The new configuration for the index.
-    #         run_with_orchestration (Optional[bool]): Whether to run the update as an orchestrated task.
-
-    #     Returns:
-    #         WrappedUpdateIndexResponse: The response containing the updated index details.
-    #     """
-    #     if not isinstance(config, dict):
-    #         config = config.model_dump()
-
-    #     data = {
-    #         "config": config,
-    #         "run_with_orchestration": run_with_orchestration,
-    #     }
-    #     return await self.client._make_request("POST", f"indices/{id}", json=data)  # type: ignore
-
     async def delete(
     async def delete(
         self,
         self,
         index_name: str,
         index_name: str,

+ 6 - 2
sdk/v3/prompts.py

@@ -1,5 +1,5 @@
 import json
 import json
-from typing import Optional
+from typing import Any, Optional
 
 
 from shared.api.models.base import (
 from shared.api.models.base import (
     WrappedBooleanResponse,
     WrappedBooleanResponse,
@@ -27,7 +27,11 @@ class PromptsSDK:
         Returns:
         Returns:
             dict: Created prompt information
             dict: Created prompt information
         """
         """
-        data = {"name": name, "template": template, "input_types": input_types}
+        data: dict[str, Any] = {
+            "name": name,
+            "template": template,
+            "input_types": input_types,
+        }
         return await self.client._make_request(
         return await self.client._make_request(
             "POST",
             "POST",
             "prompts",
             "prompts",

+ 6 - 6
sdk/v3/retrieval.py

@@ -1,4 +1,4 @@
-from typing import AsyncGenerator, Optional
+from typing import Any, AsyncGenerator, Optional
 
 
 from ..models import (
 from ..models import (
     CombinedSearchResponse,
     CombinedSearchResponse,
@@ -41,7 +41,7 @@ class RetrievalSDK:
         if search_settings and not isinstance(search_settings, dict):
         if search_settings and not isinstance(search_settings, dict):
             search_settings = search_settings.model_dump()
             search_settings = search_settings.model_dump()
 
 
-        data = {
+        data: dict[str, Any] = {
             "query": query,
             "query": query,
             "search_settings": search_settings,
             "search_settings": search_settings,
         }
         }
@@ -68,7 +68,7 @@ class RetrievalSDK:
         if generation_config and not isinstance(generation_config, dict):
         if generation_config and not isinstance(generation_config, dict):
             generation_config = generation_config.model_dump()
             generation_config = generation_config.model_dump()
 
 
-        data = {
+        data: dict[str, Any] = {
             "messages": [msg.model_dump() for msg in cast_messages],
             "messages": [msg.model_dump() for msg in cast_messages],
             "generation_config": generation_config,
             "generation_config": generation_config,
         }
         }
@@ -83,7 +83,7 @@ class RetrievalSDK:
         self,
         self,
         text: str,
         text: str,
     ):
     ):
-        data = {
+        data: dict[str, Any] = {
             "text": text,
             "text": text,
         }
         }
 
 
@@ -123,7 +123,7 @@ class RetrievalSDK:
         if search_settings and not isinstance(search_settings, dict):
         if search_settings and not isinstance(search_settings, dict):
             search_settings = search_settings.model_dump()
             search_settings = search_settings.model_dump()
 
 
-        data = {
+        data: dict[str, Any] = {
             "query": query,
             "query": query,
             "rag_generation_config": rag_generation_config,
             "rag_generation_config": rag_generation_config,
             "search_settings": search_settings,
             "search_settings": search_settings,
@@ -179,7 +179,7 @@ class RetrievalSDK:
         if search_settings and not isinstance(search_settings, dict):
         if search_settings and not isinstance(search_settings, dict):
             search_settings = search_settings.model_dump()
             search_settings = search_settings.model_dump()
 
 
-        data = {
+        data: dict[str, Any] = {
             "rag_generation_config": rag_generation_config or {},
             "rag_generation_config": rag_generation_config or {},
             "search_settings": search_settings,
             "search_settings": search_settings,
             "task_prompt_override": task_prompt_override,
             "task_prompt_override": task_prompt_override,

+ 13 - 8
sdk/v3/users.py

@@ -1,7 +1,6 @@
 from __future__ import annotations  # for Python 3.10+
 from __future__ import annotations  # for Python 3.10+
-import json
 
 
-from typing import Optional
+from typing import Any, Optional
 from uuid import UUID
 from uuid import UUID
 
 
 from shared.api.models.auth.responses import WrappedTokenResponse
 from shared.api.models.auth.responses import WrappedTokenResponse
@@ -72,7 +71,7 @@ class UsersSDK:
         Returns:
         Returns:
             UserResponse: New user information
             UserResponse: New user information
         """
         """
-        data = {"email": email, "password": password}
+        data: dict[str, Any] = {"email": email, "password": password}
         return await self.client._make_request(
         return await self.client._make_request(
             "POST",
             "POST",
             "users/register",
             "users/register",
@@ -94,7 +93,7 @@ class UsersSDK:
         Returns:
         Returns:
             dict: Deletion result
             dict: Deletion result
         """
         """
-        data = {"password": password}
+        data: dict[str, Any] = {"password": password}
         response = await self.client._make_request(
         response = await self.client._make_request(
             "DELETE",
             "DELETE",
             f"users/{str(id)}",
             f"users/{str(id)}",
@@ -118,7 +117,10 @@ class UsersSDK:
         Returns:
         Returns:
             dict: Verification result
             dict: Verification result
         """
         """
-        data = {"email": email, "verification_code": verification_code}
+        data: dict[str, Any] = {
+            "email": email,
+            "verification_code": verification_code,
+        }
         return await self.client._make_request(
         return await self.client._make_request(
             "POST",
             "POST",
             "users/verify-email",
             "users/verify-email",
@@ -141,7 +143,7 @@ class UsersSDK:
             raise ValueError(
             raise ValueError(
                 "Cannot log in after setting an API key, please unset your R2R_API_KEY variable or call client.set_api_key(None)"
                 "Cannot log in after setting an API key, please unset your R2R_API_KEY variable or call client.set_api_key(None)"
             )
             )
-        data = {"username": email, "password": password}
+        data: dict[str, Any] = {"username": email, "password": password}
         response = await self.client._make_request(
         response = await self.client._make_request(
             "POST",
             "POST",
             "users/login",
             "users/login",
@@ -227,7 +229,7 @@ class UsersSDK:
         Returns:
         Returns:
             dict: Change password result
             dict: Change password result
         """
         """
-        data = {
+        data: dict[str, Any] = {
             "current_password": current_password,
             "current_password": current_password,
             "new_password": new_password,
             "new_password": new_password,
         }
         }
@@ -270,7 +272,10 @@ class UsersSDK:
         Returns:
         Returns:
             dict: Password reset result
             dict: Password reset result
         """
         """
-        data = {"reset_token": reset_token, "new_password": new_password}
+        data: dict[str, Any] = {
+            "reset_token": reset_token,
+            "new_password": new_password,
+        }
         return await self.client._make_request(
         return await self.client._make_request(
             "POST",
             "POST",
             "users/reset-password",
             "users/reset-password",

+ 2 - 9
shared/abstractions/__init__.py

@@ -18,15 +18,11 @@ from .exception import (
     R2RDocumentProcessingError,
     R2RDocumentProcessingError,
     R2RException,
     R2RException,
 )
 )
-from .graph import Community, Entity, KGExtraction, Relationship
+from .graph import Community, Entity, KGExtraction, Relationship, StoreType
 from .kg import (
 from .kg import (
-    GraphBuildSettings,
     GraphCommunitySettings,
     GraphCommunitySettings,
-    GraphEntitySettings,
-    GraphRelationshipSettings,
     KGCreationSettings,
     KGCreationSettings,
     KGEnrichmentSettings,
     KGEnrichmentSettings,
-    KGEntityDeduplicationSettings,
     KGRunType,
     KGRunType,
 )
 )
 from .llm import (
 from .llm import (
@@ -47,7 +43,6 @@ from .search import (
     HybridSearchSettings,
     HybridSearchSettings,
     KGCommunityResult,
     KGCommunityResult,
     KGEntityResult,
     KGEntityResult,
-    KGGlobalResult,
     KGRelationshipResult,
     KGRelationshipResult,
     KGSearchResultType,
     KGSearchResultType,
     SearchMode,
     SearchMode,
@@ -99,6 +94,7 @@ __all__ = [
     "Community",
     "Community",
     "KGExtraction",
     "KGExtraction",
     "Relationship",
     "Relationship",
+    "StoreType",
     # LLM abstractions
     # LLM abstractions
     "GenerationConfig",
     "GenerationConfig",
     "LLMChatCompletion",
     "LLMChatCompletion",
@@ -114,7 +110,6 @@ __all__ = [
     "KGEntityResult",
     "KGEntityResult",
     "KGRelationshipResult",
     "KGRelationshipResult",
     "KGCommunityResult",
     "KGCommunityResult",
-    "KGGlobalResult",
     "GraphSearchSettings",
     "GraphSearchSettings",
     "ChunkSearchSettings",
     "ChunkSearchSettings",
     "ChunkSearchResult",
     "ChunkSearchResult",
@@ -127,8 +122,6 @@ __all__ = [
     "KGEnrichmentSettings",
     "KGEnrichmentSettings",
     "KGExtraction",
     "KGExtraction",
     "KGRunType",
     "KGRunType",
-    "GraphEntitySettings",
-    "GraphRelationshipSettings",
     "GraphCommunitySettings",
     "GraphCommunitySettings",
     # User abstractions
     # User abstractions
     "Token",
     "Token",

+ 9 - 4
shared/abstractions/base.py

@@ -2,7 +2,7 @@ import asyncio
 import json
 import json
 from datetime import datetime
 from datetime import datetime
 from enum import Enum
 from enum import Enum
-from typing import Any, Type, TypeVar, Union
+from typing import Any, Type, TypeVar
 from uuid import UUID
 from uuid import UUID
 
 
 from pydantic import BaseModel
 from pydantic import BaseModel
@@ -12,10 +12,15 @@ T = TypeVar("T", bound="R2RSerializable")
 
 
 class R2RSerializable(BaseModel):
 class R2RSerializable(BaseModel):
     @classmethod
     @classmethod
-    def from_dict(cls: Type[T], data: Union[dict[str, Any], str]) -> T:
+    def from_dict(cls: Type[T], data: dict[str, Any] | str) -> T:
         if isinstance(data, str):
         if isinstance(data, str):
-            data = json.loads(data)
-        return cls(**data)
+            try:
+                data_dict = json.loads(data)
+            except json.JSONDecodeError as e:
+                raise ValueError(f"Invalid JSON string: {e}") from e
+        else:
+            data_dict = data
+        return cls(**data_dict)
 
 
     def to_dict(self) -> dict[str, Any]:
     def to_dict(self) -> dict[str, Any]:
         data = self.model_dump(exclude_unset=True)
         data = self.model_dump(exclude_unset=True)
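A behaviour sketch for the hardened from_dict; Example is a hypothetical subclass used only for illustration.

    class Example(R2RSerializable):
        name: str

    Example.from_dict('{"name": "graph"}')   # JSON string is parsed first
    Example.from_dict({"name": "graph"})     # dicts pass straight through
    try:
        Example.from_dict("not json")
    except ValueError as err:                # invalid JSON now surfaces as ValueError
        print(err)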

+ 1 - 1
shared/abstractions/document.py

@@ -282,7 +282,7 @@ class ChunkEnrichmentSettings(R2RSerializable):
     )
     )
     strategies: list[ChunkEnrichmentStrategy] = Field(
     strategies: list[ChunkEnrichmentStrategy] = Field(
         default=[],
         default=[],
-        description="The strategies to use for chunk enrichment. Union of chunks obtained from each strategy is used as context.",
+        description="The strategies to use for chunk enrichment. List of chunks obtained from each strategy is used as context.",
     )
     )
     forward_chunks: int = Field(
     forward_chunks: int = Field(
         default=3,
         default=3,

+ 6 - 0
shared/abstractions/graph.py

@@ -1,6 +1,7 @@
 import json
 import json
 from dataclasses import dataclass
 from dataclasses import dataclass
 from datetime import datetime
 from datetime import datetime
+from enum import Enum
 from typing import Any, Optional
 from typing import Any, Optional
 from uuid import UUID
 from uuid import UUID
 
 
@@ -134,3 +135,8 @@ class Graph(R2RSerializable):
 
 
     def __init__(self, **kwargs):
     def __init__(self, **kwargs):
         super().__init__(**kwargs)
         super().__init__(**kwargs)
+
+
+class StoreType(str, Enum):
+    GRAPHS = "graphs"
+    DOCUMENTS = "documents"
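StoreType keys graph objects to the store they live in; a small sketch with a hypothetical helper (the table-name pattern is an assumption).

    def entity_table(store: StoreType) -> str:
        # Hypothetical helper: derive a table-name suffix from the store.
        return f"{store.value}_entities"

    entity_table(StoreType.GRAPHS)      # -> "graphs_entities"
    entity_table(StoreType.DOCUMENTS)   # -> "documents_entities"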

+ 1 - 1
shared/abstractions/ingestion.py

@@ -27,7 +27,7 @@ class ChunkEnrichmentSettings(R2RSerializable):
     )
     )
     strategies: list[ChunkEnrichmentStrategy] = Field(
     strategies: list[ChunkEnrichmentStrategy] = Field(
         default=[],
         default=[],
-        description="The strategies to use for chunk enrichment. Union of chunks obtained from each strategy is used as context.",
+        description="The strategies to use for chunk enrichment.",
     )
     )
     forward_chunks: int = Field(
     forward_chunks: int = Field(
         default=3,
         default=3,

+ 0 - 84
shared/abstractions/kg.py

@@ -19,17 +19,6 @@ class KGRunType(str, Enum):
 GraphRunType = KGRunType
 GraphRunType = KGRunType
 
 
 
 
-class KGEntityDeduplicationType(str, Enum):
-    """Type of KG entity deduplication."""
-
-    BY_NAME = "by_name"
-    BY_DESCRIPTION = "by_description"
-    BY_LLM = "by_llm"
-
-    def __str__(self):
-        return self.value
-
-
 class KGCreationSettings(R2RSerializable):
 class KGCreationSettings(R2RSerializable):
     """Settings for knowledge graph creation."""
     """Settings for knowledge graph creation."""
 
 
@@ -81,30 +70,6 @@ class KGCreationSettings(R2RSerializable):
     )
     )
 
 
 
 
-class KGEntityDeduplicationSettings(R2RSerializable):
-    """Settings for knowledge graph entity deduplication."""
-
-    graph_entity_deduplication_type: KGEntityDeduplicationType = Field(
-        default=KGEntityDeduplicationType.BY_NAME,
-        description="The type of entity deduplication to use.",
-    )
-
-    max_description_input_length: int = Field(
-        default=65536,
-        description="The maximum length of the description for a node in the graph.",
-    )
-
-    graph_entity_deduplication_prompt: str = Field(
-        default="graphrag_entity_deduplication",
-        description="The prompt to use for knowledge graph entity deduplication.",
-    )
-
-    generation_config: GenerationConfig = Field(
-        default_factory=GenerationConfig,
-        description="Configuration for text generation during graph entity deduplication.",
-    )
-
-
 class KGEnrichmentSettings(R2RSerializable):
 class KGEnrichmentSettings(R2RSerializable):
     """Settings for knowledge graph enrichment."""
     """Settings for knowledge graph enrichment."""
 
 
@@ -135,36 +100,6 @@ class KGEnrichmentSettings(R2RSerializable):
     )
     )
 
 
 
 
-class GraphEntitySettings(R2RSerializable):
-    """Settings for knowledge graph entity creation."""
-
-    graph_entity_deduplication_type: KGEntityDeduplicationType = Field(
-        default=KGEntityDeduplicationType.BY_NAME,
-        description="The type of entity deduplication to use.",
-    )
-
-    max_description_input_length: int = Field(
-        default=65536,
-        description="The maximum length of the description for a node in the graph.",
-    )
-
-    graph_entity_deduplication_prompt: str = Field(
-        default="graphrag_entity_deduplication",
-        description="The prompt to use for knowledge graph entity deduplication.",
-    )
-
-    generation_config: GenerationConfig = Field(
-        default_factory=GenerationConfig,
-        description="Configuration for text generation during graph entity deduplication.",
-    )
-
-
-class GraphRelationshipSettings(R2RSerializable):
-    """Settings for knowledge graph relationship creation."""
-
-    pass
-
-
 class GraphCommunitySettings(R2RSerializable):
 class GraphCommunitySettings(R2RSerializable):
     """Settings for knowledge graph community enrichment."""
     """Settings for knowledge graph community enrichment."""
 
 
@@ -192,22 +127,3 @@ class GraphCommunitySettings(R2RSerializable):
         default_factory=dict,
         default_factory=dict,
         description="Parameters for the Leiden algorithm.",
         description="Parameters for the Leiden algorithm.",
     )
     )
-
-
-class GraphBuildSettings(R2RSerializable):
-    """Settings for knowledge graph build."""
-
-    entity_settings: GraphEntitySettings = Field(
-        default=GraphEntitySettings(),
-        description="Settings for knowledge graph entity creation.",
-    )
-
-    relationship_settings: GraphRelationshipSettings = Field(
-        default=GraphRelationshipSettings(),
-        description="Settings for knowledge graph relationship creation.",
-    )
-
-    community_settings: GraphCommunitySettings = Field(
-        default=GraphCommunitySettings(),
-        description="Settings for knowledge graph community enrichment.",
-    )

+ 1 - 19
shared/abstractions/search.py

@@ -113,26 +113,8 @@ class KGCommunityResult(R2RSerializable):
         }
         }
 
 
 
 
-class KGGlobalResult(R2RSerializable):
-    name: str
-    description: str
-    metadata: Optional[dict[str, Any]] = None
-
-    class Config:
-        json_schema_extra = {
-            "name": "Global Result Name",
-            "description": "Global Result Description",
-            "metadata": {},
-        }
-
-
 class GraphSearchResult(R2RSerializable):
 class GraphSearchResult(R2RSerializable):
-    content: (
-        KGEntityResult
-        | KGRelationshipResult
-        | KGCommunityResult
-        | KGGlobalResult
-    )
+    content: KGEntityResult | KGRelationshipResult | KGCommunityResult
     result_type: Optional[KGSearchResultType] = None
     result_type: Optional[KGSearchResultType] = None
     chunk_ids: Optional[list[UUID]] = None
     chunk_ids: Optional[list[UUID]] = None
     metadata: dict[str, Any] = {}
     metadata: dict[str, Any] = {}

+ 0 - 1
shared/utils/base_utils.py

@@ -17,7 +17,6 @@ from ..abstractions.search import (
     AggregateSearchResult,
     AggregateSearchResult,
     KGCommunityResult,
     KGCommunityResult,
     KGEntityResult,
     KGEntityResult,
-    KGGlobalResult,
     KGRelationshipResult,
     KGRelationshipResult,
 )
 )
 from ..abstractions.vector import VectorQuantizationType
 from ..abstractions.vector import VectorQuantizationType

+ 77 - 80
shared/utils/splitter/text.py

@@ -37,9 +37,7 @@ from typing import (
     Any,
     Any,
     Callable,
     Callable,
     Collection,
     Collection,
-    Dict,
     Iterable,
     Iterable,
-    List,
     Literal,
     Literal,
     Optional,
     Optional,
     Sequence,
     Sequence,
@@ -47,7 +45,6 @@ from typing import (
     Type,
     Type,
     TypedDict,
     TypedDict,
     TypeVar,
     TypeVar,
-    Union,
     cast,
     cast,
 )
 )
 
 
@@ -64,16 +61,16 @@ class BaseSerialized(TypedDict):
     """Base class for serialized objects."""
     """Base class for serialized objects."""
 
 
     lc: int
     lc: int
-    id: List[str]
+    id: list[str]
     name: NotRequired[str]
     name: NotRequired[str]
-    graph: NotRequired[Dict[str, Any]]
+    graph: NotRequired[dict[str, Any]]
 
 
 
 
 class SerializedConstructor(BaseSerialized):
 class SerializedConstructor(BaseSerialized):
     """Serialized constructor."""
     """Serialized constructor."""
 
 
     type: Literal["constructor"]
     type: Literal["constructor"]
-    kwargs: Dict[str, Any]
+    kwargs: dict[str, Any]
 
 
 
 
 class SerializedSecret(BaseSerialized):
 class SerializedSecret(BaseSerialized):
@@ -115,7 +112,7 @@ class Serializable(BaseModel, ABC):
         return False
         return False
 
 
     @classmethod
     @classmethod
-    def get_lc_namespace(cls) -> List[str]:
+    def get_lc_namespace(cls) -> list[str]:
         """Get the namespace of the langchain object.
         """Get the namespace of the langchain object.
 
 
         For example, if the class is `langchain.llms.openai.OpenAI`, then the
         For example, if the class is `langchain.llms.openai.OpenAI`, then the
@@ -124,16 +121,16 @@ class Serializable(BaseModel, ABC):
         return cls.__module__.split(".")
         return cls.__module__.split(".")
 
 
     @property
     @property
-    def lc_secrets(self) -> Dict[str, str]:
+    def lc_secrets(self) -> dict[str, str]:
         """A map of constructor argument names to secret ids.
         """A map of constructor argument names to secret ids.
 
 
         For example,
         For example,
             {"openai_api_key": "OPENAI_API_KEY"}
             {"openai_api_key": "OPENAI_API_KEY"}
         """
         """
-        return dict()
+        return {}
 
 
     @property
     @property
-    def lc_attributes(self) -> Dict:
+    def lc_attributes(self) -> dict:
         """List of attribute names that should be included in the serialized kwargs.
         """List of attribute names that should be included in the serialized kwargs.
 
 
         These attributes must be accepted by the constructor.
         These attributes must be accepted by the constructor.
@@ -141,7 +138,7 @@ class Serializable(BaseModel, ABC):
         return {}
         return {}
 
 
     @classmethod
     @classmethod
-    def lc_id(cls) -> List[str]:
+    def lc_id(cls) -> list[str]:
         """A unique identifier for this class for serialization purposes.
         """A unique identifier for this class for serialization purposes.
 
 
         The unique identifier is a list of strings that describes the path
         The unique identifier is a list of strings that describes the path
@@ -159,7 +156,7 @@ class Serializable(BaseModel, ABC):
             if (k not in self.__fields__ or try_neq_default(v, k, self))
             if (k not in self.__fields__ or try_neq_default(v, k, self))
         ]
         ]
 
 
-    _lc_kwargs = PrivateAttr(default_factory=dict)
+    _lc_kwargs: dict[str, Any] = PrivateAttr(default_factory=dict)
 
 
     def __init__(self, **kwargs: Any) -> None:
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
         super().__init__(**kwargs)
@@ -167,7 +164,7 @@ class Serializable(BaseModel, ABC):
 
 
     def to_json(
     def to_json(
         self,
         self,
-    ) -> Union[SerializedConstructor, SerializedNotImplemented]:
+    ) -> SerializedConstructor | SerializedNotImplemented:
         if not self.is_lc_serializable():
         if not self.is_lc_serializable():
             return self.to_json_not_implemented()
             return self.to_json_not_implemented()
 
 
@@ -238,8 +235,8 @@ class Serializable(BaseModel, ABC):
 
 
 
 
 def _replace_secrets(
 def _replace_secrets(
-    root: Dict[Any, Any], secrets_map: Dict[str, str]
-) -> Dict[Any, Any]:
+    root: dict[Any, Any], secrets_map: dict[str, str]
+) -> dict[Any, Any]:
     result = root.copy()
     result = root.copy()
     for path, secret_id in secrets_map.items():
     for path, secret_id in secrets_map.items():
         [*parts, last] = path.split(".")
         [*parts, last] = path.split(".")
@@ -267,7 +264,7 @@ def to_json_not_implemented(obj: object) -> SerializedNotImplemented:
     Returns:
     Returns:
         SerializedNotImplemented
         SerializedNotImplemented
     """
     """
-    _id: List[str] = []
+    _id: list[str] = []
     try:
     try:
         if hasattr(obj, "__name__"):
         if hasattr(obj, "__name__"):
             _id = [*obj.__module__.split("."), obj.__name__]
             _id = [*obj.__module__.split("."), obj.__name__]
@@ -313,7 +310,7 @@ class SplitterDocument(Serializable):
         return True
         return True
 
 
     @classmethod
     @classmethod
-    def get_lc_namespace(cls) -> List[str]:
+    def get_lc_namespace(cls) -> list[str]:
         """Get the namespace of the langchain object."""
         """Get the namespace of the langchain object."""
         return ["langchain", "schema", "document"]
         return ["langchain", "schema", "document"]
 
 
@@ -406,7 +403,7 @@ def _make_spacy_pipe_for_splitting(
 
 
 def _split_text_with_regex(
 def _split_text_with_regex(
     text: str, separator: str, keep_separator: bool
     text: str, separator: str, keep_separator: bool
-) -> List[str]:
+) -> list[str]:
     # Now that we have the separator, split the text
     # Now that we have the separator, split the text
     if separator:
     if separator:
         if keep_separator:
         if keep_separator:
@@ -461,12 +458,12 @@ class TextSplitter(BaseDocumentTransformer, ABC):
         self._strip_whitespace = strip_whitespace
         self._strip_whitespace = strip_whitespace
 
 
     @abstractmethod
     @abstractmethod
-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split text into multiple components."""
         """Split text into multiple components."""
 
 
     def create_documents(
     def create_documents(
-        self, texts: List[str], metadatas: Optional[List[dict]] = None
-    ) -> List[SplitterDocument]:
+        self, texts: list[str], metadatas: Optional[list[dict]] = None
+    ) -> list[SplitterDocument]:
         """Create documents from a list of texts."""
         """Create documents from a list of texts."""
         _metadatas = metadatas or [{}] * len(texts)
         _metadatas = metadatas or [{}] * len(texts)
         documents = []
         documents = []
@@ -488,7 +485,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
 
 
     def split_documents(
     def split_documents(
         self, documents: Iterable[SplitterDocument]
         self, documents: Iterable[SplitterDocument]
-    ) -> List[SplitterDocument]:
+    ) -> list[SplitterDocument]:
         """Split documents."""
         """Split documents."""
         texts, metadatas = [], []
         texts, metadatas = [], []
         for doc in documents:
         for doc in documents:
@@ -496,7 +493,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
             metadatas.append(doc.metadata)
             metadatas.append(doc.metadata)
         return self.create_documents(texts, metadatas=metadatas)
         return self.create_documents(texts, metadatas=metadatas)
 
 
-    def _join_docs(self, docs: List[str], separator: str) -> Optional[str]:
+    def _join_docs(self, docs: list[str], separator: str) -> Optional[str]:
         text = separator.join(docs)
         text = separator.join(docs)
         if self._strip_whitespace:
         if self._strip_whitespace:
             text = text.strip()
             text = text.strip()
@@ -507,13 +504,13 @@ class TextSplitter(BaseDocumentTransformer, ABC):
 
 
     def _merge_splits(
     def _merge_splits(
         self, splits: Iterable[str], separator: str
         self, splits: Iterable[str], separator: str
-    ) -> List[str]:
+    ) -> list[str]:
         # We now want to combine these smaller pieces into medium size
         # We now want to combine these smaller pieces into medium size
         # chunks to send to the LLM.
         # chunks to send to the LLM.
         separator_len = self._length_function(separator)
         separator_len = self._length_function(separator)
 
 
         docs = []
         docs = []
-        current_doc: List[str] = []
+        current_doc: list[str] = []
         total = 0
         total = 0
         for d in splits:
         for d in splits:
             _len = self._length_function(d)
             _len = self._length_function(d)
@@ -579,8 +576,8 @@ class TextSplitter(BaseDocumentTransformer, ABC):
         cls: Type[TS],
         cls: Type[TS],
         encoding_name: str = "gpt2",
         encoding_name: str = "gpt2",
         model: Optional[str] = None,
         model: Optional[str] = None,
-        allowed_special: Union[Literal["all"], AbstractSet[str]] = set(),
-        disallowed_special: Union[Literal["all"], Collection[str]] = "all",
+        allowed_special: Literal["all"] | AbstractSet[str] = set(),
+        disallowed_special: Literal["all"] | Collection[str] = "all",
         **kwargs: Any,
         **kwargs: Any,
     ) -> TS:
     ) -> TS:
         """Text splitter that uses tiktoken encoder to count length."""
         """Text splitter that uses tiktoken encoder to count length."""
@@ -641,7 +638,7 @@ class CharacterTextSplitter(TextSplitter):
         self._separator = separator
         self._separator = separator
         self._is_separator_regex = is_separator_regex
         self._is_separator_regex = is_separator_regex
 
 
-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split incoming text and return chunks."""
         """Split incoming text and return chunks."""
         # First we naively split the large input into a bunch of smaller ones.
         # First we naively split the large input into a bunch of smaller ones.
         separator = (
         separator = (
@@ -657,7 +654,7 @@ class CharacterTextSplitter(TextSplitter):
 class LineType(TypedDict):
 class LineType(TypedDict):
     """Line type as typed dict."""
     """Line type as typed dict."""
 
 
-    metadata: Dict[str, str]
+    metadata: dict[str, str]
     content: str
     content: str
 
 
 
 
@@ -674,7 +671,7 @@ class MarkdownHeaderTextSplitter:
 
 
     def __init__(
     def __init__(
         self,
         self,
-        headers_to_split_on: List[Tuple[str, str]],
+        headers_to_split_on: list[Tuple[str, str]],
         return_each_line: bool = False,
         return_each_line: bool = False,
         strip_headers: bool = True,
         strip_headers: bool = True,
     ):
     ):
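A quick sketch of splitting on markdown headings; the header labels are arbitrary metadata keys chosen for the example.

    splitter = MarkdownHeaderTextSplitter(
        headers_to_split_on=[("#", "h1"), ("##", "h2")],
        strip_headers=False,
    )
    docs = splitter.split_text("# Intro\nSome text.\n## Details\nMore text.")
    # Each SplitterDocument keeps the active headers in its metadata,
    # e.g. {"h1": "Intro", "h2": "Details"} for the last chunk.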
@@ -696,13 +693,13 @@ class MarkdownHeaderTextSplitter:
         self.strip_headers = strip_headers
         self.strip_headers = strip_headers
 
 
     def aggregate_lines_to_chunks(
     def aggregate_lines_to_chunks(
-        self, lines: List[LineType]
-    ) -> List[SplitterDocument]:
+        self, lines: list[LineType]
+    ) -> list[SplitterDocument]:
         """Combine lines with common metadata into chunks
         """Combine lines with common metadata into chunks
         Args:
         Args:
             lines: Line of text / associated header metadata
             lines: Line of text / associated header metadata
         """
         """
-        aggregated_chunks: List[LineType] = []
+        aggregated_chunks: list[LineType] = []
 
 
         for line in lines:
         for line in lines:
             if (
             if (
@@ -742,7 +739,7 @@ class MarkdownHeaderTextSplitter:
             for chunk in aggregated_chunks
             for chunk in aggregated_chunks
         ]
         ]
 
 
-    def split_text(self, text: str) -> List[SplitterDocument]:
+    def split_text(self, text: str) -> list[SplitterDocument]:
         """Split markdown file
         """Split markdown file
         Args:
         Args:
             text: Markdown file"""
             text: Markdown file"""
@@ -750,14 +747,14 @@ class MarkdownHeaderTextSplitter:
         # Split the input text by newline character ("\n").
         # Split the input text by newline character ("\n").
         lines = text.split("\n")
         lines = text.split("\n")
         # Final output
         # Final output
-        lines_with_metadata: List[LineType] = []
+        lines_with_metadata: list[LineType] = []
         # Content and metadata of the chunk currently being processed
         # Content and metadata of the chunk currently being processed
-        current_content: List[str] = []
-        current_metadata: Dict[str, str] = {}
+        current_content: list[str] = []
+        current_metadata: dict[str, str] = {}
         # Keep track of the nested header structure
         # Keep track of the nested header structure
-        # header_stack: List[Dict[str, Union[int, str]]] = []
-        header_stack: List[HeaderType] = []
-        initial_metadata: Dict[str, str] = {}
+        # header_stack: list[dict[str, int | str]] = []
+        header_stack: list[HeaderType] = []
+        initial_metadata: dict[str, str] = {}
 
 
         in_code_block = False
         in_code_block = False
         opening_fence = ""
         opening_fence = ""
@@ -879,7 +876,7 @@ class ElementType(TypedDict):
     url: str
     url: str
     xpath: str
     xpath: str
     content: str
     content: str
-    metadata: Dict[str, str]
+    metadata: dict[str, str]
 
 
 
 
 class HTMLHeaderTextSplitter:
 class HTMLHeaderTextSplitter:
@@ -890,7 +887,7 @@ class HTMLHeaderTextSplitter:
 
 
     def __init__(
     def __init__(
         self,
         self,
-        headers_to_split_on: List[Tuple[str, str]],
+        headers_to_split_on: list[Tuple[str, str]],
         return_each_element: bool = False,
         return_each_element: bool = False,
     ):
     ):
         """Create a new HTMLHeaderTextSplitter.
         """Create a new HTMLHeaderTextSplitter.
@@ -906,14 +903,14 @@ class HTMLHeaderTextSplitter:
         self.headers_to_split_on = sorted(headers_to_split_on)
         self.headers_to_split_on = sorted(headers_to_split_on)
 
 
     def aggregate_elements_to_chunks(
     def aggregate_elements_to_chunks(
-        self, elements: List[ElementType]
-    ) -> List[SplitterDocument]:
+        self, elements: list[ElementType]
+    ) -> list[SplitterDocument]:
         """Combine elements with common metadata into chunks
         """Combine elements with common metadata into chunks
 
 
         Args:
         Args:
             elements: HTML element content with associated identifying info and metadata
             elements: HTML element content with associated identifying info and metadata
         """
         """
-        aggregated_chunks: List[ElementType] = []
+        aggregated_chunks: list[ElementType] = []
 
 
         for element in elements:
         for element in elements:
             if (
             if (
@@ -935,7 +932,7 @@ class HTMLHeaderTextSplitter:
             for chunk in aggregated_chunks
             for chunk in aggregated_chunks
         ]
         ]
 
 
-    def split_text_from_url(self, url: str) -> List[SplitterDocument]:
+    def split_text_from_url(self, url: str) -> list[SplitterDocument]:
         """Split HTML from web URL
         """Split HTML from web URL
 
 
         Args:
         Args:
@@ -944,7 +941,7 @@ class HTMLHeaderTextSplitter:
         r = requests.get(url)
         r = requests.get(url)
         return self.split_text_from_file(BytesIO(r.content))
         return self.split_text_from_file(BytesIO(r.content))
 
 
-    def split_text(self, text: str) -> List[SplitterDocument]:
+    def split_text(self, text: str) -> list[SplitterDocument]:
         """Split HTML text string
         """Split HTML text string
 
 
         Args:
         Args:
@@ -952,7 +949,7 @@ class HTMLHeaderTextSplitter:
         """
         """
         return self.split_text_from_file(StringIO(text))
         return self.split_text_from_file(StringIO(text))
 
 
-    def split_text_from_file(self, file: Any) -> List[SplitterDocument]:
+    def split_text_from_file(self, file: Any) -> list[SplitterDocument]:
         """Split HTML file
         """Split HTML file
 
 
         Args:
         Args:
@@ -1048,15 +1045,15 @@ class Tokenizer:
     """Overlap in tokens between chunks"""
     tokens_per_chunk: int
     """Maximum number of tokens per chunk"""
-    decode: Callable[[List[int]], str]
+    decode: Callable[[list[int]], str]
     """ Function to decode a list of token ids to a string"""
-    encode: Callable[[str], List[int]]
+    encode: Callable[[str], list[int]]
     """ Function to encode a string to a list of token ids"""


-def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> List[str]:
+def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> list[str]:
     """Split incoming text and return chunks using tokenizer."""
-    splits: List[str] = []
+    splits: list[str] = []
     input_ids = tokenizer.encode(text)
     start_idx = 0
     cur_idx = min(start_idx + tokenizer.tokens_per_chunk, len(input_ids))
@@ -1078,8 +1075,8 @@ class TokenTextSplitter(TextSplitter):
         self,
         encoding_name: str = "gpt2",
         model: Optional[str] = None,
-        allowed_special: Union[Literal["all"], AbstractSet[str]] = set(),
-        disallowed_special: Union[Literal["all"], Collection[str]] = "all",
+        allowed_special: Literal["all"] | AbstractSet[str] = set(),
+        disallowed_special: Literal["all"] | Collection[str] = "all",
         **kwargs: Any,
     ) -> None:
         """Create a new TextSplitter."""
@@ -1101,8 +1098,8 @@ class TokenTextSplitter(TextSplitter):
         self._allowed_special = allowed_special
         self._disallowed_special = disallowed_special

-    def split_text(self, text: str) -> List[str]:
-        def _encode(_text: str) -> List[int]:
+    def split_text(self, text: str) -> list[str]:
+        def _encode(_text: str) -> list[int]:
             return self._tokenizer.encode(
                 _text,
                 allowed_special=self._allowed_special,
@@ -1164,8 +1161,8 @@ class SentenceTransformersTokenTextSplitter(TextSplitter):
                 f" > maximum token limit."
             )

-    def split_text(self, text: str) -> List[str]:
-        def encode_strip_start_and_stop_token_ids(text: str) -> List[int]:
+    def split_text(self, text: str) -> list[str]:
+        def encode_strip_start_and_stop_token_ids(text: str) -> list[int]:
             return self._encode(text)[1:-1]

         tokenizer = Tokenizer(
@@ -1182,7 +1179,7 @@ class SentenceTransformersTokenTextSplitter(TextSplitter):

     _max_length_equal_32_bit_integer: int = 2**32

-    def _encode(self, text: str) -> List[int]:
+    def _encode(self, text: str) -> list[int]:
         token_ids_with_start_and_end_token_ids = self.tokenizer.encode(
             text,
             max_length=self._max_length_equal_32_bit_integer,
@@ -1228,7 +1225,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):

     def __init__(
         self,
-        separators: Optional[List[str]] = None,
+        separators: Optional[list[str]] = None,
         keep_separator: bool = True,
         is_separator_regex: bool = False,
         chunk_size: int = 4000,
@@ -1247,7 +1244,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
         self.chunk_size = chunk_size
         self.chunk_overlap = chunk_overlap

-    def _split_text(self, text: str, separators: List[str]) -> List[str]:
+    def _split_text(self, text: str, separators: list[str]) -> list[str]:
         """Split incoming text and return chunks."""
         final_chunks = []
         # Get appropriate separator to use
@@ -1289,7 +1286,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
             final_chunks.extend(merged_text)
         return final_chunks

-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         return self._split_text(text, self._separators)

     @classmethod
@@ -1300,7 +1297,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
         return cls(separators=separators, is_separator_regex=True, **kwargs)

     @staticmethod
-    def get_separators_for_language(language: Language) -> List[str]:
+    def get_separators_for_language(language: Language) -> list[str]:
         if language == Language.CPP:
             return [
                 # Split along class definitions
@@ -1781,7 +1778,7 @@ class NLTKTextSplitter(TextSplitter):
         self._separator = separator
         self._language = language

-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split incoming text and return chunks."""
         # First we naively split the large input into a bunch of smaller ones.
         splits = self._tokenizer(text, language=self._language)
@@ -1812,7 +1809,7 @@ class SpacyTextSplitter(TextSplitter):
         )
         self._separator = separator

-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split incoming text and return chunks."""
         splits = (s.text for s in self._tokenizer(text).sents)
         return self._merge_splits(splits, self._separator)
@@ -1843,7 +1840,7 @@ class KonlpyTextSplitter(TextSplitter):
             )
         self.kkma = Kkma()

-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split incoming text and return chunks."""
         splits = self.kkma.sentences(text)
         return self._merge_splits(splits, self._separator)
@@ -1890,12 +1887,12 @@ class RecursiveJsonSplitter:
         )

     @staticmethod
-    def _json_size(data: Dict) -> int:
+    def _json_size(data: dict) -> int:
         """Calculate the size of the serialized JSON object."""
         return len(json.dumps(data))

     @staticmethod
-    def _set_nested_dict(d: Dict, path: List[str], value: Any) -> None:
+    def _set_nested_dict(d: dict, path: list[str], value: Any) -> None:
         """Set a value in a nested dictionary based on the given path."""
         for key in path[:-1]:
             d = d.setdefault(key, {})
@@ -1919,10 +1916,10 @@ class RecursiveJsonSplitter:

     def _json_split(
         self,
-        data: Dict[str, Any],
-        current_path: List[str] = [],
-        chunks: List[Dict] = [{}],
-    ) -> List[Dict]:
+        data: dict[str, Any],
+        current_path: list[str] = [],
+        chunks: list[dict] = [{}],
+    ) -> list[dict]:
         """
         Split json into maximum size dictionaries while preserving structure.
         """
@@ -1950,9 +1947,9 @@ class RecursiveJsonSplitter:

     def split_json(
         self,
-        json_data: Dict[str, Any],
+        json_data: dict[str, Any],
         convert_lists: bool = False,
-    ) -> List[Dict]:
+    ) -> list[dict]:
         """Splits JSON into a list of JSON chunks"""

         if convert_lists:
@@ -1968,8 +1965,8 @@ class RecursiveJsonSplitter:
         return chunks

     def split_text(
-        self, json_data: Dict[str, Any], convert_lists: bool = False
-    ) -> List[str]:
+        self, json_data: dict[str, Any], convert_lists: bool = False
+    ) -> list[str]:
         """Splits JSON into a list of JSON formatted strings"""

         chunks = self.split_json(
@@ -1981,11 +1978,11 @@ class RecursiveJsonSplitter:

     def create_documents(
         self,
-        texts: List[Dict],
+        texts: list[dict],
         convert_lists: bool = False,
-        metadatas: Optional[List[dict]] = None,
-    ) -> List[SplitterDocument]:
-        """Create documents from a list of json objects (Dict)."""
+        metadatas: Optional[list[dict]] = None,
+    ) -> list[SplitterDocument]:
+        """Create documents from a list of json objects (dict)."""
         _metadatas = metadatas or [{}] * len(texts)
         documents = []
         for i, text in enumerate(texts):
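Editorial note: the `shared/utils/splitter/text.py` hunks above are a mechanical typing cleanup. `typing.List` and `typing.Dict` annotations become the PEP 585 builtin generics `list[...]` and `dict[...]`, and the two `Union[Literal["all"], ...]` parameters become PEP 604 `|` unions, while `Optional[...]` and `Tuple[...]` are left untouched in several signatures. The sketch below is illustrative only (not code from this repository) and assumes Python 3.10+, or `from __future__ import annotations` on older interpreters; the function name and defaults are made up for the example.

# Illustrative sketch of the annotation style this commit migrates to
# (PEP 585 builtin generics + PEP 604 unions). Names below are hypothetical.
from typing import AbstractSet, Any, Literal


def split_text(
    text: str,
    separators: list[str] | None = None,  # was Optional[List[str]]
    # kept only to mirror the Union -> "|" change in TokenTextSplitter.__init__
    allowed_special: Literal["all"] | AbstractSet[str] = frozenset(),
) -> list[str]:
    """Split on the first separator that occurs in the text."""
    for sep in separators or ["\n\n", "\n", " "]:
        if sep in text:
            return [part for part in text.split(sep) if part]
    return [text]


chunks: dict[str, Any] = {"demo": split_text("alpha beta  gamma")}
print(chunks)  # {'demo': ['alpha', 'beta', 'gamma']}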

+ 4 - 2
tests/unit/test_collections.py

@@ -1,8 +1,10 @@
-import pytest
 import uuid
 from uuid import UUID
-from core.base.api.models import CollectionResponse
+
+import pytest
+
 from core.base import R2RException
+from core.base.api.models import CollectionResponse


 @pytest.mark.asyncio
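Editorial note: the only change in `tests/unit/test_collections.py` is import ordering, regrouped into standard library, third-party, and first-party blocks separated by blank lines (PEP 8 / isort convention). The snippet below is an optional illustration, not part of this commit; it assumes the isort package is installed and treats `core` as the first-party root.

# Optional illustration: isort reproduces the grouping used in the reordered header.
import isort

messy = (
    "import pytest\n"
    "import uuid\n"
    "from uuid import UUID\n"
    "from core.base import R2RException\n"
)
# Groups come out as: stdlib (uuid), third-party (pytest), first-party (core.*).
print(isort.code(messy, known_first_party=["core"]))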

+ 26 - 27
tests/unit/test_graphs.py

@@ -1,9 +1,8 @@
-import pytest
 import uuid
-from uuid import UUID
-
 from enum import Enum
-from core.base.abstractions import Entity, Relationship, Community
+
+import pytest
+
 from core.base.api.models import GraphResponse
 
 
@@ -35,7 +34,7 @@ async def test_add_entities_and_relationships(graphs_handler):
     # Add an entity
     entity = await graphs_handler.entities.create(
         parent_id=graph_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
         name="TestEntity",
         category="Person",
         description="A test entity",
@@ -45,7 +44,7 @@ async def test_add_entities_and_relationships(graphs_handler):
     # Add another entity
     entity2 = await graphs_handler.entities.create(
         parent_id=graph_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
         name="AnotherEntity",
         category="Place",
         description="A test place",
@@ -59,7 +58,7 @@ async def test_add_entities_and_relationships(graphs_handler):
         object="AnotherEntity",
         object_id=entity2.id,
         parent_id=graph_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
         description="Entity lives in AnotherEntity",
     )
     assert rel.predicate == "lives_in"
@@ -92,12 +91,12 @@ async def test_delete_entities_and_relationships(graphs_handler):
     # Add entities
     e1 = await graphs_handler.entities.create(
         parent_id=graph_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
         name="DeleteMe",
     )
     e2 = await graphs_handler.entities.create(
         parent_id=graph_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
         name="DeleteMeToo",
     )

@@ -109,14 +108,14 @@ async def test_delete_entities_and_relationships(graphs_handler):
         object="DeleteMeToo",
         object_id=e2.id,
         parent_id=graph_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
     )

     # Delete one entity
     await graphs_handler.entities.delete(
         parent_id=graph_id,
         entity_ids=[e1.id],
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
     )
     ents, count = await graphs_handler.get_entities(
         parent_id=graph_id, offset=0, limit=10
@@ -128,7 +127,7 @@ async def test_delete_entities_and_relationships(graphs_handler):
     await graphs_handler.relationships.delete(
         parent_id=graph_id,
         relationship_ids=[rel.id],
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
     )
     rels, rel_count = await graphs_handler.get_relationships(
         parent_id=graph_id, offset=0, limit=10
@@ -142,7 +141,7 @@ async def test_communities(graphs_handler):
     coll_id = uuid.uuid4()
     await graphs_handler.communities.create(
         parent_id=coll_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
         name="CommunityOne",
         summary="Test community",
         findings=["finding1", "finding2"],
@@ -153,7 +152,7 @@ async def test_communities(graphs_handler):

     comms, count = await graphs_handler.communities.get(
         parent_id=coll_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
         offset=0,
         limit=10,
     )
@@ -284,7 +283,7 @@ async def test_bulk_entities(graphs_handler):
     for ent in entities_to_add:
         await graphs_handler.entities.create(
             parent_id=graph_id,
-            store_type=StoreType.GRAPHS.value,
+            store_type=StoreType.GRAPHS,
             name=ent["name"],
             category=ent["category"],
             description=ent["description"],
@@ -309,13 +308,13 @@ async def test_relationship_filtering(graphs_handler):

     # Add entities
     e1 = await graphs_handler.entities.create(
-        parent_id=graph_id, store_type=StoreType.GRAPHS.value, name="Node1"
+        parent_id=graph_id, store_type=StoreType.GRAPHS, name="Node1"
     )
     e2 = await graphs_handler.entities.create(
-        parent_id=graph_id, store_type=StoreType.GRAPHS.value, name="Node2"
+        parent_id=graph_id, store_type=StoreType.GRAPHS, name="Node2"
     )
     e3 = await graphs_handler.entities.create(
-        parent_id=graph_id, store_type=StoreType.GRAPHS.value, name="Node3"
+        parent_id=graph_id, store_type=StoreType.GRAPHS, name="Node3"
     )

     # Add different relationships
@@ -326,7 +325,7 @@ async def test_relationship_filtering(graphs_handler):
         object="Node2",
         object_id=e2.id,
         parent_id=graph_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
     )

     await graphs_handler.relationships.create(
@@ -336,7 +335,7 @@ async def test_relationship_filtering(graphs_handler):
         object="Node3",
         object_id=e3.id,
         parent_id=graph_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
     )

     # Get all relationships
@@ -366,15 +365,15 @@ async def test_delete_all_entities(graphs_handler):

     # Add some entities
     await graphs_handler.entities.create(
-        parent_id=graph_id, store_type=StoreType.GRAPHS.value, name="E1"
+        parent_id=graph_id, store_type=StoreType.GRAPHS, name="E1"
     )
     await graphs_handler.entities.create(
-        parent_id=graph_id, store_type=StoreType.GRAPHS.value, name="E2"
+        parent_id=graph_id, store_type=StoreType.GRAPHS, name="E2"
     )

     # Delete all entities without specifying IDs
     await graphs_handler.entities.delete(
-        parent_id=graph_id, store_type=StoreType.GRAPHS.value
+        parent_id=graph_id, store_type=StoreType.GRAPHS
     )
     ents, count = await graphs_handler.get_entities(
         parent_id=graph_id, offset=0, limit=10
@@ -392,10 +391,10 @@ async def test_delete_all_relationships(graphs_handler):

     # Add two entities and a relationship
     e1 = await graphs_handler.entities.create(
-        parent_id=graph_id, store_type=StoreType.GRAPHS.value, name="E1"
+        parent_id=graph_id, store_type=StoreType.GRAPHS, name="E1"
     )
     e2 = await graphs_handler.entities.create(
-        parent_id=graph_id, store_type=StoreType.GRAPHS.value, name="E2"
+        parent_id=graph_id, store_type=StoreType.GRAPHS, name="E2"
     )
     await graphs_handler.relationships.create(
         subject="E1",
@@ -404,12 +403,12 @@ async def test_delete_all_relationships(graphs_handler):
         object="E2",
         object_id=e2.id,
         parent_id=graph_id,
-        store_type=StoreType.GRAPHS.value,
+        store_type=StoreType.GRAPHS,
     )

     # Delete all relationships
     await graphs_handler.relationships.delete(
-        parent_id=graph_id, store_type=StoreType.GRAPHS.value
+        parent_id=graph_id, store_type=StoreType.GRAPHS
     )
     rels, rel_count = await graphs_handler.get_relationships(
         parent_id=graph_id, offset=0, limit=10
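Editorial note: apart from the same import regrouping and the removal of the now-unused `UUID`, `Entity`, `Relationship`, and `Community` imports, every change in `tests/unit/test_graphs.py` replaces `store_type=StoreType.GRAPHS.value` with the enum member itself, i.e. the handlers are now exercised with `StoreType` rather than its raw string value. The sketch below shows that calling convention in isolation; the `StoreType` members and the `resolve_store_table` helper are hypothetical stand-ins, not the repository's actual API.

# Hypothetical sketch (not the actual graphs_handler implementation): accept a
# StoreType member directly while still tolerating the old raw-string form.
from enum import Enum


class StoreType(str, Enum):  # member names/values assumed for illustration
    GRAPHS = "graphs"
    DOCUMENTS = "documents"


def resolve_store_table(store_type: StoreType | str) -> str:
    """Normalize either StoreType.GRAPHS or "graphs" to a table name."""
    store = StoreType(store_type)  # Enum(value) also accepts an existing member
    return f"{store.value}_entities"


assert resolve_store_table(StoreType.GRAPHS) == "graphs_entities"
assert resolve_store_table("graphs") == "graphs_entities"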