@@ -37,9 +37,7 @@ from typing import (
     Any,
     Callable,
     Collection,
-    Dict,
     Iterable,
-    List,
     Literal,
     Optional,
     Sequence,
@@ -47,7 +45,6 @@ from typing import (
     Type,
     TypedDict,
     TypeVar,
-    Union,
     cast,
 )
 
@@ -64,16 +61,16 @@ class BaseSerialized(TypedDict):
     """Base class for serialized objects."""
 
     lc: int
-    id: List[str]
+    id: list[str]
     name: NotRequired[str]
-    graph: NotRequired[Dict[str, Any]]
+    graph: NotRequired[dict[str, Any]]
 
 
 class SerializedConstructor(BaseSerialized):
     """Serialized constructor."""
 
     type: Literal["constructor"]
-    kwargs: Dict[str, Any]
+    kwargs: dict[str, Any]
 
 
 class SerializedSecret(BaseSerialized):
@@ -115,7 +112,7 @@ class Serializable(BaseModel, ABC):
         return False
 
     @classmethod
-    def get_lc_namespace(cls) -> List[str]:
+    def get_lc_namespace(cls) -> list[str]:
         """Get the namespace of the langchain object.
 
         For example, if the class is `langchain.llms.openai.OpenAI`, then the
@@ -124,16 +121,16 @@ class Serializable(BaseModel, ABC):
         return cls.__module__.split(".")
 
     @property
-    def lc_secrets(self) -> Dict[str, str]:
+    def lc_secrets(self) -> dict[str, str]:
         """A map of constructor argument names to secret ids.
 
         For example,
             {"openai_api_key": "OPENAI_API_KEY"}
         """
-        return dict()
+        return {}
 
     @property
-    def lc_attributes(self) -> Dict:
+    def lc_attributes(self) -> dict:
         """List of attribute names that should be included in the serialized kwargs.
 
         These attributes must be accepted by the constructor.
@@ -141,7 +138,7 @@ class Serializable(BaseModel, ABC):
         return {}
 
     @classmethod
-    def lc_id(cls) -> List[str]:
+    def lc_id(cls) -> list[str]:
         """A unique identifier for this class for serialization purposes.
 
         The unique identifier is a list of strings that describes the path
@@ -159,7 +156,7 @@ class Serializable(BaseModel, ABC):
             if (k not in self.__fields__ or try_neq_default(v, k, self))
         ]
 
-    _lc_kwargs = PrivateAttr(default_factory=dict)
+    _lc_kwargs: dict[str, Any] = PrivateAttr(default_factory=dict)
 
     def __init__(self, **kwargs: Any) -> None:
         super().__init__(**kwargs)
@@ -167,7 +164,7 @@ class Serializable(BaseModel, ABC):
 
     def to_json(
         self,
-    ) -> Union[SerializedConstructor, SerializedNotImplemented]:
+    ) -> SerializedConstructor | SerializedNotImplemented:
         if not self.is_lc_serializable():
             return self.to_json_not_implemented()
 
@@ -238,8 +235,8 @@ class Serializable(BaseModel, ABC):
 
 
 def _replace_secrets(
-    root: Dict[Any, Any], secrets_map: Dict[str, str]
-) -> Dict[Any, Any]:
+    root: dict[Any, Any], secrets_map: dict[str, str]
+) -> dict[Any, Any]:
     result = root.copy()
     for path, secret_id in secrets_map.items():
         [*parts, last] = path.split(".")
@@ -267,7 +264,7 @@ def to_json_not_implemented(obj: object) -> SerializedNotImplemented:
     Returns:
         SerializedNotImplemented
     """
-    _id: List[str] = []
+    _id: list[str] = []
     try:
         if hasattr(obj, "__name__"):
             _id = [*obj.__module__.split("."), obj.__name__]
@@ -313,7 +310,7 @@ class SplitterDocument(Serializable):
         return True
 
     @classmethod
-    def get_lc_namespace(cls) -> List[str]:
+    def get_lc_namespace(cls) -> list[str]:
         """Get the namespace of the langchain object."""
         return ["langchain", "schema", "document"]
 
@@ -406,7 +403,7 @@ def _make_spacy_pipe_for_splitting(
 
 def _split_text_with_regex(
     text: str, separator: str, keep_separator: bool
-) -> List[str]:
+) -> list[str]:
     # Now that we have the separator, split the text
     if separator:
         if keep_separator:
@@ -461,12 +458,12 @@ class TextSplitter(BaseDocumentTransformer, ABC):
         self._strip_whitespace = strip_whitespace
 
     @abstractmethod
-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split text into multiple components."""
 
     def create_documents(
-        self, texts: List[str], metadatas: Optional[List[dict]] = None
-    ) -> List[SplitterDocument]:
+        self, texts: list[str], metadatas: Optional[list[dict]] = None
+    ) -> list[SplitterDocument]:
         """Create documents from a list of texts."""
         _metadatas = metadatas or [{}] * len(texts)
         documents = []
@@ -488,7 +485,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
 
     def split_documents(
         self, documents: Iterable[SplitterDocument]
-    ) -> List[SplitterDocument]:
+    ) -> list[SplitterDocument]:
         """Split documents."""
         texts, metadatas = [], []
         for doc in documents:
@@ -496,7 +493,7 @@ class TextSplitter(BaseDocumentTransformer, ABC):
             metadatas.append(doc.metadata)
         return self.create_documents(texts, metadatas=metadatas)
 
-    def _join_docs(self, docs: List[str], separator: str) -> Optional[str]:
+    def _join_docs(self, docs: list[str], separator: str) -> Optional[str]:
         text = separator.join(docs)
         if self._strip_whitespace:
             text = text.strip()
@@ -507,13 +504,13 @@ class TextSplitter(BaseDocumentTransformer, ABC):
 
     def _merge_splits(
         self, splits: Iterable[str], separator: str
-    ) -> List[str]:
+    ) -> list[str]:
         # We now want to combine these smaller pieces into medium size
         # chunks to send to the LLM.
         separator_len = self._length_function(separator)
 
         docs = []
-        current_doc: List[str] = []
+        current_doc: list[str] = []
         total = 0
         for d in splits:
             _len = self._length_function(d)
@@ -579,8 +576,8 @@ class TextSplitter(BaseDocumentTransformer, ABC):
         cls: Type[TS],
         encoding_name: str = "gpt2",
         model: Optional[str] = None,
-        allowed_special: Union[Literal["all"], AbstractSet[str]] = set(),
-        disallowed_special: Union[Literal["all"], Collection[str]] = "all",
+        allowed_special: Literal["all"] | AbstractSet[str] = set(),
+        disallowed_special: Literal["all"] | Collection[str] = "all",
         **kwargs: Any,
     ) -> TS:
         """Text splitter that uses tiktoken encoder to count length."""
@@ -641,7 +638,7 @@ class CharacterTextSplitter(TextSplitter):
         self._separator = separator
         self._is_separator_regex = is_separator_regex
 
-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split incoming text and return chunks."""
         # First we naively split the large input into a bunch of smaller ones.
         separator = (
@@ -657,7 +654,7 @@ class CharacterTextSplitter(TextSplitter):
 class LineType(TypedDict):
     """Line type as typed dict."""
 
-    metadata: Dict[str, str]
+    metadata: dict[str, str]
     content: str
 
 
@@ -674,7 +671,7 @@ class MarkdownHeaderTextSplitter:
 
     def __init__(
         self,
-        headers_to_split_on: List[Tuple[str, str]],
+        headers_to_split_on: list[Tuple[str, str]],
         return_each_line: bool = False,
         strip_headers: bool = True,
     ):
@@ -696,13 +693,13 @@ class MarkdownHeaderTextSplitter:
         self.strip_headers = strip_headers
 
     def aggregate_lines_to_chunks(
-        self, lines: List[LineType]
-    ) -> List[SplitterDocument]:
+        self, lines: list[LineType]
+    ) -> list[SplitterDocument]:
         """Combine lines with common metadata into chunks
         Args:
             lines: Line of text / associated header metadata
         """
-        aggregated_chunks: List[LineType] = []
+        aggregated_chunks: list[LineType] = []
 
         for line in lines:
             if (
@@ -742,7 +739,7 @@ class MarkdownHeaderTextSplitter:
             for chunk in aggregated_chunks
         ]
 
-    def split_text(self, text: str) -> List[SplitterDocument]:
+    def split_text(self, text: str) -> list[SplitterDocument]:
         """Split markdown file
         Args:
             text: Markdown file"""
@@ -750,14 +747,14 @@ class MarkdownHeaderTextSplitter:
         # Split the input text by newline character ("\n").
         lines = text.split("\n")
         # Final output
-        lines_with_metadata: List[LineType] = []
+        lines_with_metadata: list[LineType] = []
         # Content and metadata of the chunk currently being processed
-        current_content: List[str] = []
-        current_metadata: Dict[str, str] = {}
+        current_content: list[str] = []
+        current_metadata: dict[str, str] = {}
         # Keep track of the nested header structure
-        # header_stack: List[Dict[str, Union[int, str]]] = []
-        header_stack: List[HeaderType] = []
-        initial_metadata: Dict[str, str] = {}
+        # header_stack: list[dict[str, int | str]] = []
+        header_stack: list[HeaderType] = []
+        initial_metadata: dict[str, str] = {}
 
         in_code_block = False
         opening_fence = ""
@@ -879,7 +876,7 @@ class ElementType(TypedDict):
     url: str
     xpath: str
     content: str
-    metadata: Dict[str, str]
+    metadata: dict[str, str]
 
 
 class HTMLHeaderTextSplitter:
@@ -890,7 +887,7 @@ class HTMLHeaderTextSplitter:
 
     def __init__(
         self,
-        headers_to_split_on: List[Tuple[str, str]],
+        headers_to_split_on: list[Tuple[str, str]],
         return_each_element: bool = False,
     ):
         """Create a new HTMLHeaderTextSplitter.
@@ -906,14 +903,14 @@ class HTMLHeaderTextSplitter:
         self.headers_to_split_on = sorted(headers_to_split_on)
 
     def aggregate_elements_to_chunks(
-        self, elements: List[ElementType]
-    ) -> List[SplitterDocument]:
+        self, elements: list[ElementType]
+    ) -> list[SplitterDocument]:
         """Combine elements with common metadata into chunks
 
         Args:
             elements: HTML element content with associated identifying info and metadata
         """
-        aggregated_chunks: List[ElementType] = []
+        aggregated_chunks: list[ElementType] = []
 
         for element in elements:
             if (
@@ -935,7 +932,7 @@ class HTMLHeaderTextSplitter:
             for chunk in aggregated_chunks
         ]
 
-    def split_text_from_url(self, url: str) -> List[SplitterDocument]:
+    def split_text_from_url(self, url: str) -> list[SplitterDocument]:
         """Split HTML from web URL
 
         Args:
@@ -944,7 +941,7 @@ class HTMLHeaderTextSplitter:
         r = requests.get(url)
         return self.split_text_from_file(BytesIO(r.content))
 
-    def split_text(self, text: str) -> List[SplitterDocument]:
+    def split_text(self, text: str) -> list[SplitterDocument]:
         """Split HTML text string
 
         Args:
@@ -952,7 +949,7 @@ class HTMLHeaderTextSplitter:
         """
         return self.split_text_from_file(StringIO(text))
 
-    def split_text_from_file(self, file: Any) -> List[SplitterDocument]:
+    def split_text_from_file(self, file: Any) -> list[SplitterDocument]:
         """Split HTML file
 
         Args:
@@ -1048,15 +1045,15 @@ class Tokenizer:
     """Overlap in tokens between chunks"""
     tokens_per_chunk: int
     """Maximum number of tokens per chunk"""
-    decode: Callable[[List[int]], str]
+    decode: Callable[[list[int]], str]
     """ Function to decode a list of token ids to a string"""
-    encode: Callable[[str], List[int]]
+    encode: Callable[[str], list[int]]
     """ Function to encode a string to a list of token ids"""
 
 
-def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> List[str]:
+def split_text_on_tokens(*, text: str, tokenizer: Tokenizer) -> list[str]:
     """Split incoming text and return chunks using tokenizer."""
-    splits: List[str] = []
+    splits: list[str] = []
     input_ids = tokenizer.encode(text)
     start_idx = 0
     cur_idx = min(start_idx + tokenizer.tokens_per_chunk, len(input_ids))
@@ -1078,8 +1075,8 @@ class TokenTextSplitter(TextSplitter):
         self,
         encoding_name: str = "gpt2",
         model: Optional[str] = None,
-        allowed_special: Union[Literal["all"], AbstractSet[str]] = set(),
-        disallowed_special: Union[Literal["all"], Collection[str]] = "all",
+        allowed_special: Literal["all"] | AbstractSet[str] = set(),
+        disallowed_special: Literal["all"] | Collection[str] = "all",
         **kwargs: Any,
     ) -> None:
         """Create a new TextSplitter."""
@@ -1101,8 +1098,8 @@ class TokenTextSplitter(TextSplitter):
         self._allowed_special = allowed_special
         self._disallowed_special = disallowed_special
 
-    def split_text(self, text: str) -> List[str]:
-        def _encode(_text: str) -> List[int]:
+    def split_text(self, text: str) -> list[str]:
+        def _encode(_text: str) -> list[int]:
             return self._tokenizer.encode(
                 _text,
                 allowed_special=self._allowed_special,
@@ -1164,8 +1161,8 @@ class SentenceTransformersTokenTextSplitter(TextSplitter):
             f" > maximum token limit."
         )
 
-    def split_text(self, text: str) -> List[str]:
-        def encode_strip_start_and_stop_token_ids(text: str) -> List[int]:
+    def split_text(self, text: str) -> list[str]:
+        def encode_strip_start_and_stop_token_ids(text: str) -> list[int]:
             return self._encode(text)[1:-1]
 
         tokenizer = Tokenizer(
@@ -1182,7 +1179,7 @@ class SentenceTransformersTokenTextSplitter(TextSplitter):
 
     _max_length_equal_32_bit_integer: int = 2**32
 
-    def _encode(self, text: str) -> List[int]:
+    def _encode(self, text: str) -> list[int]:
         token_ids_with_start_and_end_token_ids = self.tokenizer.encode(
             text,
             max_length=self._max_length_equal_32_bit_integer,
@@ -1228,7 +1225,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
 
     def __init__(
         self,
-        separators: Optional[List[str]] = None,
+        separators: Optional[list[str]] = None,
         keep_separator: bool = True,
         is_separator_regex: bool = False,
         chunk_size: int = 4000,
@@ -1247,7 +1244,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
         self.chunk_size = chunk_size
         self.chunk_overlap = chunk_overlap
 
-    def _split_text(self, text: str, separators: List[str]) -> List[str]:
+    def _split_text(self, text: str, separators: list[str]) -> list[str]:
         """Split incoming text and return chunks."""
         final_chunks = []
         # Get appropriate separator to use
@@ -1289,7 +1286,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
             final_chunks.extend(merged_text)
         return final_chunks
 
-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         return self._split_text(text, self._separators)
 
     @classmethod
@@ -1300,7 +1297,7 @@ class RecursiveCharacterTextSplitter(TextSplitter):
         return cls(separators=separators, is_separator_regex=True, **kwargs)
 
     @staticmethod
-    def get_separators_for_language(language: Language) -> List[str]:
+    def get_separators_for_language(language: Language) -> list[str]:
         if language == Language.CPP:
             return [
                 # Split along class definitions
@@ -1781,7 +1778,7 @@ class NLTKTextSplitter(TextSplitter):
         self._separator = separator
         self._language = language
 
-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split incoming text and return chunks."""
         # First we naively split the large input into a bunch of smaller ones.
         splits = self._tokenizer(text, language=self._language)
@@ -1812,7 +1809,7 @@ class SpacyTextSplitter(TextSplitter):
         )
         self._separator = separator
 
-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split incoming text and return chunks."""
         splits = (s.text for s in self._tokenizer(text).sents)
         return self._merge_splits(splits, self._separator)
@@ -1843,7 +1840,7 @@ class KonlpyTextSplitter(TextSplitter):
         )
         self.kkma = Kkma()
 
-    def split_text(self, text: str) -> List[str]:
+    def split_text(self, text: str) -> list[str]:
         """Split incoming text and return chunks."""
         splits = self.kkma.sentences(text)
         return self._merge_splits(splits, self._separator)
@@ -1890,12 +1887,12 @@ class RecursiveJsonSplitter:
         )
 
     @staticmethod
-    def _json_size(data: Dict) -> int:
+    def _json_size(data: dict) -> int:
         """Calculate the size of the serialized JSON object."""
         return len(json.dumps(data))
 
     @staticmethod
-    def _set_nested_dict(d: Dict, path: List[str], value: Any) -> None:
+    def _set_nested_dict(d: dict, path: list[str], value: Any) -> None:
         """Set a value in a nested dictionary based on the given path."""
         for key in path[:-1]:
             d = d.setdefault(key, {})
@@ -1919,10 +1916,10 @@ class RecursiveJsonSplitter:
 
     def _json_split(
         self,
-        data: Dict[str, Any],
-        current_path: List[str] = [],
-        chunks: List[Dict] = [{}],
-    ) -> List[Dict]:
+        data: dict[str, Any],
+        current_path: list[str] = [],
+        chunks: list[dict] = [{}],
+    ) -> list[dict]:
         """
         Split json into maximum size dictionaries while preserving structure.
         """
@@ -1950,9 +1947,9 @@ class RecursiveJsonSplitter:
 
     def split_json(
         self,
-        json_data: Dict[str, Any],
+        json_data: dict[str, Any],
         convert_lists: bool = False,
-    ) -> List[Dict]:
+    ) -> list[dict]:
         """Splits JSON into a list of JSON chunks"""
 
         if convert_lists:
@@ -1968,8 +1965,8 @@ class RecursiveJsonSplitter:
         return chunks
 
     def split_text(
-        self, json_data: Dict[str, Any], convert_lists: bool = False
-    ) -> List[str]:
+        self, json_data: dict[str, Any], convert_lists: bool = False
+    ) -> list[str]:
         """Splits JSON into a list of JSON formatted strings"""
 
         chunks = self.split_json(
@@ -1981,11 +1978,11 @@ class RecursiveJsonSplitter:
 
     def create_documents(
         self,
-        texts: List[Dict],
+        texts: list[dict],
         convert_lists: bool = False,
-        metadatas: Optional[List[dict]] = None,
-    ) -> List[SplitterDocument]:
-        """Create documents from a list of json objects (Dict)."""
+        metadatas: Optional[list[dict]] = None,
+    ) -> list[SplitterDocument]:
+        """Create documents from a list of json objects (dict)."""
         _metadatas = metadatas or [{}] * len(texts)
         documents = []
         for i, text in enumerate(texts):