chunks.py 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. import json
  2. from typing import Any, Optional
  3. from uuid import UUID
  4. from shared.api.models.base import WrappedBooleanResponse
  5. from shared.api.models.management.responses import (
  6. WrappedChunkResponse,
  7. WrappedChunksResponse,
  8. )
  9. from ..models import SearchSettings
  10. class ChunksSDK:
  11. """
  12. SDK for interacting with chunks in the v3 API.
  13. """
  14. def __init__(self, client):
  15. self.client = client
  16. async def update(
  17. self,
  18. chunk: dict[str, str],
  19. ) -> WrappedChunkResponse:
  20. """
  21. Update an existing chunk.
  22. Args:
  23. chunk (dict[str, str]): Chunk to update. Should contain:
  24. - id: UUID of the chunk
  25. - metadata: Dictionary of metadata
  26. Returns:
  27. dict: Update results containing processed chunk information
  28. """
  29. return await self.client._make_request(
  30. "POST",
  31. f"chunks/{str(chunk['id'])}",
  32. json=chunk,
  33. version="v3",
  34. )
  35. async def retrieve(
  36. self,
  37. id: str | UUID,
  38. ) -> WrappedChunkResponse:
  39. """
  40. Get a specific chunk.
  41. Args:
  42. id (str | UUID): Chunk ID to retrieve
  43. Returns:
  44. dict: List of chunks and pagination information
  45. """
  46. return await self.client._make_request(
  47. "GET",
  48. f"chunks/{id}",
  49. version="v3",
  50. )
  51. # FIXME: Is this the most appropriate name for this method?
  52. async def list_by_document(
  53. self,
  54. document_id: str | UUID,
  55. metadata_filter: Optional[dict] = None,
  56. offset: Optional[int] = 0,
  57. limit: Optional[int] = 100,
  58. ) -> WrappedChunksResponse:
  59. """
  60. List chunks for a specific document.
  61. Args:
  62. document_id (str | UUID): Document ID to get chunks for
  63. metadata_filter (Optional[dict]): Filter chunks by metadata
  64. offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
  65. limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
  66. Returns:
  67. dict: List of chunks and pagination information
  68. """
  69. params: dict = {
  70. "offset": offset,
  71. "limit": limit,
  72. }
  73. if metadata_filter:
  74. params["metadata_filter"] = json.dumps(metadata_filter)
  75. return await self.client._make_request(
  76. "GET",
  77. f"documents/{str(document_id)}/chunks",
  78. params=params,
  79. version="v3",
  80. )
  81. async def delete(
  82. self,
  83. id: str | UUID,
  84. ) -> WrappedBooleanResponse:
  85. """
  86. Delete a specific chunk.
  87. Args:
  88. id (str | UUID): ID of chunk to delete
  89. """
  90. return await self.client._make_request(
  91. "DELETE",
  92. f"chunks/{str(id)}",
  93. version="v3",
  94. )
  95. async def list(
  96. self,
  97. include_vectors: bool = False,
  98. metadata_filter: Optional[dict] = None,
  99. offset: Optional[int] = 0,
  100. limit: Optional[int] = 100,
  101. filters: Optional[dict] = None,
  102. ) -> WrappedChunksResponse:
  103. """
  104. List chunks with pagination support.
  105. Args:
  106. include_vectors (bool, optional): Include vector data in response. Defaults to False.
  107. metadata_filter (Optional[dict], optional): Filter by metadata. Defaults to None.
  108. offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
  109. limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
  110. Returns:
  111. dict: Dictionary containing:
  112. - results: List of chunks
  113. - page_info: Pagination information
  114. """
  115. params: dict = {
  116. "offset": offset,
  117. "limit": limit,
  118. "include_vectors": include_vectors,
  119. }
  120. if filters:
  121. params["filters"] = json.dumps(filters)
  122. if metadata_filter:
  123. params["metadata_filter"] = json.dumps(metadata_filter)
  124. return await self.client._make_request(
  125. "GET",
  126. "chunks",
  127. params=params,
  128. version="v3",
  129. )
  130. async def search(
  131. self,
  132. query: str,
  133. search_settings: Optional[dict | SearchSettings] = None,
  134. ): # -> CombinedSearchResponse:
  135. """
  136. Conduct a vector and/or KG search.
  137. Args:
  138. query (str): The query to search for.
  139. search_settings (Optional[dict, SearchSettings]]): Vector search settings.
  140. Returns:
  141. CombinedSearchResponse: The search response.
  142. """
  143. if search_settings and not isinstance(search_settings, dict):
  144. search_settings = search_settings.model_dump()
  145. data: dict[str, Any] = {
  146. "query": query,
  147. "search_settings": search_settings,
  148. }
  149. return await self.client._make_request(
  150. "POST",
  151. "chunks/search",
  152. json=data,
  153. version="v3",
  154. )