chunks.py 5.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
import json
from typing import Any, Optional
from uuid import UUID

from shared.api.models.base import WrappedBooleanResponse
from shared.api.models.management.responses import (
    WrappedChunkResponse,
    WrappedChunksResponse,
)

from ..models import SearchSettings
  10. class ChunksSDK:
  11. """
  12. SDK for interacting with chunks in the v3 API.
  13. """
  14. def __init__(self, client):
  15. self.client = client
  16. async def update(
  17. self,
  18. chunk: dict[str, str],
  19. ) -> WrappedChunkResponse:
  20. """
  21. Update an existing chunk.
  22. Args:
  23. chunk (dict[str, str]): Chunk to update. Should contain:
  24. - id: UUID of the chunk
  25. - metadata: Dictionary of metadata
  26. Returns:
  27. dict: Update results containing processed chunk information
  28. """
  29. return await self.client._make_request(
  30. "POST",
  31. f"chunks/{str(chunk['id'])}",
  32. json=chunk,
  33. version="v3",
  34. )
  35. async def retrieve(
  36. self,
  37. id: str | UUID,
  38. ) -> WrappedChunkResponse:
  39. """
  40. Get a specific chunk.
  41. Args:
  42. id (str | UUID): Chunk ID to retrieve
  43. Returns:
  44. dict: List of chunks and pagination information
  45. """
  46. return await self.client._make_request(
  47. "GET",
  48. f"chunks/{id}",
  49. version="v3",
  50. )
  51. # FIXME: Is this the most appropriate name for this method?
  52. async def list_by_document(
  53. self,
  54. document_id: str | UUID,
  55. metadata_filter: Optional[dict] = None,
  56. offset: Optional[int] = 0,
  57. limit: Optional[int] = 100,
  58. ) -> WrappedChunksResponse:
  59. """
  60. List chunks for a specific document.
  61. Args:
  62. document_id (str | UUID): Document ID to get chunks for
  63. metadata_filter (Optional[dict]): Filter chunks by metadata
  64. offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
  65. limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
  66. Returns:
  67. dict: List of chunks and pagination information
  68. """
  69. params: dict = {
  70. "offset": offset,
  71. "limit": limit,
  72. }
  73. if metadata_filter:
  74. params["metadata_filter"] = json.dumps(metadata_filter)
  75. return await self.client._make_request(
  76. "GET",
  77. f"documents/{str(document_id)}/chunks",
  78. params=params,
  79. version="v3",
  80. )
  81. async def delete(
  82. self,
  83. id: str | UUID,
  84. ) -> WrappedBooleanResponse:
  85. """
  86. Delete a specific chunk.
  87. Args:
  88. id (Union[str, UUID]): ID of chunk to delete
  89. """
  90. return await self.client._make_request(
  91. "DELETE",
  92. f"chunks/{str(id)}",
  93. version="v3",
  94. )
  95. async def list(
  96. self,
  97. include_vectors: bool = False,
  98. metadata_filter: Optional[dict] = None,
  99. offset: Optional[int] = 0,
  100. limit: Optional[int] = 100,
  101. ) -> WrappedChunksResponse:
  102. """
  103. List chunks with pagination support.
  104. Args:
  105. include_vectors (bool, optional): Include vector data in response. Defaults to False.
  106. metadata_filter (Optional[dict], optional): Filter by metadata. Defaults to None.
  107. offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
  108. limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
  109. Returns:
  110. dict: Dictionary containing:
  111. - results: List of chunks
  112. - page_info: Pagination information
  113. """
  114. params: dict = {
  115. "offset": offset,
  116. "limit": limit,
  117. "include_vectors": include_vectors,
  118. }
  119. if metadata_filter:
  120. params["metadata_filter"] = json.dumps(metadata_filter)
  121. return await self.client._make_request(
  122. "GET",
  123. "chunks",
  124. params=params,
  125. version="v3",
  126. )
  127. async def search(
  128. self,
  129. query: str,
  130. search_settings: Optional[dict | SearchSettings] = None,
  131. ): # -> CombinedSearchResponse:
  132. """
  133. Conduct a vector and/or KG search.
  134. Args:
  135. query (str): The query to search for.
  136. search_settings (Optional[dict, SearchSettings]]): Vector search settings.
  137. Returns:
  138. CombinedSearchResponse: The search response.
  139. """
  140. if search_settings and not isinstance(search_settings, dict):
  141. search_settings = search_settings.model_dump()
  142. data = {
  143. "query": query,
  144. "search_settings": search_settings,
  145. }
  146. return await self.client._make_request(
  147. "POST",
  148. "chunks/search",
  149. json=data,
  150. version="v3",
  151. )