documents.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786
  1. import json
  2. import os
  3. import tempfile
  4. from datetime import datetime
  5. from io import BytesIO
  6. from pathlib import Path
  7. from typing import Any, Optional
  8. from uuid import UUID
  9. import aiofiles
  10. import requests
  11. from shared.abstractions import R2RClientException
  12. from shared.api.models import (
  13. WrappedBooleanResponse,
  14. WrappedChunksResponse,
  15. WrappedCollectionsResponse,
  16. WrappedDocumentResponse,
  17. WrappedDocumentSearchResponse,
  18. WrappedDocumentsResponse,
  19. WrappedEntitiesResponse,
  20. WrappedGenericMessageResponse,
  21. WrappedIngestionResponse,
  22. WrappedRelationshipsResponse,
  23. )
  24. from ..models import (
  25. GraphCreationSettings,
  26. IngestionMode,
  27. SearchMode,
  28. SearchSettings,
  29. )
  30. class DocumentsSDK:
  31. """SDK for interacting with documents in the v3 API."""
  32. def __init__(self, client):
  33. self.client = client
  34. async def create(
  35. self,
  36. file_path: Optional[str] = None,
  37. raw_text: Optional[str] = None,
  38. chunks: Optional[list[str]] = None,
  39. s3_url: Optional[str] = None,
  40. id: Optional[str | UUID] = None,
  41. ingestion_mode: Optional[str] = None,
  42. collection_ids: Optional[list[str | UUID]] = None,
  43. metadata: Optional[dict] = None,
  44. ingestion_config: Optional[dict | IngestionMode] = None,
  45. run_with_orchestration: Optional[bool] = True,
  46. ) -> WrappedIngestionResponse:
  47. """Create a new document from either a file or content.
  48. Args:
  49. file_path (Optional[str]): The path to the file to upload, if any.
  50. raw_text (Optional[str]): Raw text content to upload, if no file path is provided.
  51. chunks (Optional[list[str]]): Pre-processed text chunks to ingest.
  52. s3_url (Optional[str]): A presigned S3 URL to upload the file from, if any.
  53. id (Optional[str | UUID]): Optional ID to assign to the document.
  54. ingestion_mode (Optional[IngestionMode | str]): The ingestion mode preset ('hi-res', 'ocr', 'fast', 'custom'). Defaults to 'custom'.
  55. collection_ids (Optional[list[str | UUID]]): Collection IDs to associate. Defaults to user's default collection if None.
  56. metadata (Optional[dict]): Optional metadata to assign to the document.
  57. ingestion_config (Optional[dict | IngestionMode]): Optional ingestion config or preset mode enum. Used when ingestion_mode='custom'.
  58. run_with_orchestration (Optional[bool]): Whether to run with orchestration (default: True).
  59. Returns:
  60. WrappedIngestionResponse
  61. """
  62. if (
  63. sum(x is not None for x in [file_path, raw_text, chunks, s3_url])
  64. != 1
  65. ):
  66. raise ValueError(
  67. "Exactly one of file_path, raw_text, chunks, or s3_url must be provided."
  68. )
  69. data: dict[str, Any] = {}
  70. files = None
  71. if id:
  72. data["id"] = str(id)
  73. if metadata:
  74. data["metadata"] = json.dumps(metadata)
  75. if ingestion_config:
  76. if isinstance(ingestion_config, IngestionMode):
  77. ingestion_config = {"mode": ingestion_config.value}
  78. app_config: dict[str, Any] = (
  79. {}
  80. if isinstance(ingestion_config, dict)
  81. else ingestion_config["app"]
  82. )
  83. ingestion_config = dict(ingestion_config)
  84. ingestion_config["app"] = app_config
  85. data["ingestion_config"] = json.dumps(ingestion_config)
  86. if collection_ids:
  87. collection_ids = [
  88. str(collection_id) for collection_id in collection_ids
  89. ] # type: ignore
  90. data["collection_ids"] = json.dumps(collection_ids)
  91. if run_with_orchestration is not None:
  92. data["run_with_orchestration"] = str(run_with_orchestration)
  93. if ingestion_mode is not None:
  94. data["ingestion_mode"] = (
  95. ingestion_mode.value
  96. if isinstance(ingestion_mode, IngestionMode)
  97. else ingestion_mode
  98. )
  99. if file_path:
  100. # Create a new file instance that will remain open during the request
  101. file_instance = open(file_path, "rb")
  102. filename = os.path.basename(file_path)
  103. files = [
  104. (
  105. "file",
  106. (filename, file_instance, "application/octet-stream"),
  107. )
  108. ]
  109. try:
  110. response_dict = await self.client._make_request(
  111. "POST",
  112. "documents",
  113. data=data,
  114. files=files,
  115. version="v3",
  116. )
  117. finally:
  118. # Ensure we close the file after the request is complete
  119. file_instance.close()
  120. elif raw_text:
  121. data["raw_text"] = raw_text # type: ignore
  122. response_dict = await self.client._make_request(
  123. "POST",
  124. "documents",
  125. data=data,
  126. version="v3",
  127. )
  128. elif chunks:
  129. data["chunks"] = json.dumps(chunks)
  130. response_dict = await self.client._make_request(
  131. "POST",
  132. "documents",
  133. data=data,
  134. version="v3",
  135. )
  136. elif s3_url:
  137. try:
  138. s3_file = requests.get(s3_url)
  139. with tempfile.NamedTemporaryFile(delete=False) as temp_file:
  140. temp_file_path = temp_file.name
  141. temp_file.write(s3_file.content)
  142. # Get the filename from the URL
  143. filename = os.path.basename(s3_url.split("?")[0]) or "s3_file"
  144. with open(temp_file_path, "rb") as file_instance:
  145. files = [
  146. (
  147. "file",
  148. (
  149. filename,
  150. file_instance,
  151. "application/octet-stream",
  152. ),
  153. )
  154. ]
  155. response_dict = await self.client._make_request(
  156. "POST",
  157. "documents",
  158. data=data,
  159. files=files,
  160. version="v3",
  161. )
  162. except requests.RequestException as e:
  163. raise R2RClientException(
  164. f"Failed to download file from S3 URL: {s3_url}"
  165. ) from e
  166. finally:
  167. # Clean up the temporary file
  168. if os.path.exists(temp_file_path):
  169. os.unlink(temp_file_path)
  170. return WrappedIngestionResponse(**response_dict)
  171. async def append_metadata(
  172. self,
  173. id: str | UUID,
  174. metadata: list[dict[str, Any]],
  175. ) -> WrappedDocumentResponse:
  176. """Append metadata to a document.
  177. Args:
  178. id (str | UUID): ID of document to append metadata to
  179. metadata (list[dict]): Metadata to append
  180. Returns:
  181. WrappedDocumentResponse
  182. """
  183. data = json.dumps(metadata)
  184. response_dict = await self.client._make_request(
  185. "PATCH",
  186. f"documents/{str(id)}/metadata",
  187. data=data,
  188. version="v3",
  189. )
  190. return WrappedDocumentResponse(**response_dict)
  191. async def replace_metadata(
  192. self,
  193. id: str | UUID,
  194. metadata: list[dict[str, Any]],
  195. ) -> WrappedDocumentResponse:
  196. """Replace metadata for a document.
  197. Args:
  198. id (str | UUID): ID of document to replace metadata for
  199. metadata (list[dict]): The metadata that will replace the existing metadata
  200. Returns:
  201. WrappedDocumentResponse
  202. """
  203. data = json.dumps(metadata)
  204. response_dict = await self.client._make_request(
  205. "PUT",
  206. f"documents/{str(id)}/metadata",
  207. data=data,
  208. version="v3",
  209. )
  210. return WrappedDocumentResponse(**response_dict)
  211. async def retrieve(
  212. self,
  213. id: str | UUID,
  214. ) -> WrappedDocumentResponse:
  215. """Get a specific document by ID.
  216. Args:
  217. id (str | UUID): ID of document to retrieve
  218. Returns:
  219. WrappedDocumentResponse
  220. """
  221. response_dict = await self.client._make_request(
  222. "GET",
  223. f"documents/{str(id)}",
  224. version="v3",
  225. )
  226. return WrappedDocumentResponse(**response_dict)
  227. async def download(
  228. self,
  229. id: str | UUID,
  230. ) -> BytesIO:
  231. """Download a document's original file content.
  232. Args:
  233. id (str | UUID): ID of document to download
  234. Returns:
  235. BytesIO: In-memory bytes buffer containing the document's file content.
  236. """
  237. response = await self.client._make_request(
  238. "GET",
  239. f"documents/{str(id)}/download",
  240. version="v3",
  241. )
  242. if not isinstance(response, BytesIO):
  243. raise ValueError(
  244. f"Expected BytesIO response, got {type(response)}"
  245. )
  246. return response
  247. async def download_zip(
  248. self,
  249. document_ids: Optional[list[str | UUID]] = None,
  250. start_date: Optional[datetime] = None,
  251. end_date: Optional[datetime] = None,
  252. output_path: Optional[str | Path] = None,
  253. ) -> BytesIO | None:
  254. """Download multiple documents as a zip file.
  255. Args:
  256. document_ids (Optional[list[str | UUID]]): IDs to include. May be required for non-superusers.
  257. start_date (Optional[datetime]): Filter documents created on or after this date.
  258. end_date (Optional[datetime]): Filter documents created on or before this date.
  259. output_path (Optional[str | Path]): If provided, save the zip file to this path and return None. Otherwise, return BytesIO.
  260. Returns:
  261. Optional[BytesIO]: BytesIO object with zip content if output_path is None, else None.
  262. """
  263. params: dict[str, Any] = {}
  264. if document_ids:
  265. params["document_ids"] = [str(doc_id) for doc_id in document_ids]
  266. if start_date:
  267. params["start_date"] = start_date.isoformat()
  268. if end_date:
  269. params["end_date"] = end_date.isoformat()
  270. response = await self.client._make_request(
  271. "GET",
  272. "documents/download_zip",
  273. params=params,
  274. version="v3",
  275. )
  276. if not isinstance(response, BytesIO):
  277. raise ValueError(
  278. f"Expected BytesIO response, got {type(response)}"
  279. )
  280. if output_path:
  281. output_path = (
  282. Path(output_path)
  283. if isinstance(output_path, str)
  284. else output_path
  285. )
  286. async with aiofiles.open(output_path, "wb") as f:
  287. await f.write(response.getvalue())
  288. return None
  289. return response
  290. async def export(
  291. self,
  292. output_path: str | Path,
  293. columns: Optional[list[str]] = None,
  294. filters: Optional[dict] = None,
  295. include_header: bool = True,
  296. ) -> None:
  297. """Export documents to a CSV file, streaming the results directly to
  298. disk.
  299. Args:
  300. output_path (str | Path): Local path where the CSV file should be saved
  301. columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
  302. filters (Optional[dict]): Optional filters to apply when selecting documents
  303. include_header (bool): Whether to include column headers in the CSV (default: True)
  304. Returns:
  305. None
  306. """
  307. # Convert path to string if it's a Path object
  308. output_path = (
  309. str(output_path) if isinstance(output_path, Path) else output_path
  310. )
  311. data: dict[str, Any] = {"include_header": include_header}
  312. if columns:
  313. data["columns"] = columns
  314. if filters:
  315. data["filters"] = filters
  316. # Stream response directly to file
  317. async with aiofiles.open(output_path, "wb") as f:
  318. async with self.client.session.post(
  319. f"{self.client.base_url}/v3/documents/export",
  320. json=data,
  321. headers={
  322. "Accept": "text/csv",
  323. **self.client._get_auth_header(),
  324. },
  325. ) as response:
  326. if response.status != 200:
  327. raise ValueError(
  328. f"Export failed with status {response.status}",
  329. response,
  330. )
  331. async for chunk in response.content.iter_chunks():
  332. if chunk:
  333. await f.write(chunk[0])
  334. async def export_entities(
  335. self,
  336. id: str | UUID,
  337. output_path: str | Path,
  338. columns: Optional[list[str]] = None,
  339. filters: Optional[dict] = None,
  340. include_header: bool = True,
  341. ) -> None:
  342. """Export documents to a CSV file, streaming the results directly to
  343. disk.
  344. Args:
  345. output_path (str | Path): Local path where the CSV file should be saved
  346. columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
  347. filters (Optional[dict]): Optional filters to apply when selecting documents
  348. include_header (bool): Whether to include column headers in the CSV (default: True)
  349. Returns:
  350. None
  351. """
  352. # Convert path to string if it's a Path object
  353. output_path = (
  354. str(output_path) if isinstance(output_path, Path) else output_path
  355. )
  356. # Prepare request data
  357. data: dict[str, Any] = {"include_header": include_header}
  358. if columns:
  359. data["columns"] = columns
  360. if filters:
  361. data["filters"] = filters
  362. # Stream response directly to file
  363. async with aiofiles.open(output_path, "wb") as f:
  364. async with self.client.session.post(
  365. f"{self.client.base_url}/v3/documents/{str(id)}/entities/export",
  366. json=data,
  367. headers={
  368. "Accept": "text/csv",
  369. **self.client._get_auth_header(),
  370. },
  371. ) as response:
  372. if response.status != 200:
  373. raise ValueError(
  374. f"Export failed with status {response.status}",
  375. response,
  376. )
  377. async for chunk in response.content.iter_chunks():
  378. if chunk:
  379. await f.write(chunk[0])
  380. async def export_relationships(
  381. self,
  382. id: str | UUID,
  383. output_path: str | Path,
  384. columns: Optional[list[str]] = None,
  385. filters: Optional[dict] = None,
  386. include_header: bool = True,
  387. ) -> None:
  388. """Export document relationships to a CSV file, streaming the results
  389. directly to disk.
  390. Args:
  391. output_path (str | Path): Local path where the CSV file should be saved
  392. columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
  393. filters (Optional[dict]): Optional filters to apply when selecting documents
  394. include_header (bool): Whether to include column headers in the CSV (default: True)
  395. Returns:
  396. None
  397. """
  398. # Convert path to string if it's a Path object
  399. output_path = (
  400. str(output_path) if isinstance(output_path, Path) else output_path
  401. )
  402. # Prepare request data
  403. data: dict[str, Any] = {"include_header": include_header}
  404. if columns:
  405. data["columns"] = columns
  406. if filters:
  407. data["filters"] = filters
  408. # Stream response directly to file
  409. async with aiofiles.open(output_path, "wb") as f:
  410. async with self.client.session.post(
  411. f"{self.client.base_url}/v3/documents/{str(id)}/relationships/export",
  412. json=data,
  413. headers={
  414. "Accept": "text/csv",
  415. **self.client._get_auth_header(),
  416. },
  417. ) as response:
  418. if response.status != 200:
  419. raise ValueError(
  420. f"Export failed with status {response.status}",
  421. response,
  422. )
  423. async for chunk in response.content.iter_chunks():
  424. if chunk:
  425. await f.write(chunk[0])
  426. async def delete(
  427. self,
  428. id: str | UUID,
  429. ) -> WrappedBooleanResponse:
  430. """Delete a specific document.
  431. Args:
  432. id (str | UUID): ID of document to delete
  433. Returns:
  434. WrappedBooleanResponse
  435. """
  436. response_dict = await self.client._make_request(
  437. "DELETE",
  438. f"documents/{str(id)}",
  439. version="v3",
  440. )
  441. return WrappedBooleanResponse(**response_dict)
  442. async def list_chunks(
  443. self,
  444. id: str | UUID,
  445. include_vectors: Optional[bool] = False,
  446. offset: Optional[int] = 0,
  447. limit: Optional[int] = 100,
  448. ) -> WrappedChunksResponse:
  449. """Get chunks for a specific document.
  450. Args:
  451. id (str | UUID): ID of document to retrieve chunks for
  452. include_vectors (Optional[bool]): Whether to include vector embeddings in the response
  453. offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
  454. limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
  455. Returns:
  456. WrappedChunksResponse
  457. """
  458. params = {
  459. "offset": offset,
  460. "limit": limit,
  461. "include_vectors": include_vectors,
  462. }
  463. response_dict = await self.client._make_request(
  464. "GET",
  465. f"documents/{str(id)}/chunks",
  466. params=params,
  467. version="v3",
  468. )
  469. return WrappedChunksResponse(**response_dict)
  470. async def list_collections(
  471. self,
  472. id: str | UUID,
  473. offset: Optional[int] = 0,
  474. limit: Optional[int] = 100,
  475. ) -> WrappedCollectionsResponse:
  476. """List collections for a specific document.
  477. Args:
  478. id (str | UUID): ID of document to retrieve collections for
  479. offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
  480. limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
  481. Returns:
  482. WrappedCollectionsResponse
  483. """
  484. params = {
  485. "offset": offset,
  486. "limit": limit,
  487. }
  488. response_dict = await self.client._make_request(
  489. "GET",
  490. f"documents/{str(id)}/collections",
  491. params=params,
  492. version="v3",
  493. )
  494. return WrappedCollectionsResponse(**response_dict)
  495. async def delete_by_filter(
  496. self,
  497. filters: dict[str, Any],
  498. ) -> WrappedBooleanResponse:
  499. """Delete documents based on metadata filters.
  500. Args:
  501. filters (dict): Filters to apply (e.g., `{"metadata.year": {"$lt": 2020}}`).
  502. Returns:
  503. WrappedBooleanResponse
  504. """
  505. filters_json = json.dumps(filters)
  506. response_dict = await self.client._make_request(
  507. "DELETE",
  508. "documents/by-filter",
  509. data=filters_json,
  510. version="v3",
  511. )
  512. return WrappedBooleanResponse(**response_dict)
  513. async def extract(
  514. self,
  515. id: str | UUID,
  516. settings: Optional[dict | GraphCreationSettings] = None,
  517. run_with_orchestration: Optional[bool] = True,
  518. ) -> WrappedGenericMessageResponse:
  519. """Extract entities and relationships from a document.
  520. Args:
  521. id (str, UUID): ID of document to extract from
  522. settings (Optional[dict]): Settings for extraction process
  523. run_with_orchestration (Optional[bool]): Whether to run with orchestration
  524. Returns:
  525. WrappedGenericMessageResponse
  526. """
  527. data: dict[str, Any] = {}
  528. if settings:
  529. data["settings"] = json.dumps(settings)
  530. if run_with_orchestration is not None:
  531. data["run_with_orchestration"] = str(run_with_orchestration)
  532. response_dict = await self.client._make_request(
  533. "POST",
  534. f"documents/{str(id)}/extract",
  535. params=data,
  536. version="v3",
  537. )
  538. return WrappedGenericMessageResponse(**response_dict)
  539. async def list_entities(
  540. self,
  541. id: str | UUID,
  542. offset: Optional[int] = 0,
  543. limit: Optional[int] = 100,
  544. include_embeddings: Optional[bool] = False,
  545. ) -> WrappedEntitiesResponse:
  546. """List entities extracted from a document.
  547. Args:
  548. id (str | UUID): ID of document to get entities from
  549. offset (Optional[int]): Number of items to skip
  550. limit (Optional[int]): Max number of items to return
  551. include_embeddings (Optional[bool]): Whether to include embeddings
  552. Returns:
  553. WrappedEntitiesResponse
  554. """
  555. params = {
  556. "offset": offset,
  557. "limit": limit,
  558. "include_embeddings": include_embeddings,
  559. }
  560. response_dict = await self.client._make_request(
  561. "GET",
  562. f"documents/{str(id)}/entities",
  563. params=params,
  564. version="v3",
  565. )
  566. return WrappedEntitiesResponse(**response_dict)
  567. async def list_relationships(
  568. self,
  569. id: str | UUID,
  570. offset: Optional[int] = 0,
  571. limit: Optional[int] = 100,
  572. entity_names: Optional[list[str]] = None,
  573. relationship_types: Optional[list[str]] = None,
  574. ) -> WrappedRelationshipsResponse:
  575. """List relationships extracted from a document.
  576. Args:
  577. id (str | UUID): ID of document to get relationships from
  578. offset (Optional[int]): Number of items to skip
  579. limit (Optional[int]): Max number of items to return
  580. entity_names (Optional[list[str]]): Filter by entity names
  581. relationship_types (Optional[list[str]]): Filter by relationship types
  582. Returns:
  583. WrappedRelationshipsResponse
  584. """
  585. params: dict[str, Any] = {
  586. "offset": offset,
  587. "limit": limit,
  588. }
  589. if entity_names:
  590. params["entity_names"] = entity_names
  591. if relationship_types:
  592. params["relationship_types"] = relationship_types
  593. response_dict = await self.client._make_request(
  594. "GET",
  595. f"documents/{str(id)}/relationships",
  596. params=params,
  597. version="v3",
  598. )
  599. return WrappedRelationshipsResponse(**response_dict)
  600. async def list(
  601. self,
  602. ids: Optional[list[str | UUID]] = None,
  603. offset: Optional[int] = 0,
  604. limit: Optional[int] = 100,
  605. include_summary_embeddings: Optional[bool] = False,
  606. owner_only: Optional[bool] = False,
  607. ) -> WrappedDocumentsResponse:
  608. """List documents with pagination.
  609. Args:
  610. ids (Optional[list[str | UUID]]): Optional list of document IDs to filter by.
  611. offset (int, optional): Number of objects to skip. Defaults to 0.
  612. limit (int, optional): Max number of objects to return (1-1000). Defaults to 100.
  613. include_summary_embeddings (Optional[bool]): Whether to include summary embeddings (default: False).
  614. owner_only (Optional[bool]): If true, only returns documents owned by the user, not all accessible documents.
  615. Returns:
  616. WrappedDocumentsResponse
  617. """
  618. params: dict[str, Any] = {
  619. "offset": offset,
  620. "limit": limit,
  621. "include_summary_embeddings": include_summary_embeddings,
  622. "owner_only": owner_only,
  623. }
  624. if ids:
  625. params["ids"] = [str(doc_id) for doc_id in ids] # type: ignore
  626. response_dict = await self.client._make_request(
  627. "GET",
  628. "documents",
  629. params=params,
  630. version="v3",
  631. )
  632. return WrappedDocumentsResponse(**response_dict)
  633. async def search(
  634. self,
  635. query: str,
  636. search_mode: Optional[str | SearchMode] = SearchMode.custom,
  637. search_settings: Optional[dict | SearchSettings] = None,
  638. ) -> WrappedDocumentSearchResponse:
  639. """Conduct a search query on document summaries.
  640. Args:
  641. query (str): The query to search for.
  642. search_mode (Optional[str | SearchMode]): Search mode ('basic', 'advanced', 'custom'). Defaults to 'custom'.
  643. search_settings (Optional[dict, SearchSettings]]): Vector search settings.
  644. Returns:
  645. WrappedDocumentSearchResponse
  646. """
  647. if search_settings and not isinstance(search_settings, dict):
  648. search_settings = search_settings.model_dump()
  649. data: dict[str, Any] = {
  650. "query": query,
  651. "search_settings": search_settings,
  652. }
  653. if search_mode:
  654. data["search_mode"] = search_mode
  655. response_dict = await self.client._make_request(
  656. "POST",
  657. "documents/search",
  658. json=data,
  659. version="v3",
  660. )
  661. return WrappedDocumentSearchResponse(**response_dict)
  662. async def deduplicate(
  663. self,
  664. id: str | UUID,
  665. settings: Optional[dict | GraphCreationSettings] = None,
  666. run_with_orchestration: Optional[bool] = True,
  667. ) -> WrappedGenericMessageResponse:
  668. """Deduplicate entities and relationships from a document.
  669. Args:
  670. id (str | UUID): ID of document to deduplicate entities for.
  671. settings (Optional[dict | GraphCreationSettings]): Settings for deduplication process.
  672. run_with_orchestration (Optional[bool]): Whether to run with orchestration (default: True).
  673. Returns:
  674. WrappedGenericMessageResponse: Indicating task status.
  675. """
  676. data: dict[str, Any] = {}
  677. if settings:
  678. data["settings"] = json.dumps(settings)
  679. if run_with_orchestration is not None:
  680. data["run_with_orchestration"] = str(run_with_orchestration)
  681. response_dict = await self.client._make_request(
  682. "POST",
  683. f"documents/{str(id)}/deduplicate",
  684. params=data,
  685. version="v3",
  686. )
  687. return WrappedGenericMessageResponse(**response_dict)