# documents.py
  1. import json
  2. import os
  3. import tempfile
  4. from datetime import datetime
  5. from io import BytesIO
  6. from pathlib import Path
  7. from typing import Any, Optional
  8. from uuid import UUID
  9. import requests
  10. from shared.abstractions import R2RClientException
  11. from shared.api.models import (
  12. WrappedBooleanResponse,
  13. WrappedChunksResponse,
  14. WrappedCollectionsResponse,
  15. WrappedDocumentResponse,
  16. WrappedDocumentSearchResponse,
  17. WrappedDocumentsResponse,
  18. WrappedEntitiesResponse,
  19. WrappedGenericMessageResponse,
  20. WrappedIngestionResponse,
  21. WrappedRelationshipsResponse,
  22. )
  23. from ..models import (
  24. GraphCreationSettings,
  25. IngestionMode,
  26. SearchMode,
  27. SearchSettings,
  28. )
  29. class DocumentsSDK:
  30. """SDK for interacting with documents in the v3 API."""
  31. def __init__(self, client):
  32. self.client = client
  33. def create(
  34. self,
  35. file_path: Optional[str] = None,
  36. raw_text: Optional[str] = None,
  37. chunks: Optional[list[str]] = None,
  38. s3_url: Optional[str] = None,
  39. id: Optional[str | UUID] = None,
  40. ingestion_mode: Optional[IngestionMode | str] = None,
  41. collection_ids: Optional[list[str | UUID]] = None,
  42. metadata: Optional[dict[str, Any]] = None,
  43. ingestion_config: Optional[dict | IngestionMode] = None,
  44. run_with_orchestration: Optional[bool] = True,
  45. ) -> WrappedIngestionResponse:
  46. """Create a new document from either a file, raw text, or chunks.
  47. Args:
  48. file_path (Optional[str]): The path to the file to upload, if any.
  49. raw_text (Optional[str]): Raw text content to upload, if no file path is provided.
  50. chunks (Optional[list[str]]): Pre-processed text chunks to ingest.
  51. s3_url (Optional[str]): A presigned S3 URL to upload the file from, if any.
  52. id (Optional[str | UUID]): Optional ID to assign to the document.
  53. ingestion_mode (Optional[IngestionMode | str]): The ingestion mode preset ('hi-res', 'ocr', 'fast', 'custom'). Defaults to 'custom'.
  54. collection_ids (Optional[list[str | UUID]]): Collection IDs to associate. Defaults to user's default collection if None.
  55. metadata (Optional[dict]): Optional metadata to assign to the document.
  56. ingestion_config (Optional[dict | IngestionMode]): Optional ingestion config or preset mode enum. Used when ingestion_mode='custom'.
  57. run_with_orchestration (Optional[bool]): Whether to run with orchestration (default: True).
  58. Returns:
  59. WrappedIngestionResponse
  60. """
  61. if (
  62. sum(x is not None for x in [file_path, raw_text, chunks, s3_url])
  63. != 1
  64. ):
  65. raise ValueError(
  66. "Exactly one of file_path, raw_text, chunks, or s3_url must be provided."
  67. )
  68. data: dict[str, Any] = {}
  69. files = None
  70. if id:
  71. data["id"] = str(id)
  72. if metadata:
  73. data["metadata"] = json.dumps(metadata)
  74. if ingestion_config:
  75. if isinstance(ingestion_config, IngestionMode):
  76. ingestion_config = {"mode": ingestion_config.value}
  77. app_config: dict[str, Any] = (
  78. {}
  79. if isinstance(ingestion_config, dict)
  80. else ingestion_config["app"]
  81. )
  82. ingestion_config = dict(ingestion_config)
  83. ingestion_config["app"] = app_config
  84. data["ingestion_config"] = json.dumps(ingestion_config)
  85. if collection_ids:
  86. collection_ids = [
  87. str(collection_id) for collection_id in collection_ids
  88. ]
  89. data["collection_ids"] = json.dumps(collection_ids)
  90. if run_with_orchestration is not None:
  91. data["run_with_orchestration"] = str(run_with_orchestration)
  92. if ingestion_mode is not None:
  93. data["ingestion_mode"] = (
  94. ingestion_mode.value
  95. if isinstance(ingestion_mode, IngestionMode)
  96. else ingestion_mode
  97. )
  98. if file_path:
  99. # Create a new file instance that will remain open during the request
  100. file_instance = open(file_path, "rb")
  101. filename = os.path.basename(file_path)
  102. files = [
  103. (
  104. "file",
  105. (filename, file_instance, "application/octet-stream"),
  106. )
  107. ]
  108. try:
  109. response_dict = self.client._make_request(
  110. "POST",
  111. "documents",
  112. data=data,
  113. files=files,
  114. version="v3",
  115. )
  116. finally:
  117. # Ensure we close the file after the request is complete
  118. file_instance.close()
  119. elif raw_text:
  120. data["raw_text"] = raw_text
  121. response_dict = self.client._make_request(
  122. "POST",
  123. "documents",
  124. data=data,
  125. version="v3",
  126. )
  127. elif chunks:
  128. data["chunks"] = json.dumps(chunks)
  129. response_dict = self.client._make_request(
  130. "POST",
  131. "documents",
  132. data=data,
  133. version="v3",
  134. )
  135. elif s3_url:
  136. try:
  137. s3_file = requests.get(s3_url)
  138. with tempfile.NamedTemporaryFile(delete=False) as temp_file:
  139. temp_file_path = temp_file.name
  140. temp_file.write(s3_file.content)
  141. # Get the filename from the URL
  142. filename = os.path.basename(s3_url.split("?")[0]) or "s3_file"
  143. with open(temp_file_path, "rb") as file_instance:
  144. files = [
  145. (
  146. "file",
  147. (
  148. filename,
  149. file_instance,
  150. "application/octet-stream",
  151. ),
  152. )
  153. ]
  154. response_dict = self.client._make_request(
  155. "POST",
  156. "documents",
  157. data=data,
  158. files=files,
  159. version="v3",
  160. )
  161. except requests.RequestException as e:
  162. raise R2RClientException(
  163. f"Failed to download file from S3 URL: {s3_url}"
  164. ) from e
  165. finally:
  166. # Clean up the temporary file
  167. if os.path.exists(temp_file_path):
  168. os.unlink(temp_file_path)
  169. return WrappedIngestionResponse(**response_dict)
  170. def append_metadata(
  171. self,
  172. id: str | UUID,
  173. metadata: list[dict[str, Any]],
  174. ) -> WrappedDocumentResponse:
  175. """Append metadata to a document.
  176. Args:
  177. id (str | UUID): ID of document to append metadata to
  178. metadata (list[dict]): Metadata to append
  179. Returns:
  180. WrappedDocumentResponse
  181. """
  182. data = json.dumps(metadata)
  183. response_dict = self.client._make_request(
  184. "PATCH",
  185. f"documents/{str(id)}/metadata",
  186. data=data,
  187. version="v3",
  188. )
  189. return WrappedDocumentResponse(**response_dict)
  190. def replace_metadata(
  191. self,
  192. id: str | UUID,
  193. metadata: list[dict[str, Any]],
  194. ) -> WrappedDocumentResponse:
  195. """Replace metadata for a document.
  196. Args:
  197. id (str | UUID): ID of document to replace metadata for
  198. metadata (list[dict]): The metadata that will replace the existing metadata
  199. Returns:
  200. WrappedDocumentResponse
  201. """
  202. data = json.dumps(metadata)
  203. response_dict = self.client._make_request(
  204. "PUT",
  205. f"documents/{str(id)}/metadata",
  206. data=data,
  207. version="v3",
  208. )
  209. return WrappedDocumentResponse(**response_dict)
  210. def retrieve(
  211. self,
  212. id: str | UUID,
  213. ) -> WrappedDocumentResponse:
  214. """Get a specific document by ID.
  215. Args:
  216. id (str | UUID): ID of document to retrieve
  217. Returns:
  218. WrappedDocumentResponse
  219. """
  220. response_dict = self.client._make_request(
  221. "GET",
  222. f"documents/{str(id)}",
  223. version="v3",
  224. )
  225. return WrappedDocumentResponse(**response_dict)
  226. def download(
  227. self,
  228. id: str | UUID,
  229. ) -> BytesIO:
  230. """Download a document's original file content.
  231. Args:
  232. id (str | UUID): ID of document to download
  233. Returns:
  234. BytesIO: In-memory bytes buffer containing the document's file content.
  235. """
  236. response = self.client._make_request(
  237. "GET",
  238. f"documents/{str(id)}/download",
  239. version="v3",
  240. )
  241. if not isinstance(response, BytesIO):
  242. raise ValueError(
  243. f"Expected BytesIO response, got {type(response)}"
  244. )
  245. return response
  246. def download_zip(
  247. self,
  248. document_ids: Optional[list[str | UUID]] = None,
  249. start_date: Optional[datetime] = None,
  250. end_date: Optional[datetime] = None,
  251. output_path: Optional[str | Path] = None,
  252. ) -> Optional[BytesIO]:
  253. """Download multiple documents as a zip file.
  254. Args:
  255. document_ids (Optional[list[str | UUID]]): IDs to include. May be required for non-superusers.
  256. start_date (Optional[datetime]): Filter documents created on or after this date.
  257. end_date (Optional[datetime]): Filter documents created on or before this date.
  258. output_path (Optional[str | Path]): If provided, save the zip file to this path and return None. Otherwise, return BytesIO.
  259. Returns:
  260. Optional[BytesIO]: BytesIO object with zip content if output_path is None, else None.
  261. """
  262. params: dict[str, Any] = {}
  263. if document_ids:
  264. params["document_ids"] = [str(doc_id) for doc_id in document_ids]
  265. if start_date:
  266. params["start_date"] = start_date.isoformat()
  267. if end_date:
  268. params["end_date"] = end_date.isoformat()
  269. response = self.client._make_request(
  270. "GET",
  271. "documents/download_zip",
  272. params=params,
  273. version="v3",
  274. )
  275. if not isinstance(response, BytesIO):
  276. raise ValueError(
  277. f"Expected BytesIO response, got {type(response)}"
  278. )
  279. if output_path:
  280. output_path = (
  281. Path(output_path)
  282. if isinstance(output_path, str)
  283. else output_path
  284. )
  285. with open(output_path, "wb") as f:
  286. f.write(response.getvalue())
  287. return None
  288. return response
  289. def export(
  290. self,
  291. output_path: str | Path,
  292. columns: Optional[list[str]] = None,
  293. filters: Optional[dict[str, Any]] = None,
  294. include_header: bool = True,
  295. ) -> None:
  296. """Export documents to a CSV file, streaming the results directly to
  297. disk.
  298. Args:
  299. output_path (str | Path): Local path where the CSV file should be saved
  300. columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
  301. filters (Optional[dict]): Optional filters to apply when selecting documents
  302. include_header (bool): Whether to include column headers in the CSV (default: True)
  303. Returns:
  304. None
  305. """
  306. output_path = (
  307. str(output_path) if isinstance(output_path, Path) else output_path
  308. )
  309. data: dict[str, Any] = {"include_header": include_header}
  310. if columns:
  311. data["columns"] = columns
  312. if filters:
  313. data["filters"] = filters
  314. with open(output_path, "wb") as f:
  315. response = self.client.client.post(
  316. f"{self.client.base_url}/v3/documents/export",
  317. json=data,
  318. headers={
  319. "Accept": "text/csv",
  320. **self.client._get_auth_header(),
  321. },
  322. )
  323. if response.status_code != 200:
  324. raise ValueError(
  325. f"Export failed with status {response.status_code}",
  326. response,
  327. )
  328. for chunk in response.iter_bytes():
  329. if chunk:
  330. f.write(chunk)
  331. def export_entities(
  332. self,
  333. id: str | UUID,
  334. output_path: str | Path,
  335. columns: Optional[list[str]] = None,
  336. filters: Optional[dict] = None,
  337. include_header: bool = True,
  338. ) -> None:
  339. """Export entities to a CSV file, streaming the results directly to
  340. disk.
  341. Args:
  342. output_path (str | Path): Local path where the CSV file should be saved
  343. columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
  344. filters (Optional[dict]): Optional filters to apply when selecting documents
  345. include_header (bool): Whether to include column headers in the CSV (default: True)
  346. Returns:
  347. None
  348. """
  349. # Convert path to string if it's a Path object
  350. output_path = (
  351. str(output_path) if isinstance(output_path, Path) else output_path
  352. )
  353. # Prepare request data
  354. data: dict[str, Any] = {"include_header": include_header}
  355. if columns:
  356. data["columns"] = columns
  357. if filters:
  358. data["filters"] = filters
  359. # Stream response directly to file
  360. with open(output_path, "wb") as f:
  361. response = self.client.client.post(
  362. f"{self.client.base_url}/v3/documents/{str(id)}/entities/export",
  363. json=data,
  364. headers={
  365. "Accept": "text/csv",
  366. **self.client._get_auth_header(),
  367. },
  368. )
  369. if response.status_code != 200:
  370. raise ValueError(
  371. f"Export failed with status {response.status_code}",
  372. response,
  373. )
  374. for chunk in response.iter_bytes():
  375. if chunk:
  376. f.write(chunk)
  377. def export_relationships(
  378. self,
  379. id: str | UUID,
  380. output_path: str | Path,
  381. columns: Optional[list[str]] = None,
  382. filters: Optional[dict] = None,
  383. include_header: bool = True,
  384. ) -> None:
  385. """Export document relationships to a CSV file, streaming the results
  386. directly to disk.
  387. Args:
  388. output_path (str | Path): Local path where the CSV file should be saved
  389. columns (Optional[list[str]]): Specific columns to export. If None, exports default columns
  390. filters (Optional[dict]): Optional filters to apply when selecting documents
  391. include_header (bool): Whether to include column headers in the CSV (default: True)
  392. Returns:
  393. None
  394. """
  395. # Convert path to string if it's a Path object
  396. output_path = (
  397. str(output_path) if isinstance(output_path, Path) else output_path
  398. )
  399. # Prepare request data
  400. data: dict[str, Any] = {"include_header": include_header}
  401. if columns:
  402. data["columns"] = columns
  403. if filters:
  404. data["filters"] = filters
  405. # Stream response directly to file
  406. with open(output_path, "wb") as f:
  407. response = self.client.client.post(
  408. f"{self.client.base_url}/v3/documents/{str(id)}/relationships/export",
  409. json=data,
  410. headers={
  411. "Accept": "text/csv",
  412. **self.client._get_auth_header(),
  413. },
  414. )
  415. if response.status_code != 200:
  416. raise ValueError(
  417. f"Export failed with status {response.status_code}",
  418. response,
  419. )
  420. for chunk in response.iter_bytes():
  421. if chunk:
  422. f.write(chunk)
  423. def delete(
  424. self,
  425. id: str | UUID,
  426. ) -> WrappedBooleanResponse:
  427. """Delete a specific document.
  428. Args:
  429. id (str | UUID): ID of document to delete
  430. Returns:
  431. WrappedBooleanResponse
  432. """
  433. response_dict = self.client._make_request(
  434. "DELETE",
  435. f"documents/{str(id)}",
  436. version="v3",
  437. )
  438. return WrappedBooleanResponse(**response_dict)
  439. def list_chunks(
  440. self,
  441. id: str | UUID,
  442. include_vectors: Optional[bool] = False,
  443. offset: Optional[int] = 0,
  444. limit: Optional[int] = 100,
  445. ) -> WrappedChunksResponse:
  446. """Get chunks for a specific document.
  447. Args:
  448. id (str | UUID): ID of document to retrieve chunks for
  449. include_vectors (Optional[bool]): Whether to include vector embeddings in the response
  450. offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
  451. limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
  452. Returns:
  453. WrappedChunksResponse
  454. """
  455. params = {
  456. "offset": offset,
  457. "limit": limit,
  458. "include_vectors": include_vectors,
  459. }
  460. response_dict = self.client._make_request(
  461. "GET",
  462. f"documents/{str(id)}/chunks",
  463. params=params,
  464. version="v3",
  465. )
  466. return WrappedChunksResponse(**response_dict)
  467. def list_collections(
  468. self,
  469. id: str | UUID,
  470. offset: Optional[int] = 0,
  471. limit: Optional[int] = 100,
  472. ) -> WrappedCollectionsResponse:
  473. """List collections for a specific document.
  474. Args:
  475. id (str | UUID): ID of document to retrieve collections for
  476. offset (int, optional): Specifies the number of objects to skip. Defaults to 0.
  477. limit (int, optional): Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.
  478. Returns:
  479. WrappedCollectionsResponse
  480. """
  481. params = {
  482. "offset": offset,
  483. "limit": limit,
  484. }
  485. response_dict = self.client._make_request(
  486. "GET",
  487. f"documents/{str(id)}/collections",
  488. params=params,
  489. version="v3",
  490. )
  491. return WrappedCollectionsResponse(**response_dict)
  492. def delete_by_filter(
  493. self,
  494. filters: dict[str, Any],
  495. ) -> WrappedBooleanResponse:
  496. """Delete documents based on metadata filters.
  497. Args:
  498. filters (dict): Filters to apply (e.g., `{"metadata.year": {"$lt": 2020}}`).
  499. Returns:
  500. WrappedBooleanResponse
  501. """
  502. filters_json = json.dumps(filters)
  503. response_dict = self.client._make_request(
  504. "DELETE",
  505. "documents/by-filter",
  506. data=filters_json,
  507. version="v3",
  508. )
  509. return WrappedBooleanResponse(**response_dict)
  510. def extract(
  511. self,
  512. id: str | UUID,
  513. settings: Optional[dict | GraphCreationSettings] = None,
  514. run_with_orchestration: Optional[bool] = True,
  515. ) -> WrappedGenericMessageResponse:
  516. """Extract entities and relationships from a document.
  517. Args:
  518. id (str, UUID): ID of document to extract from
  519. settings (Optional[dict]): Settings for extraction process
  520. run_with_orchestration (Optional[bool]): Whether to run with orchestration
  521. Returns:
  522. WrappedGenericMessageResponse
  523. """
  524. data: dict[str, Any] = {}
  525. if settings:
  526. data["settings"] = json.dumps(settings)
  527. if run_with_orchestration is not None:
  528. data["run_with_orchestration"] = str(run_with_orchestration)
  529. response_dict = self.client._make_request(
  530. "POST",
  531. f"documents/{str(id)}/extract",
  532. params=data,
  533. version="v3",
  534. )
  535. return WrappedGenericMessageResponse(**response_dict)
  536. def list_entities(
  537. self,
  538. id: str | UUID,
  539. offset: Optional[int] = 0,
  540. limit: Optional[int] = 100,
  541. include_embeddings: Optional[bool] = False,
  542. ) -> WrappedEntitiesResponse:
  543. """List entities extracted from a document.
  544. Args:
  545. id (str | UUID): ID of document to get entities from
  546. offset (Optional[int]): Number of items to skip
  547. limit (Optional[int]): Max number of items to return
  548. include_embeddings (Optional[bool]): Whether to include embeddings
  549. Returns:
  550. WrappedEntitiesResponse
  551. """
  552. params = {
  553. "offset": offset,
  554. "limit": limit,
  555. "include_embeddings": include_embeddings,
  556. }
  557. response_dict = self.client._make_request(
  558. "GET",
  559. f"documents/{str(id)}/entities",
  560. params=params,
  561. version="v3",
  562. )
  563. return WrappedEntitiesResponse(**response_dict)
  564. def list_relationships(
  565. self,
  566. id: str | UUID,
  567. offset: Optional[int] = 0,
  568. limit: Optional[int] = 100,
  569. entity_names: Optional[list[str]] = None,
  570. relationship_types: Optional[list[str]] = None,
  571. ) -> WrappedRelationshipsResponse:
  572. """List relationships extracted from a document.
  573. Args:
  574. id (str | UUID): ID of document to get relationships from
  575. offset (Optional[int]): Number of items to skip
  576. limit (Optional[int]): Max number of items to return
  577. entity_names (Optional[list[str]]): Filter by entity names
  578. relationship_types (Optional[list[str]]): Filter by relationship types
  579. Returns:
  580. WrappedRelationshipsResponse
  581. """
  582. params: dict[str, Any] = {
  583. "offset": offset,
  584. "limit": limit,
  585. }
  586. if entity_names:
  587. params["entity_names"] = entity_names
  588. if relationship_types:
  589. params["relationship_types"] = relationship_types
  590. response_dict = self.client._make_request(
  591. "GET",
  592. f"documents/{str(id)}/relationships",
  593. params=params,
  594. version="v3",
  595. )
  596. return WrappedRelationshipsResponse(**response_dict)
  597. def list(
  598. self,
  599. ids: Optional[list[str | UUID]] = None,
  600. offset: Optional[int] = 0,
  601. limit: Optional[int] = 100,
  602. include_summary_embeddings: Optional[bool] = False,
  603. owner_only: Optional[bool] = False,
  604. ) -> WrappedDocumentsResponse:
  605. """List documents with pagination.
  606. Args:
  607. ids (Optional[list[str | UUID]]): Optional list of document IDs to filter by.
  608. offset (int, optional): Number of objects to skip. Defaults to 0.
  609. limit (int, optional): Max number of objects to return (1-1000). Defaults to 100.
  610. include_summary_embeddings (Optional[bool]): Whether to include summary embeddings (default: False).
  611. owner_only (Optional[bool]): If true, only returns documents owned by the user, not all accessible documents.
  612. Returns:
  613. WrappedDocumentsResponse
  614. """
  615. params: dict[str, Any] = {
  616. "offset": offset,
  617. "limit": limit,
  618. "include_summary_embeddings": include_summary_embeddings,
  619. "owner_only": owner_only,
  620. }
  621. if ids:
  622. params["ids"] = [str(doc_id) for doc_id in ids]
  623. response_dict = self.client._make_request(
  624. "GET",
  625. "documents",
  626. params=params,
  627. version="v3",
  628. )
  629. return WrappedDocumentsResponse(**response_dict)
  630. def search(
  631. self,
  632. query: str,
  633. search_mode: Optional[str | SearchMode] = SearchMode.custom,
  634. search_settings: Optional[dict | SearchSettings] = None,
  635. ) -> WrappedDocumentSearchResponse:
  636. """Conduct a search query on document summaries.
  637. Args:
  638. query (str): The search query.
  639. search_mode (Optional[str | SearchMode]): Search mode ('basic', 'advanced', 'custom'). Defaults to 'custom'.
  640. search_settings (Optional[dict | SearchSettings]): Search settings (filters, limits, hybrid options, etc.).
  641. Returns:
  642. WrappedDocumentSearchResponse
  643. """
  644. if search_settings and not isinstance(search_settings, dict):
  645. search_settings = search_settings.model_dump()
  646. data: dict[str, Any] = {
  647. "query": query,
  648. "search_settings": search_settings,
  649. }
  650. if search_mode:
  651. data["search_mode"] = search_mode
  652. response_dict = self.client._make_request(
  653. "POST",
  654. "documents/search",
  655. json=data,
  656. version="v3",
  657. )
  658. return WrappedDocumentSearchResponse(**response_dict)
  659. def deduplicate(
  660. self,
  661. id: str | UUID,
  662. settings: Optional[dict | GraphCreationSettings] = None,
  663. run_with_orchestration: Optional[bool] = True,
  664. ) -> WrappedGenericMessageResponse:
  665. """Deduplicate entities and relationships from a document.
  666. Args:
  667. id (str | UUID): ID of document to deduplicate entities for.
  668. settings (Optional[dict | GraphCreationSettings]): Settings for deduplication process.
  669. run_with_orchestration (Optional[bool]): Whether to run with orchestration (default: True).
  670. Returns:
  671. WrappedGenericMessageResponse: Indicating task status.
  672. """
  673. data: dict[str, Any] = {}
  674. if settings:
  675. data["settings"] = json.dumps(settings)
  676. if run_with_orchestration is not None:
  677. data["run_with_orchestration"] = run_with_orchestration
  678. response_dict = self.client._make_request(
  679. "POST",
  680. f"documents/{str(id)}/deduplicate",
  681. params=data,
  682. version="v3",
  683. )
  684. return WrappedGenericMessageResponse(**response_dict)