file_batches.py 31 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785
  1. # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
  2. from __future__ import annotations
  3. import asyncio
  4. from typing import List, Iterable
  5. from typing_extensions import Literal
  6. from concurrent.futures import Future, ThreadPoolExecutor, as_completed
  7. import httpx
  8. import sniffio
  9. from .... import _legacy_response
  10. from ....types import FileObject
  11. from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
  12. from ...._utils import (
  13. is_given,
  14. maybe_transform,
  15. async_maybe_transform,
  16. )
  17. from ...._compat import cached_property
  18. from ...._resource import SyncAPIResource, AsyncAPIResource
  19. from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
  20. from ....pagination import SyncCursorPage, AsyncCursorPage
  21. from ....types.beta import FileChunkingStrategyParam
  22. from ...._base_client import AsyncPaginator, make_request_options
  23. from ....types.beta.vector_stores import file_batch_create_params, file_batch_list_files_params
  24. from ....types.beta.file_chunking_strategy_param import FileChunkingStrategyParam
  25. from ....types.beta.vector_stores.vector_store_file import VectorStoreFile
  26. from ....types.beta.vector_stores.vector_store_file_batch import VectorStoreFileBatch
  27. __all__ = ["FileBatches", "AsyncFileBatches"]
  28. class FileBatches(SyncAPIResource):
  29. @cached_property
  30. def with_raw_response(self) -> FileBatchesWithRawResponse:
  31. """
  32. This property can be used as a prefix for any HTTP method call to return
  33. the raw response object instead of the parsed content.
  34. For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  35. """
  36. return FileBatchesWithRawResponse(self)
  37. @cached_property
  38. def with_streaming_response(self) -> FileBatchesWithStreamingResponse:
  39. """
  40. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  41. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  42. """
  43. return FileBatchesWithStreamingResponse(self)
  44. def create(
  45. self,
  46. vector_store_id: str,
  47. *,
  48. file_ids: List[str],
  49. chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
  50. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  51. # The extra values given here take precedence over values defined on the client or passed to this method.
  52. extra_headers: Headers | None = None,
  53. extra_query: Query | None = None,
  54. extra_body: Body | None = None,
  55. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  56. ) -> VectorStoreFileBatch:
  57. """
  58. Create a vector store file batch.
  59. Args:
  60. file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
  61. the vector store should use. Useful for tools like `file_search` that can access
  62. files.
  63. chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
  64. strategy. Only applicable if `file_ids` is non-empty.
  65. extra_headers: Send extra headers
  66. extra_query: Add additional query parameters to the request
  67. extra_body: Add additional JSON properties to the request
  68. timeout: Override the client-level default timeout for this request, in seconds
  69. """
  70. if not vector_store_id:
  71. raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
  72. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  73. return self._post(
  74. f"/vector_stores/{vector_store_id}/file_batches",
  75. body=maybe_transform(
  76. {
  77. "file_ids": file_ids,
  78. "chunking_strategy": chunking_strategy,
  79. },
  80. file_batch_create_params.FileBatchCreateParams,
  81. ),
  82. options=make_request_options(
  83. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  84. ),
  85. cast_to=VectorStoreFileBatch,
  86. )
  87. def retrieve(
  88. self,
  89. batch_id: str,
  90. *,
  91. vector_store_id: str,
  92. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  93. # The extra values given here take precedence over values defined on the client or passed to this method.
  94. extra_headers: Headers | None = None,
  95. extra_query: Query | None = None,
  96. extra_body: Body | None = None,
  97. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  98. ) -> VectorStoreFileBatch:
  99. """
  100. Retrieves a vector store file batch.
  101. Args:
  102. extra_headers: Send extra headers
  103. extra_query: Add additional query parameters to the request
  104. extra_body: Add additional JSON properties to the request
  105. timeout: Override the client-level default timeout for this request, in seconds
  106. """
  107. if not vector_store_id:
  108. raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
  109. if not batch_id:
  110. raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
  111. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  112. return self._get(
  113. f"/vector_stores/{vector_store_id}/file_batches/{batch_id}",
  114. options=make_request_options(
  115. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  116. ),
  117. cast_to=VectorStoreFileBatch,
  118. )
  119. def cancel(
  120. self,
  121. batch_id: str,
  122. *,
  123. vector_store_id: str,
  124. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  125. # The extra values given here take precedence over values defined on the client or passed to this method.
  126. extra_headers: Headers | None = None,
  127. extra_query: Query | None = None,
  128. extra_body: Body | None = None,
  129. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  130. ) -> VectorStoreFileBatch:
  131. """Cancel a vector store file batch.
  132. This attempts to cancel the processing of
  133. files in this batch as soon as possible.
  134. Args:
  135. extra_headers: Send extra headers
  136. extra_query: Add additional query parameters to the request
  137. extra_body: Add additional JSON properties to the request
  138. timeout: Override the client-level default timeout for this request, in seconds
  139. """
  140. if not vector_store_id:
  141. raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
  142. if not batch_id:
  143. raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
  144. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  145. return self._post(
  146. f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
  147. options=make_request_options(
  148. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  149. ),
  150. cast_to=VectorStoreFileBatch,
  151. )
  152. def create_and_poll(
  153. self,
  154. vector_store_id: str,
  155. *,
  156. file_ids: List[str],
  157. poll_interval_ms: int | NotGiven = NOT_GIVEN,
  158. chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
  159. ) -> VectorStoreFileBatch:
  160. """Create a vector store batch and poll until all files have been processed."""
  161. batch = self.create(
  162. vector_store_id=vector_store_id,
  163. file_ids=file_ids,
  164. chunking_strategy=chunking_strategy,
  165. )
  166. # TODO: don't poll unless necessary??
  167. return self.poll(
  168. batch.id,
  169. vector_store_id=vector_store_id,
  170. poll_interval_ms=poll_interval_ms,
  171. )
  172. def list_files(
  173. self,
  174. batch_id: str,
  175. *,
  176. vector_store_id: str,
  177. after: str | NotGiven = NOT_GIVEN,
  178. before: str | NotGiven = NOT_GIVEN,
  179. filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
  180. limit: int | NotGiven = NOT_GIVEN,
  181. order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
  182. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  183. # The extra values given here take precedence over values defined on the client or passed to this method.
  184. extra_headers: Headers | None = None,
  185. extra_query: Query | None = None,
  186. extra_body: Body | None = None,
  187. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  188. ) -> SyncCursorPage[VectorStoreFile]:
  189. """
  190. Returns a list of vector store files in a batch.
  191. Args:
  192. after: A cursor for use in pagination. `after` is an object ID that defines your place
  193. in the list. For instance, if you make a list request and receive 100 objects,
  194. ending with obj_foo, your subsequent call can include after=obj_foo in order to
  195. fetch the next page of the list.
  196. before: A cursor for use in pagination. `before` is an object ID that defines your place
  197. in the list. For instance, if you make a list request and receive 100 objects,
  198. starting with obj_foo, your subsequent call can include before=obj_foo in order
  199. to fetch the previous page of the list.
  200. filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
  201. limit: A limit on the number of objects to be returned. Limit can range between 1 and
  202. 100, and the default is 20.
  203. order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
  204. order and `desc` for descending order.
  205. extra_headers: Send extra headers
  206. extra_query: Add additional query parameters to the request
  207. extra_body: Add additional JSON properties to the request
  208. timeout: Override the client-level default timeout for this request, in seconds
  209. """
  210. if not vector_store_id:
  211. raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
  212. if not batch_id:
  213. raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
  214. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  215. return self._get_api_list(
  216. f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
  217. page=SyncCursorPage[VectorStoreFile],
  218. options=make_request_options(
  219. extra_headers=extra_headers,
  220. extra_query=extra_query,
  221. extra_body=extra_body,
  222. timeout=timeout,
  223. query=maybe_transform(
  224. {
  225. "after": after,
  226. "before": before,
  227. "filter": filter,
  228. "limit": limit,
  229. "order": order,
  230. },
  231. file_batch_list_files_params.FileBatchListFilesParams,
  232. ),
  233. ),
  234. model=VectorStoreFile,
  235. )
  236. def poll(
  237. self,
  238. batch_id: str,
  239. *,
  240. vector_store_id: str,
  241. poll_interval_ms: int | NotGiven = NOT_GIVEN,
  242. ) -> VectorStoreFileBatch:
  243. """Wait for the given file batch to be processed.
  244. Note: this will return even if one of the files failed to process, you need to
  245. check batch.file_counts.failed_count to handle this case.
  246. """
  247. headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
  248. if is_given(poll_interval_ms):
  249. headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
  250. while True:
  251. response = self.with_raw_response.retrieve(
  252. batch_id,
  253. vector_store_id=vector_store_id,
  254. extra_headers=headers,
  255. )
  256. batch = response.parse()
  257. if batch.file_counts.in_progress > 0:
  258. if not is_given(poll_interval_ms):
  259. from_header = response.headers.get("openai-poll-after-ms")
  260. if from_header is not None:
  261. poll_interval_ms = int(from_header)
  262. else:
  263. poll_interval_ms = 1000
  264. self._sleep(poll_interval_ms / 1000)
  265. continue
  266. return batch
  267. def upload_and_poll(
  268. self,
  269. vector_store_id: str,
  270. *,
  271. files: Iterable[FileTypes],
  272. max_concurrency: int = 5,
  273. file_ids: List[str] = [],
  274. poll_interval_ms: int | NotGiven = NOT_GIVEN,
  275. chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
  276. ) -> VectorStoreFileBatch:
  277. """Uploads the given files concurrently and then creates a vector store file batch.
  278. If you've already uploaded certain files that you want to include in this batch
  279. then you can pass their IDs through the `file_ids` argument.
  280. By default, if any file upload fails then an exception will be eagerly raised.
  281. The number of concurrency uploads is configurable using the `max_concurrency`
  282. parameter.
  283. Note: this method only supports `asyncio` or `trio` as the backing async
  284. runtime.
  285. """
  286. results: list[FileObject] = []
  287. with ThreadPoolExecutor(max_workers=max_concurrency) as executor:
  288. futures: list[Future[FileObject]] = [
  289. executor.submit(
  290. self._client.files.create,
  291. file=file,
  292. purpose="assistants",
  293. )
  294. for file in files
  295. ]
  296. for future in as_completed(futures):
  297. exc = future.exception()
  298. if exc:
  299. raise exc
  300. results.append(future.result())
  301. batch = self.create_and_poll(
  302. vector_store_id=vector_store_id,
  303. file_ids=[*file_ids, *(f.id for f in results)],
  304. poll_interval_ms=poll_interval_ms,
  305. chunking_strategy=chunking_strategy,
  306. )
  307. return batch
  308. class AsyncFileBatches(AsyncAPIResource):
  309. @cached_property
  310. def with_raw_response(self) -> AsyncFileBatchesWithRawResponse:
  311. """
  312. This property can be used as a prefix for any HTTP method call to return
  313. the raw response object instead of the parsed content.
  314. For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
  315. """
  316. return AsyncFileBatchesWithRawResponse(self)
  317. @cached_property
  318. def with_streaming_response(self) -> AsyncFileBatchesWithStreamingResponse:
  319. """
  320. An alternative to `.with_raw_response` that doesn't eagerly read the response body.
  321. For more information, see https://www.github.com/openai/openai-python#with_streaming_response
  322. """
  323. return AsyncFileBatchesWithStreamingResponse(self)
  324. async def create(
  325. self,
  326. vector_store_id: str,
  327. *,
  328. file_ids: List[str],
  329. chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
  330. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  331. # The extra values given here take precedence over values defined on the client or passed to this method.
  332. extra_headers: Headers | None = None,
  333. extra_query: Query | None = None,
  334. extra_body: Body | None = None,
  335. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  336. ) -> VectorStoreFileBatch:
  337. """
  338. Create a vector store file batch.
  339. Args:
  340. file_ids: A list of [File](https://platform.openai.com/docs/api-reference/files) IDs that
  341. the vector store should use. Useful for tools like `file_search` that can access
  342. files.
  343. chunking_strategy: The chunking strategy used to chunk the file(s). If not set, will use the `auto`
  344. strategy. Only applicable if `file_ids` is non-empty.
  345. extra_headers: Send extra headers
  346. extra_query: Add additional query parameters to the request
  347. extra_body: Add additional JSON properties to the request
  348. timeout: Override the client-level default timeout for this request, in seconds
  349. """
  350. if not vector_store_id:
  351. raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
  352. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  353. return await self._post(
  354. f"/vector_stores/{vector_store_id}/file_batches",
  355. body=await async_maybe_transform(
  356. {
  357. "file_ids": file_ids,
  358. "chunking_strategy": chunking_strategy,
  359. },
  360. file_batch_create_params.FileBatchCreateParams,
  361. ),
  362. options=make_request_options(
  363. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  364. ),
  365. cast_to=VectorStoreFileBatch,
  366. )
  367. async def retrieve(
  368. self,
  369. batch_id: str,
  370. *,
  371. vector_store_id: str,
  372. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  373. # The extra values given here take precedence over values defined on the client or passed to this method.
  374. extra_headers: Headers | None = None,
  375. extra_query: Query | None = None,
  376. extra_body: Body | None = None,
  377. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  378. ) -> VectorStoreFileBatch:
  379. """
  380. Retrieves a vector store file batch.
  381. Args:
  382. extra_headers: Send extra headers
  383. extra_query: Add additional query parameters to the request
  384. extra_body: Add additional JSON properties to the request
  385. timeout: Override the client-level default timeout for this request, in seconds
  386. """
  387. if not vector_store_id:
  388. raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
  389. if not batch_id:
  390. raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
  391. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  392. return await self._get(
  393. f"/vector_stores/{vector_store_id}/file_batches/{batch_id}",
  394. options=make_request_options(
  395. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  396. ),
  397. cast_to=VectorStoreFileBatch,
  398. )
  399. async def cancel(
  400. self,
  401. batch_id: str,
  402. *,
  403. vector_store_id: str,
  404. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  405. # The extra values given here take precedence over values defined on the client or passed to this method.
  406. extra_headers: Headers | None = None,
  407. extra_query: Query | None = None,
  408. extra_body: Body | None = None,
  409. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  410. ) -> VectorStoreFileBatch:
  411. """Cancel a vector store file batch.
  412. This attempts to cancel the processing of
  413. files in this batch as soon as possible.
  414. Args:
  415. extra_headers: Send extra headers
  416. extra_query: Add additional query parameters to the request
  417. extra_body: Add additional JSON properties to the request
  418. timeout: Override the client-level default timeout for this request, in seconds
  419. """
  420. if not vector_store_id:
  421. raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
  422. if not batch_id:
  423. raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
  424. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  425. return await self._post(
  426. f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/cancel",
  427. options=make_request_options(
  428. extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
  429. ),
  430. cast_to=VectorStoreFileBatch,
  431. )
  432. async def create_and_poll(
  433. self,
  434. vector_store_id: str,
  435. *,
  436. file_ids: List[str],
  437. poll_interval_ms: int | NotGiven = NOT_GIVEN,
  438. chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
  439. ) -> VectorStoreFileBatch:
  440. """Create a vector store batch and poll until all files have been processed."""
  441. batch = await self.create(
  442. vector_store_id=vector_store_id,
  443. file_ids=file_ids,
  444. chunking_strategy=chunking_strategy,
  445. )
  446. # TODO: don't poll unless necessary??
  447. return await self.poll(
  448. batch.id,
  449. vector_store_id=vector_store_id,
  450. poll_interval_ms=poll_interval_ms,
  451. )
  452. def list_files(
  453. self,
  454. batch_id: str,
  455. *,
  456. vector_store_id: str,
  457. after: str | NotGiven = NOT_GIVEN,
  458. before: str | NotGiven = NOT_GIVEN,
  459. filter: Literal["in_progress", "completed", "failed", "cancelled"] | NotGiven = NOT_GIVEN,
  460. limit: int | NotGiven = NOT_GIVEN,
  461. order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
  462. # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
  463. # The extra values given here take precedence over values defined on the client or passed to this method.
  464. extra_headers: Headers | None = None,
  465. extra_query: Query | None = None,
  466. extra_body: Body | None = None,
  467. timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
  468. ) -> AsyncPaginator[VectorStoreFile, AsyncCursorPage[VectorStoreFile]]:
  469. """
  470. Returns a list of vector store files in a batch.
  471. Args:
  472. after: A cursor for use in pagination. `after` is an object ID that defines your place
  473. in the list. For instance, if you make a list request and receive 100 objects,
  474. ending with obj_foo, your subsequent call can include after=obj_foo in order to
  475. fetch the next page of the list.
  476. before: A cursor for use in pagination. `before` is an object ID that defines your place
  477. in the list. For instance, if you make a list request and receive 100 objects,
  478. starting with obj_foo, your subsequent call can include before=obj_foo in order
  479. to fetch the previous page of the list.
  480. filter: Filter by file status. One of `in_progress`, `completed`, `failed`, `cancelled`.
  481. limit: A limit on the number of objects to be returned. Limit can range between 1 and
  482. 100, and the default is 20.
  483. order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
  484. order and `desc` for descending order.
  485. extra_headers: Send extra headers
  486. extra_query: Add additional query parameters to the request
  487. extra_body: Add additional JSON properties to the request
  488. timeout: Override the client-level default timeout for this request, in seconds
  489. """
  490. if not vector_store_id:
  491. raise ValueError(f"Expected a non-empty value for `vector_store_id` but received {vector_store_id!r}")
  492. if not batch_id:
  493. raise ValueError(f"Expected a non-empty value for `batch_id` but received {batch_id!r}")
  494. extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
  495. return self._get_api_list(
  496. f"/vector_stores/{vector_store_id}/file_batches/{batch_id}/files",
  497. page=AsyncCursorPage[VectorStoreFile],
  498. options=make_request_options(
  499. extra_headers=extra_headers,
  500. extra_query=extra_query,
  501. extra_body=extra_body,
  502. timeout=timeout,
  503. query=maybe_transform(
  504. {
  505. "after": after,
  506. "before": before,
  507. "filter": filter,
  508. "limit": limit,
  509. "order": order,
  510. },
  511. file_batch_list_files_params.FileBatchListFilesParams,
  512. ),
  513. ),
  514. model=VectorStoreFile,
  515. )
  516. async def poll(
  517. self,
  518. batch_id: str,
  519. *,
  520. vector_store_id: str,
  521. poll_interval_ms: int | NotGiven = NOT_GIVEN,
  522. ) -> VectorStoreFileBatch:
  523. """Wait for the given file batch to be processed.
  524. Note: this will return even if one of the files failed to process, you need to
  525. check batch.file_counts.failed_count to handle this case.
  526. """
  527. headers: dict[str, str] = {"X-Stainless-Poll-Helper": "true"}
  528. if is_given(poll_interval_ms):
  529. headers["X-Stainless-Custom-Poll-Interval"] = str(poll_interval_ms)
  530. while True:
  531. response = await self.with_raw_response.retrieve(
  532. batch_id,
  533. vector_store_id=vector_store_id,
  534. extra_headers=headers,
  535. )
  536. batch = response.parse()
  537. if batch.file_counts.in_progress > 0:
  538. if not is_given(poll_interval_ms):
  539. from_header = response.headers.get("openai-poll-after-ms")
  540. if from_header is not None:
  541. poll_interval_ms = int(from_header)
  542. else:
  543. poll_interval_ms = 1000
  544. await self._sleep(poll_interval_ms / 1000)
  545. continue
  546. return batch
  547. async def upload_and_poll(
  548. self,
  549. vector_store_id: str,
  550. *,
  551. files: Iterable[FileTypes],
  552. max_concurrency: int = 5,
  553. file_ids: List[str] = [],
  554. poll_interval_ms: int | NotGiven = NOT_GIVEN,
  555. chunking_strategy: FileChunkingStrategyParam | NotGiven = NOT_GIVEN,
  556. ) -> VectorStoreFileBatch:
  557. """Uploads the given files concurrently and then creates a vector store file batch.
  558. If you've already uploaded certain files that you want to include in this batch
  559. then you can pass their IDs through the `file_ids` argument.
  560. By default, if any file upload fails then an exception will be eagerly raised.
  561. The number of concurrency uploads is configurable using the `max_concurrency`
  562. parameter.
  563. Note: this method only supports `asyncio` or `trio` as the backing async
  564. runtime.
  565. """
  566. uploaded_files: list[FileObject] = []
  567. async_library = sniffio.current_async_library()
  568. if async_library == "asyncio":
  569. async def asyncio_upload_file(semaphore: asyncio.Semaphore, file: FileTypes) -> None:
  570. async with semaphore:
  571. file_obj = await self._client.files.create(
  572. file=file,
  573. purpose="assistants",
  574. )
  575. uploaded_files.append(file_obj)
  576. semaphore = asyncio.Semaphore(max_concurrency)
  577. tasks = [asyncio_upload_file(semaphore, file) for file in files]
  578. await asyncio.gather(*tasks)
  579. elif async_library == "trio":
  580. # We only import if the library is being used.
  581. # We support Python 3.7 so are using an older version of trio that does not have type information
  582. import trio # type: ignore # pyright: ignore[reportMissingTypeStubs]
  583. async def trio_upload_file(limiter: trio.CapacityLimiter, file: FileTypes) -> None:
  584. async with limiter:
  585. file_obj = await self._client.files.create(
  586. file=file,
  587. purpose="assistants",
  588. )
  589. uploaded_files.append(file_obj)
  590. limiter = trio.CapacityLimiter(max_concurrency)
  591. async with trio.open_nursery() as nursery:
  592. for file in files:
  593. nursery.start_soon(trio_upload_file, limiter, file) # pyright: ignore [reportUnknownMemberType]
  594. else:
  595. raise RuntimeError(
  596. f"Async runtime {async_library} is not supported yet. Only asyncio or trio is supported",
  597. )
  598. batch = await self.create_and_poll(
  599. vector_store_id=vector_store_id,
  600. file_ids=[*file_ids, *(f.id for f in uploaded_files)],
  601. poll_interval_ms=poll_interval_ms,
  602. chunking_strategy=chunking_strategy,
  603. )
  604. return batch
  605. class FileBatchesWithRawResponse:
  606. def __init__(self, file_batches: FileBatches) -> None:
  607. self._file_batches = file_batches
  608. self.create = _legacy_response.to_raw_response_wrapper(
  609. file_batches.create,
  610. )
  611. self.retrieve = _legacy_response.to_raw_response_wrapper(
  612. file_batches.retrieve,
  613. )
  614. self.cancel = _legacy_response.to_raw_response_wrapper(
  615. file_batches.cancel,
  616. )
  617. self.list_files = _legacy_response.to_raw_response_wrapper(
  618. file_batches.list_files,
  619. )
  620. class AsyncFileBatchesWithRawResponse:
  621. def __init__(self, file_batches: AsyncFileBatches) -> None:
  622. self._file_batches = file_batches
  623. self.create = _legacy_response.async_to_raw_response_wrapper(
  624. file_batches.create,
  625. )
  626. self.retrieve = _legacy_response.async_to_raw_response_wrapper(
  627. file_batches.retrieve,
  628. )
  629. self.cancel = _legacy_response.async_to_raw_response_wrapper(
  630. file_batches.cancel,
  631. )
  632. self.list_files = _legacy_response.async_to_raw_response_wrapper(
  633. file_batches.list_files,
  634. )
  635. class FileBatchesWithStreamingResponse:
  636. def __init__(self, file_batches: FileBatches) -> None:
  637. self._file_batches = file_batches
  638. self.create = to_streamed_response_wrapper(
  639. file_batches.create,
  640. )
  641. self.retrieve = to_streamed_response_wrapper(
  642. file_batches.retrieve,
  643. )
  644. self.cancel = to_streamed_response_wrapper(
  645. file_batches.cancel,
  646. )
  647. self.list_files = to_streamed_response_wrapper(
  648. file_batches.list_files,
  649. )
  650. class AsyncFileBatchesWithStreamingResponse:
  651. def __init__(self, file_batches: AsyncFileBatches) -> None:
  652. self._file_batches = file_batches
  653. self.create = async_to_streamed_response_wrapper(
  654. file_batches.create,
  655. )
  656. self.retrieve = async_to_streamed_response_wrapper(
  657. file_batches.retrieve,
  658. )
  659. self.cancel = async_to_streamed_response_wrapper(
  660. file_batches.cancel,
  661. )
  662. self.list_files = async_to_streamed_response_wrapper(
  663. file_batches.list_files,
  664. )