indices_router.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614
  1. # TODO - Move indices to 'id' basis
  2. # TODO - Implement update index
  3. # TODO - Implement index data model
  4. import logging
  5. import textwrap
  6. from typing import Optional
  7. from fastapi import Body, Depends, Path, Query
  8. from core.base import IndexConfig, R2RException
  9. from core.base.abstractions import VectorTableName
  10. from core.base.api.models import (
  11. GenericMessageResponse,
  12. WrappedGenericMessageResponse,
  13. WrappedListVectorIndicesResponse,
  14. )
  15. from ...abstractions import R2RProviders, R2RServices
  16. from .base_router import BaseRouterV3
# Logger used by the endpoints below. Calling getLogger() without a name
# returns the root logger rather than a module-scoped one.
logger = logging.getLogger()
  18. class IndicesRouter(BaseRouterV3):
  19. def __init__(
  20. self,
  21. providers: R2RProviders,
  22. services: R2RServices,
  23. ):
  24. super().__init__(providers, services)
  25. def _setup_routes(self):
  26. ## TODO - Allow developer to pass the index id with the request
  27. @self.router.post(
  28. "/indices",
  29. dependencies=[Depends(self.rate_limit_dependency)],
  30. summary="Create Vector Index",
  31. openapi_extra={
  32. "x-codeSamples": [
  33. {
  34. "lang": "Python",
  35. "source": textwrap.dedent(
  36. """
  37. from r2r import R2RClient
  38. client = R2RClient()
  39. # when using auth, do client.login(...)
  40. # Create an HNSW index for efficient similarity search
  41. result = client.indices.create(
  42. config={
  43. "table_name": "chunks", # The table containing vector embeddings
  44. "index_method": "hnsw", # Hierarchical Navigable Small World graph
  45. "index_measure": "cosine_distance", # Similarity measure
  46. "index_arguments": {
  47. "m": 16, # Number of connections per layer
  48. "ef_construction": 64,# Size of dynamic candidate list for construction
  49. "ef": 40, # Size of dynamic candidate list for search
  50. },
  51. "index_name": "my_document_embeddings_idx",
  52. "index_column": "embedding",
  53. "concurrently": True # Build index without blocking table writes
  54. },
  55. run_with_orchestration=True # Run as orchestrated task for large indices
  56. )
  57. # Create an IVF-Flat index for balanced performance
  58. result = client.indices.create(
  59. config={
  60. "table_name": "chunks",
  61. "index_method": "ivf_flat", # Inverted File with Flat storage
  62. "index_measure": "l2_distance",
  63. "index_arguments": {
  64. "lists": 100, # Number of cluster centroids
  65. "probe": 10, # Number of clusters to search
  66. },
  67. "index_name": "my_ivf_embeddings_idx",
  68. "index_column": "embedding",
  69. "concurrently": True
  70. }
  71. )
  72. """
  73. ),
  74. },
  75. {
  76. "lang": "JavaScript",
  77. "source": textwrap.dedent(
  78. """
  79. const { r2rClient } = require("r2r-js");
  80. const client = new r2rClient();
  81. function main() {
  82. const response = await client.indicies.create({
  83. config: {
  84. tableName: "vectors",
  85. indexMethod: "hnsw",
  86. indexMeasure: "cosine_distance",
  87. indexArguments: {
  88. m: 16,
  89. ef_construction: 64,
  90. ef: 40
  91. },
  92. indexName: "my_document_embeddings_idx",
  93. indexColumn: "embedding",
  94. concurrently: true
  95. },
  96. runWithOrchestration: true
  97. });
  98. }
  99. main();
  100. """
  101. ),
  102. },
  103. {
  104. "lang": "Shell",
  105. "source": textwrap.dedent(
  106. """
  107. # Create HNSW Index
  108. curl -X POST "https://api.example.com/indices" \\
  109. -H "Content-Type: application/json" \\
  110. -H "Authorization: Bearer YOUR_API_KEY" \\
  111. -d '{
  112. "config": {
  113. "table_name": "vectors",
  114. "index_method": "hnsw",
  115. "index_measure": "cosine_distance",
  116. "index_arguments": {
  117. "m": 16,
  118. "ef_construction": 64,
  119. "ef": 40
  120. },
  121. "index_name": "my_document_embeddings_idx",
  122. "index_column": "embedding",
  123. "concurrently": true
  124. },
  125. "run_with_orchestration": true
  126. }'
  127. # Create IVF-Flat Index
  128. curl -X POST "https://api.example.com/indices" \\
  129. -H "Content-Type: application/json" \\
  130. -H "Authorization: Bearer YOUR_API_KEY" \\
  131. -d '{
  132. "config": {
  133. "table_name": "vectors",
  134. "index_method": "ivf_flat",
  135. "index_measure": "l2_distance",
  136. "index_arguments": {
  137. "lists": 100,
  138. "probe": 10
  139. },
  140. "index_name": "my_ivf_embeddings_idx",
  141. "index_column": "embedding",
  142. "concurrently": true
  143. }
  144. }'
  145. """
  146. ),
  147. },
  148. ]
  149. },
  150. )
  151. @self.base_endpoint
  152. async def create_index(
  153. config: IndexConfig,
  154. run_with_orchestration: Optional[bool] = Body(
  155. True,
  156. description="Whether to run index creation as an orchestrated task (recommended for large indices)",
  157. ),
  158. auth_user=Depends(self.providers.auth.auth_wrapper()),
  159. ) -> WrappedGenericMessageResponse:
  160. """
  161. Create a new vector similarity search index in over the target table. Allowed tables include 'vectors', 'entity', 'document_collections'.
  162. Vectors correspond to the chunks of text that are indexed for similarity search, whereas entity and document_collections are created during knowledge graph construction.
  163. This endpoint creates a database index optimized for efficient similarity search over vector embeddings.
  164. It supports two main indexing methods:
  165. 1. HNSW (Hierarchical Navigable Small World):
  166. - Best for: High-dimensional vectors requiring fast approximate nearest neighbor search
  167. - Pros: Very fast search, good recall, memory-resident for speed
  168. - Cons: Slower index construction, more memory usage
  169. - Key parameters:
  170. * m: Number of connections per layer (higher = better recall but more memory)
  171. * ef_construction: Build-time search width (higher = better recall but slower build)
  172. * ef: Query-time search width (higher = better recall but slower search)
  173. 2. IVF-Flat (Inverted File with Flat Storage):
  174. - Best for: Balance between build speed, search speed, and recall
  175. - Pros: Faster index construction, less memory usage
  176. - Cons: Slightly slower search than HNSW
  177. - Key parameters:
  178. * lists: Number of clusters (usually sqrt(n) where n is number of vectors)
  179. * probe: Number of nearest clusters to search
  180. Supported similarity measures:
  181. - cosine_distance: Best for comparing semantic similarity
  182. - l2_distance: Best for comparing absolute distances
  183. - ip_distance: Best for comparing raw dot products
  184. Notes:
  185. - Index creation can be resource-intensive for large datasets
  186. - Use run_with_orchestration=True for large indices to prevent timeouts
  187. - The 'concurrently' option allows other operations while building
  188. - Index names must be unique per table
  189. """
  190. # TODO: Implement index creation logic
  191. logger.info(
  192. f"Creating vector index for {config.table_name} with method {config.index_method}, measure {config.index_measure}, concurrently {config.concurrently}"
  193. )
  194. result = await self.providers.orchestration.run_workflow(
  195. "create-vector-index",
  196. {
  197. "request": {
  198. "table_name": config.table_name,
  199. "index_method": config.index_method,
  200. "index_measure": config.index_measure,
  201. "index_name": config.index_name,
  202. "index_column": config.index_column,
  203. "index_arguments": config.index_arguments,
  204. "concurrently": config.concurrently,
  205. },
  206. },
  207. options={
  208. "additional_metadata": {},
  209. },
  210. )
  211. return result
  212. @self.router.get(
  213. "/indices",
  214. dependencies=[Depends(self.rate_limit_dependency)],
  215. summary="List Vector Indices",
  216. openapi_extra={
  217. "x-codeSamples": [
  218. {
  219. "lang": "Python",
  220. "source": textwrap.dedent(
  221. """
  222. from r2r import R2RClient
  223. client = R2RClient()
  224. # List all indices
  225. indices = client.indices.list(
  226. offset=0,
  227. limit=10
  228. )
  229. """
  230. ),
  231. },
  232. {
  233. "lang": "JavaScript",
  234. "source": textwrap.dedent(
  235. """
  236. const { r2rClient } = require("r2r-js");
  237. const client = new r2rClient();
  238. function main() {
  239. const response = await client.indicies.list({
  240. offset: 0,
  241. limit: 10,
  242. filters: { table_name: "vectors" }
  243. }
  244. main();
  245. """
  246. ),
  247. },
  248. {
  249. "lang": "CLI",
  250. "source": textwrap.dedent(
  251. """
  252. r2r indices list
  253. """
  254. ),
  255. },
  256. {
  257. "lang": "Shell",
  258. "source": textwrap.dedent(
  259. """
  260. curl -X GET "https://api.example.com/indices?offset=0&limit=10" \\
  261. -H "Authorization: Bearer YOUR_API_KEY" \\
  262. -H "Content-Type: application/json"
  263. # With filters
  264. curl -X GET "https://api.example.com/indices?offset=0&limit=10&filters={\"table_name\":\"vectors\"}" \\
  265. -H "Authorization: Bearer YOUR_API_KEY" \\
  266. -H "Content-Type: application/json"
  267. """
  268. ),
  269. },
  270. ]
  271. },
  272. )
  273. @self.base_endpoint
  274. async def list_indices(
  275. # filters: list[str] = Query([]),
  276. offset: int = Query(
  277. 0,
  278. ge=0,
  279. description="Specifies the number of objects to skip. Defaults to 0.",
  280. ),
  281. limit: int = Query(
  282. 100,
  283. ge=1,
  284. le=1000,
  285. description="Specifies a limit on the number of objects to return, ranging between 1 and 100. Defaults to 100.",
  286. ),
  287. auth_user=Depends(self.providers.auth.auth_wrapper()),
  288. ) -> WrappedListVectorIndicesResponse:
  289. """
  290. List existing vector similarity search indices with pagination support.
  291. Returns details about each index including:
  292. - Name and table name
  293. - Indexing method and parameters
  294. - Size and row count
  295. - Creation timestamp and last updated
  296. - Performance statistics (if available)
  297. The response can be filtered using the filter_by parameter to narrow down results
  298. based on table name, index method, or other attributes.
  299. """
  300. # TODO: Implement index listing logic
  301. indices = (
  302. await self.providers.database.chunks_handler.list_indices(
  303. offset=offset, limit=limit # , filters=filters
  304. )
  305. )
  306. return {"indices": indices["indices"]}, indices["page_info"] # type: ignore
  307. @self.router.get(
  308. "/indices/{table_name}/{index_name}",
  309. dependencies=[Depends(self.rate_limit_dependency)],
  310. summary="Get Vector Index Details",
  311. openapi_extra={
  312. "x-codeSamples": [
  313. {
  314. "lang": "Python",
  315. "source": textwrap.dedent(
  316. """
  317. from r2r import R2RClient
  318. client = R2RClient()
  319. # Get detailed information about a specific index
  320. index = client.indices.retrieve("index_1")
  321. """
  322. ),
  323. },
  324. {
  325. "lang": "JavaScript",
  326. "source": textwrap.dedent(
  327. """
  328. const { r2rClient } = require("r2r-js");
  329. const client = new r2rClient();
  330. function main() {
  331. const response = await client.indicies.retrieve({
  332. indexName: "index_1",
  333. tableName: "vectors"
  334. });
  335. console.log(response);
  336. }
  337. main();
  338. """
  339. ),
  340. },
  341. {
  342. "lang": "CLI",
  343. "source": textwrap.dedent(
  344. """
  345. r2r indices retrieve index_1 vectors
  346. """
  347. ),
  348. },
  349. {
  350. "lang": "Shell",
  351. "source": textwrap.dedent(
  352. """
  353. curl -X GET "https://api.example.com/indices/vectors/index_1" \\
  354. -H "Authorization: Bearer YOUR_API_KEY"
  355. """
  356. ),
  357. },
  358. ]
  359. },
  360. )
  361. @self.base_endpoint
  362. async def get_index(
  363. table_name: VectorTableName = Path(
  364. ...,
  365. description="The table of vector embeddings to delete (e.g. `vectors`, `entity`, `document_collections`)",
  366. ),
  367. index_name: str = Path(
  368. ..., description="The name of the index to delete"
  369. ),
  370. auth_user=Depends(self.providers.auth.auth_wrapper()),
  371. ) -> dict: # -> WrappedGetIndexResponse:
  372. """
  373. Get detailed information about a specific vector index.
  374. Returns comprehensive information about the index including:
  375. - Configuration details (method, measure, parameters)
  376. - Current size and row count
  377. - Build progress (if still under construction)
  378. - Performance statistics:
  379. * Average query time
  380. * Memory usage
  381. * Cache hit rates
  382. * Recent query patterns
  383. - Maintenance information:
  384. * Last vacuum
  385. * Fragmentation level
  386. * Recommended optimizations
  387. """
  388. # TODO: Implement get index logic
  389. indices = (
  390. await self.providers.database.chunks_handler.list_indices(
  391. filters={
  392. "index_name": index_name,
  393. "table_name": table_name,
  394. },
  395. limit=1,
  396. offset=0,
  397. )
  398. )
  399. if len(indices["indices"]) != 1:
  400. raise R2RException(
  401. f"Index '{index_name}' not found", status_code=404
  402. )
  403. return {"index": indices["indices"][0]}
  404. # TODO - Implement update index
  405. # @self.router.post(
  406. # "/indices/{name}",
  407. # summary="Update Vector Index",
  408. # openapi_extra={
  409. # "x-codeSamples": [
  410. # {
  411. # "lang": "Python",
  412. # "source": """
  413. # from r2r import R2RClient
  414. # client = R2RClient()
  415. # # Update HNSW index parameters
  416. # result = client.indices.update(
  417. # "550e8400-e29b-41d4-a716-446655440000",
  418. # config={
  419. # "index_arguments": {
  420. # "ef": 80, # Increase search quality
  421. # "m": 24 # Increase connections per layer
  422. # },
  423. # "concurrently": True
  424. # },
  425. # run_with_orchestration=True
  426. # )""",
  427. # },
  428. # {
  429. # "lang": "Shell",
  430. # "source": """
  431. # curl -X PUT "https://api.example.com/indices/550e8400-e29b-41d4-a716-446655440000" \\
  432. # -H "Content-Type: application/json" \\
  433. # -H "Authorization: Bearer YOUR_API_KEY" \\
  434. # -d '{
  435. # "config": {
  436. # "index_arguments": {
  437. # "ef": 80,
  438. # "m": 24
  439. # },
  440. # "concurrently": true
  441. # },
  442. # "run_with_orchestration": true
  443. # }'""",
  444. # },
  445. # ]
  446. # },
  447. # )
  448. # @self.base_endpoint
  449. # async def update_index(
  450. # id: UUID = Path(...),
  451. # config: IndexConfig = Body(...),
  452. # run_with_orchestration: Optional[bool] = Body(True),
  453. # auth_user=Depends(self.providers.auth.auth_wrapper()),
  454. # ): # -> WrappedUpdateIndexResponse:
  455. # """
  456. # Update an existing index's configuration.
  457. # """
  458. # # TODO: Implement index update logic
  459. # pass
  460. @self.router.delete(
  461. "/indices/{table_name}/{index_name}",
  462. dependencies=[Depends(self.rate_limit_dependency)],
  463. summary="Delete Vector Index",
  464. openapi_extra={
  465. "x-codeSamples": [
  466. {
  467. "lang": "Python",
  468. "source": textwrap.dedent(
  469. """
  470. from r2r import R2RClient
  471. client = R2RClient()
  472. # Delete an index with orchestration for cleanup
  473. result = client.indices.delete(
  474. index_name="index_1",
  475. table_name="vectors",
  476. run_with_orchestration=True
  477. )
  478. """
  479. ),
  480. },
  481. {
  482. "lang": "JavaScript",
  483. "source": textwrap.dedent(
  484. """
  485. const { r2rClient } = require("r2r-js");
  486. const client = new r2rClient();
  487. function main() {
  488. const response = await client.indicies.delete({
  489. indexName: "index_1"
  490. tableName: "vectors"
  491. });
  492. console.log(response);
  493. }
  494. main();
  495. """
  496. ),
  497. },
  498. {
  499. "lang": "CLI",
  500. "source": textwrap.dedent(
  501. """
  502. r2r indices delete index_1 vectors
  503. """
  504. ),
  505. },
  506. {
  507. "lang": "Shell",
  508. "source": textwrap.dedent(
  509. """
  510. curl -X DELETE "https://api.example.com/indices/index_1" \\
  511. -H "Content-Type: application/json" \\
  512. -H "Authorization: Bearer YOUR_API_KEY"
  513. """
  514. ),
  515. },
  516. ]
  517. },
  518. )
  519. @self.base_endpoint
  520. async def delete_index(
  521. table_name: VectorTableName = Path(
  522. default=...,
  523. description="The table of vector embeddings to delete (e.g. `vectors`, `entity`, `document_collections`)",
  524. ),
  525. index_name: str = Path(
  526. ..., description="The name of the index to delete"
  527. ),
  528. # concurrently: bool = Body(
  529. # default=True,
  530. # description="Whether to delete the index concurrently (recommended for large indices)",
  531. # ),
  532. # run_with_orchestration: Optional[bool] = Body(True),
  533. auth_user=Depends(self.providers.auth.auth_wrapper()),
  534. ) -> WrappedGenericMessageResponse:
  535. """
  536. Delete an existing vector similarity search index.
  537. This endpoint removes the specified index from the database. Important considerations:
  538. - Deletion is permanent and cannot be undone
  539. - Underlying vector data remains intact
  540. - Queries will fall back to sequential scan
  541. - Running queries during deletion may be slower
  542. - Use run_with_orchestration=True for large indices to prevent timeouts
  543. - Consider index dependencies before deletion
  544. The operation returns immediately but cleanup may continue in background.
  545. """
  546. logger.info(
  547. f"Deleting vector index {index_name} from table {table_name}"
  548. )
  549. return await self.providers.orchestration.run_workflow(
  550. "delete-vector-index",
  551. {
  552. "request": {
  553. "index_name": index_name,
  554. "table_name": table_name,
  555. "concurrently": True,
  556. },
  557. },
  558. options={
  559. "additional_metadata": {},
  560. },
  561. )