indices_router.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. import logging
  2. import textwrap
  3. from typing import Optional
  4. from fastapi import Body, Depends, Path, Query
  5. from core.base import IndexConfig, R2RException
  6. from core.base.abstractions import VectorTableName
  7. from core.base.api.models import (
  8. VectorIndexResponse,
  9. VectorIndicesResponse,
  10. WrappedGenericMessageResponse,
  11. WrappedVectorIndexResponse,
  12. WrappedVectorIndicesResponse,
  13. )
  14. from ...abstractions import R2RProviders, R2RServices
  15. from ...config import R2RConfig
  16. from .base_router import BaseRouterV3
  17. logger = logging.getLogger()
  class IndicesRouter(BaseRouterV3):
      """Router exposing the v3 vector-index endpoints under ``/indices``.

      Routes registered by ``_setup_routes``:

      * ``POST /indices`` - submit a ``create-vector-index`` workflow.
      * ``GET /indices`` - list indices with offset/limit pagination.
      * ``GET /indices/{table_name}/{index_name}`` - fetch a single index.
      * ``DELETE /indices/{table_name}/{index_name}`` - submit a
        ``delete-vector-index`` workflow.

      Every route depends on ``self.rate_limit_dependency`` and on the auth
      provider's ``auth_wrapper()``.
      """

      def __init__(
          self, providers: R2RProviders, services: R2RServices, config: R2RConfig
      ):
          """Log the initialization and delegate setup to ``BaseRouterV3``."""
          # Go through the module logger: the bare ``logging.info`` helper
          # implicitly calls ``logging.basicConfig()`` when the root logger has
          # no handlers, which library code should not trigger.
          logger.info("Initializing IndicesRouter")
          super().__init__(providers, services, config)

      def _setup_routes(self):
          """Register the create / list / get / delete index endpoints.

          The handlers are closures over ``self`` so they can reach the
          providers. The strings under ``x-codeSamples`` are published in the
          OpenAPI schema, and each handler docstring doubles as the endpoint
          description.
          """

          ## TODO - Allow developer to pass the index id with the request
          @self.router.post(
              "/indices",
              dependencies=[Depends(self.rate_limit_dependency)],
              summary="Create Vector Index",
              openapi_extra={
                  "x-codeSamples": [
                      {
                          "lang": "Python",
                          "source": textwrap.dedent("""
                              from r2r import R2RClient

                              client = R2RClient()
                              # when using auth, do client.login(...)

                              # Create an HNSW index for efficient similarity search
                              result = client.indices.create(
                                  config={
                                      "table_name": "chunks",  # The table containing vector embeddings
                                      "index_method": "hnsw",   # Hierarchical Navigable Small World graph
                                      "index_measure": "cosine_distance",  # Similarity measure
                                      "index_arguments": {
                                          "m": 16,              # Number of connections per layer
                                          "ef_construction": 64,# Size of dynamic candidate list for construction
                                          "ef": 40,            # Size of dynamic candidate list for search
                                      },
                                      "index_name": "my_document_embeddings_idx",
                                      "index_column": "embedding",
                                      "concurrently": True     # Build index without blocking table writes
                                  },
                                  run_with_orchestration=True  # Run as orchestrated task for large indices
                              )

                              # Create an IVF-Flat index for balanced performance
                              result = client.indices.create(
                                  config={
                                      "table_name": "chunks",
                                      "index_method": "ivf_flat", # Inverted File with Flat storage
                                      "index_measure": "l2_distance",
                                      "index_arguments": {
                                          "lists": 100,         # Number of cluster centroids
                                          "probe": 10,          # Number of clusters to search
                                      },
                                      "index_name": "my_ivf_embeddings_idx",
                                      "index_column": "embedding",
                                      "concurrently": True
                                  }
                              )
                              """),
                      },
                      {
                          "lang": "JavaScript",
                          "source": textwrap.dedent("""
                              const { r2rClient } = require("r2r-js");

                              const client = new r2rClient();

                              async function main() {
                                  const response = await client.indices.create({
                                      config: {
                                          tableName: "vectors",
                                          indexMethod: "hnsw",
                                          indexMeasure: "cosine_distance",
                                          indexArguments: {
                                              m: 16,
                                              ef_construction: 64,
                                              ef: 40
                                          },
                                          indexName: "my_document_embeddings_idx",
                                          indexColumn: "embedding",
                                          concurrently: true
                                      },
                                      runWithOrchestration: true
                                  });
                              }

                              main();
                              """),
                      },
                      {
                          "lang": "Shell",
                          "source": textwrap.dedent("""
                              # Create HNSW Index
                              curl -X POST "https://api.example.com/indices" \\
                                  -H "Content-Type: application/json" \\
                                  -H "Authorization: Bearer YOUR_API_KEY" \\
                                  -d '{
                                      "config": {
                                          "table_name": "vectors",
                                          "index_method": "hnsw",
                                          "index_measure": "cosine_distance",
                                          "index_arguments": {
                                              "m": 16,
                                              "ef_construction": 64,
                                              "ef": 40
                                          },
                                          "index_name": "my_document_embeddings_idx",
                                          "index_column": "embedding",
                                          "concurrently": true
                                      },
                                      "run_with_orchestration": true
                                  }'

                              # Create IVF-Flat Index
                              curl -X POST "https://api.example.com/indices" \\
                                  -H "Content-Type: application/json" \\
                                  -H "Authorization: Bearer YOUR_API_KEY" \\
                                  -d '{
                                      "config": {
                                          "table_name": "vectors",
                                          "index_method": "ivf_flat",
                                          "index_measure": "l2_distance",
                                          "index_arguments": {
                                              "lists": 100,
                                              "probe": 10
                                          },
                                          "index_name": "my_ivf_embeddings_idx",
                                          "index_column": "embedding",
                                          "concurrently": true
                                      }
                                  }'
                              """),
                      },
                  ]
              },
          )
          @self.base_endpoint
          async def create_index(
              config: IndexConfig,
              run_with_orchestration: Optional[bool] = Body(
                  True,
                  description="Whether to run index creation as an orchestrated task (recommended for large indices)",
              ),
              auth_user=Depends(self.providers.auth.auth_wrapper()),
          ) -> WrappedGenericMessageResponse:
              """Create a new vector similarity search index over the target
              table. Allowed tables include 'vectors', 'entity',
              'document_collections'. Vectors correspond to the chunks of text
              that are indexed for similarity search, whereas entity and
              document_collections are created during knowledge graph
              construction.

              This endpoint creates a database index optimized for efficient similarity search over vector embeddings.
              It supports two main indexing methods:

              1. HNSW (Hierarchical Navigable Small World):
                 - Best for: High-dimensional vectors requiring fast approximate nearest neighbor search
                 - Pros: Very fast search, good recall, memory-resident for speed
                 - Cons: Slower index construction, more memory usage
                 - Key parameters:
                   * m: Number of connections per layer (higher = better recall but more memory)
                   * ef_construction: Build-time search width (higher = better recall but slower build)
                   * ef: Query-time search width (higher = better recall but slower search)

              2. IVF-Flat (Inverted File with Flat Storage):
                 - Best for: Balance between build speed, search speed, and recall
                 - Pros: Faster index construction, less memory usage
                 - Cons: Slightly slower search than HNSW
                 - Key parameters:
                   * lists: Number of clusters (usually sqrt(n) where n is number of vectors)
                   * probe: Number of nearest clusters to search

              Supported similarity measures:
              - cosine_distance: Best for comparing semantic similarity
              - l2_distance: Best for comparing absolute distances
              - ip_distance: Best for comparing raw dot products

              Notes:
              - Index creation can be resource-intensive for large datasets
              - Use run_with_orchestration=True for large indices to prevent timeouts
              - The 'concurrently' option allows other operations while building
              - Index names must be unique per table
              """
              # NOTE(review): `run_with_orchestration` is accepted but never
              # consulted below - the request is always handed to the
              # orchestration provider. Confirm whether a direct path is wanted.
              logger.info(
                  f"Creating vector index for {config.table_name} with method {config.index_method}, measure {config.index_measure}, concurrently {config.concurrently}"
              )

              # Forward the config field-by-field as the workflow request.
              workflow_input = {
                  "request": {
                      "table_name": config.table_name,
                      "index_method": config.index_method,
                      "index_measure": config.index_measure,
                      "index_name": config.index_name,
                      "index_column": config.index_column,
                      "index_arguments": config.index_arguments,
                      "concurrently": config.concurrently,
                  },
              }
              result = await self.providers.orchestration.run_workflow(
                  "create-vector-index",
                  workflow_input,
                  options={
                      "additional_metadata": {},
                  },
              )

              return result  # type: ignore

          @self.router.get(
              "/indices",
              dependencies=[Depends(self.rate_limit_dependency)],
              summary="List Vector Indices",
              openapi_extra={
                  "x-codeSamples": [
                      {
                          "lang": "Python",
                          "source": textwrap.dedent("""
                              from r2r import R2RClient

                              client = R2RClient()

                              # List all indices
                              indices = client.indices.list(
                                  offset=0,
                                  limit=10
                              )
                              """),
                      },
                      {
                          "lang": "JavaScript",
                          "source": textwrap.dedent("""
                              const { r2rClient } = require("r2r-js");

                              const client = new r2rClient();

                              async function main() {
                                  const response = await client.indices.list({
                                      offset: 0,
                                      limit: 10,
                                      filters: { table_name: "vectors" }
                                  });
                              }

                              main();
                              """),
                      },
                      {
                          "lang": "Shell",
                          "source": textwrap.dedent("""
                              curl -X GET "https://api.example.com/indices?offset=0&limit=10" \\
                                  -H "Authorization: Bearer YOUR_API_KEY" \\
                                  -H "Content-Type: application/json"

                              # With filters
                              curl -X GET "https://api.example.com/indices?offset=0&limit=10&filters={\"table_name\":\"vectors\"}" \\
                                  -H "Authorization: Bearer YOUR_API_KEY" \\
                                  -H "Content-Type: application/json"
                              """),
                      },
                  ]
              },
          )
          @self.base_endpoint
          async def list_indices(
              # filters: list[str] = Query([]),
              offset: int = Query(
                  0,
                  ge=0,
                  description="Specifies the number of objects to skip. Defaults to 0.",
              ),
              limit: int = Query(
                  100,
                  ge=1,
                  le=1000,
                  description="Specifies a limit on the number of objects to return, ranging between 1 and 1000. Defaults to 100.",
              ),
              auth_user=Depends(self.providers.auth.auth_wrapper()),
          ) -> WrappedVectorIndicesResponse:
              """List existing vector similarity search indices with pagination
              support.

              Returns details about each index including:
              - Name and table name
              - Indexing method and parameters
              - Size and row count
              - Creation timestamp and last updated
              - Performance statistics (if available)

              Filtering is not currently applied by this endpoint: only `offset`
              and `limit` are forwarded to the database, so any `filters` value
              sent by a client is ignored.
              """
              indices_data = (
                  await self.providers.database.chunks_handler.list_indices(
                      offset=offset, limit=limit
                  )
              )

              # Wrap every raw index record in its response model.
              formatted_indices = VectorIndicesResponse(
                  indices=[
                      VectorIndexResponse(index=index_data)
                      for index_data in indices_data["indices"]
                  ]
              )

              # Second tuple element carries the pagination metadata.
              return (  # type: ignore
                  formatted_indices,
                  {"total_entries": indices_data["total_entries"]},
              )

          @self.router.get(
              "/indices/{table_name}/{index_name}",
              dependencies=[Depends(self.rate_limit_dependency)],
              summary="Get Vector Index Details",
              openapi_extra={
                  "x-codeSamples": [
                      {
                          "lang": "Python",
                          "source": textwrap.dedent("""
                              from r2r import R2RClient

                              client = R2RClient()

                              # Get detailed information about a specific index
                              index = client.indices.retrieve("index_1")
                              """),
                      },
                      {
                          "lang": "JavaScript",
                          "source": textwrap.dedent("""
                              const { r2rClient } = require("r2r-js");

                              const client = new r2rClient();

                              async function main() {
                                  const response = await client.indices.retrieve({
                                      indexName: "index_1",
                                      tableName: "vectors"
                                  });

                                  console.log(response);
                              }

                              main();
                              """),
                      },
                      {
                          "lang": "Shell",
                          "source": textwrap.dedent("""
                              curl -X GET "https://api.example.com/indices/vectors/index_1" \\
                                  -H "Authorization: Bearer YOUR_API_KEY"
                              """),
                      },
                  ]
              },
          )
          @self.base_endpoint
          async def get_index(
              table_name: VectorTableName = Path(
                  ...,
                  description="The table of vector embeddings the index belongs to (e.g. `vectors`, `entity`, `document_collections`)",
              ),
              index_name: str = Path(
                  ..., description="The name of the index to retrieve"
              ),
              auth_user=Depends(self.providers.auth.auth_wrapper()),
          ) -> WrappedVectorIndexResponse:
              """Get detailed information about a specific vector index.

              Returns comprehensive information about the index including:
              - Configuration details (method, measure, parameters)
              - Current size and row count
              - Build progress (if still under construction)
              - Performance statistics:
                  * Average query time
                  * Memory usage
                  * Cache hit rates
                  * Recent query patterns
              - Maintenance information:
                  * Last vacuum
                  * Fragmentation level
                  * Recommended optimizations
              """
              # Reuse the listing query, narrowed to this (table, index) pair.
              indices = (
                  await self.providers.database.chunks_handler.list_indices(
                      filters={
                          "index_name": index_name,
                          "table_name": table_name,
                      },
                      limit=1,
                      offset=0,
                  )
              )
              # Anything other than exactly one match is reported as 404.
              if len(indices["indices"]) != 1:
                  raise R2RException(
                      f"Index '{index_name}' not found", status_code=404
                  )
              return {"index": indices["indices"][0]}  # type: ignore

          # TODO - Implement update index.
          # An unimplemented draft previously lived here as commented-out code:
          # a route on "/indices/{name}" taking an `IndexConfig` body and a
          # `run_with_orchestration` flag (e.g. to raise `ef` / `m` on an HNSW
          # index). The draft disagreed with itself (POST decorator vs. PUT in
          # its curl sample, `{name}` in the path vs. an `id: UUID` parameter),
          # so settle the contract before reviving it.

          @self.router.delete(
              "/indices/{table_name}/{index_name}",
              dependencies=[Depends(self.rate_limit_dependency)],
              summary="Delete Vector Index",
              openapi_extra={
                  "x-codeSamples": [
                      {
                          "lang": "Python",
                          "source": textwrap.dedent("""
                              from r2r import R2RClient

                              client = R2RClient()

                              # Delete an index with orchestration for cleanup
                              result = client.indices.delete(
                                  index_name="index_1",
                                  table_name="vectors",
                                  run_with_orchestration=True
                              )
                              """),
                      },
                      {
                          "lang": "JavaScript",
                          "source": textwrap.dedent("""
                              const { r2rClient } = require("r2r-js");

                              const client = new r2rClient();

                              async function main() {
                                  const response = await client.indices.delete({
                                      indexName: "index_1",
                                      tableName: "vectors"
                                  });

                                  console.log(response);
                              }

                              main();
                              """),
                      },
                      {
                          "lang": "Shell",
                          "source": textwrap.dedent("""
                              curl -X DELETE "https://api.example.com/indices/vectors/index_1" \\
                                  -H "Content-Type: application/json" \\
                                  -H "Authorization: Bearer YOUR_API_KEY"
                              """),
                      },
                  ]
              },
          )
          @self.base_endpoint
          async def delete_index(
              table_name: VectorTableName = Path(
                  default=...,
                  description="The table of vector embeddings to delete (e.g. `vectors`, `entity`, `document_collections`)",
              ),
              index_name: str = Path(
                  ..., description="The name of the index to delete"
              ),
              # concurrently: bool = Body(
              #     default=True,
              #     description="Whether to delete the index concurrently (recommended for large indices)",
              # ),
              # run_with_orchestration: Optional[bool] = Body(True),
              auth_user=Depends(self.providers.auth.auth_wrapper()),
          ) -> WrappedGenericMessageResponse:
              """Delete an existing vector similarity search index.

              This endpoint removes the specified index from the database. Important considerations:
              - Deletion is permanent and cannot be undone
              - Underlying vector data remains intact
              - Queries will fall back to sequential scan
              - Running queries during deletion may be slower
              - The deletion is always submitted as a workflow with `concurrently` enabled
              - Consider index dependencies before deletion

              The operation returns immediately but cleanup may continue in background.
              """
              logger.info(
                  f"Deleting vector index {index_name} from table {table_name}"
              )

              # `concurrently` is fixed to True until the request-body options
              # commented out in the signature are enabled.
              workflow_input = {
                  "request": {
                      "index_name": index_name,
                      "table_name": table_name,
                      "concurrently": True,
                  },
              }
              return await self.providers.orchestration.run_workflow(  # type: ignore
                  "delete-vector-index",
                  workflow_input,
                  options={
                      "additional_metadata": {},
                  },
              )